author     rdivacky <rdivacky@FreeBSD.org>  2010-07-13 17:19:57 +0000
committer  rdivacky <rdivacky@FreeBSD.org>  2010-07-13 17:19:57 +0000
commit     9112829d76cbb8e0c8ef51bbc2d7d1be48cd7b74
tree       9de1c5f67a98cd0e73c60838396486c984f63ac2 /lib
parent     1e3dec662ea18131c495db50caccc57f77b7a5fe
Update LLVM to r108243.
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/AliasAnalysis.cpp | 4
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp | 1
-rw-r--r--  lib/Analysis/AliasDebugger.cpp | 12
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 145
-rw-r--r--  lib/Analysis/CMakeLists.txt | 1
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 18
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 80
-rw-r--r--  lib/Analysis/DomPrinter.cpp | 4
-rw-r--r--  lib/Analysis/IPA/CallGraph.cpp | 8
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp | 38
-rw-r--r--  lib/Analysis/InlineCost.cpp | 27
-rw-r--r--  lib/Analysis/Lint.cpp | 239
-rw-r--r--  lib/Analysis/Loads.cpp | 235
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 7
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 22
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 29
-rw-r--r--  lib/Analysis/PostDominators.cpp | 5
-rw-r--r--  lib/Analysis/ProfileInfo.cpp | 6
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 325
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 40
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 135
-rw-r--r--  lib/Analysis/ScalarEvolutionNormalization.cpp | 15
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 4
-rw-r--r--  lib/Archive/ArchiveWriter.cpp | 12
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 1
-rw-r--r--  lib/AsmParser/LLParser.cpp | 42
-rw-r--r--  lib/AsmParser/LLToken.h | 6
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 21
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 33
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp | 13
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.h | 7
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 83
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 94
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 246
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 18
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 6
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 68
-rw-r--r--  lib/CodeGen/BranchFolding.h | 5
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 7
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 2
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp (renamed from lib/CodeGen/SelectionDAG/CallingConvLower.cpp) | 14
-rw-r--r--  lib/CodeGen/CodePlacementOpt.cpp | 4
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 158
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 5
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 110
-rw-r--r--  lib/CodeGen/ELFCodeEmitter.cpp | 2
-rw-r--r--  lib/CodeGen/ExactHazardRecognizer.h | 86
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 6
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 400
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 408
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 51
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 15
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 2
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 66
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 236
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 4
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 9
-rw-r--r--  lib/CodeGen/LowerSubregs.cpp | 217
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 129
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 46
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 1
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 16
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 113
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 118
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 130
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 102
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 3
-rw-r--r--  lib/CodeGen/OptimizeExts.cpp | 24
-rw-r--r--  lib/CodeGen/OptimizePHIs.cpp | 5
-rw-r--r--  lib/CodeGen/PBQP/HeuristicSolver.h | 2
-rw-r--r--  lib/CodeGen/PBQP/Heuristics/Briggs.h | 5
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 63
-rw-r--r--  lib/CodeGen/Passes.cpp | 26
-rw-r--r--  lib/CodeGen/PostRAHazardRecognizer.cpp (renamed from lib/CodeGen/ExactHazardRecognizer.cpp) | 26
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 51
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 89
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 33
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 65
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 226
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 33
-rw-r--r--  lib/CodeGen/RegAllocLocal.cpp | 1254
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 25
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 156
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 33
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 37
-rw-r--r--  lib/CodeGen/ScheduleDAGEmit.cpp | 14
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.h | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 267
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 354
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 63
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h | 144
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 133
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 335
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 40
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 150
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 72
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 140
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 17
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 241
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 158
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1248
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 12
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 421
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 218
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 14
-rw-r--r--  lib/CodeGen/SimpleHazardRecognizer.h | 89
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 1790
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.h | 84
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 114
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 4
-rw-r--r--  lib/CodeGen/Spiller.cpp | 209
-rw-r--r--  lib/CodeGen/Spiller.h | 18
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 16
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 14
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 22
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 18
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 182
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 112
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 303
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 110
-rw-r--r--  lib/CompilerDriver/Tool.cpp | 3
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 2
-rw-r--r--  lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 2
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 54
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.h | 34
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 10
-rw-r--r--  lib/Linker/LinkItems.cpp | 29
-rw-r--r--  lib/MC/CMakeLists.txt | 3
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 24
-rw-r--r--  lib/MC/MCAssembler.cpp | 18
-rw-r--r--  lib/MC/MCContext.cpp | 11
-rw-r--r--  lib/MC/MCExpr.cpp | 6
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 161
-rw-r--r--  lib/MC/MCObjectStreamer.cpp | 39
-rw-r--r--  lib/MC/MCParser/AsmLexer.cpp | 7
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 786
-rw-r--r--  lib/MC/MCParser/CMakeLists.txt | 3
-rw-r--r--  lib/MC/MCParser/DarwinAsmParser.cpp | 758
-rw-r--r--  lib/MC/MCParser/ELFAsmParser.cpp | 68
-rw-r--r--  lib/MC/MCParser/MCAsmLexer.cpp | 6
-rw-r--r--  lib/MC/MCParser/MCAsmParser.cpp | 6
-rw-r--r--  lib/MC/MCParser/MCAsmParserExtension.cpp | 21
-rw-r--r--  lib/MC/MCSectionCOFF.cpp | 14
-rw-r--r--  lib/MC/MachObjectWriter.cpp | 61
-rw-r--r--  lib/MC/WinCOFFObjectWriter.cpp | 71
-rw-r--r--  lib/MC/WinCOFFStreamer.cpp | 198
-rw-r--r--  lib/Support/CMakeLists.txt | 1
-rw-r--r--  lib/Support/DAGDeltaAlgorithm.cpp | 357
-rw-r--r--  lib/Support/DeltaAlgorithm.cpp | 4
-rw-r--r--  lib/Support/Dwarf.cpp | 511
-rw-r--r--  lib/Support/FileUtilities.cpp | 26
-rw-r--r--  lib/Support/FoldingSet.cpp | 20
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 180
-rw-r--r--  lib/Support/PrettyStackTrace.cpp | 23
-rw-r--r--  lib/Support/SmallPtrSet.cpp | 9
-rw-r--r--  lib/Support/SmallVector.cpp | 21
-rw-r--r--  lib/Support/Timer.cpp | 10
-rw-r--r--  lib/Support/Triple.cpp | 5
-rw-r--r--  lib/Support/raw_ostream.cpp | 8
-rw-r--r--  lib/System/Disassembler.cpp | 16
-rw-r--r--  lib/System/Path.cpp | 29
-rw-r--r--  lib/System/Unix/Path.inc | 23
-rw-r--r--  lib/System/Unix/Program.inc | 2
-rw-r--r--  lib/System/Unix/Signals.inc | 17
-rw-r--r--  lib/System/Win32/Path.inc | 6
-rw-r--r--  lib/System/Win32/Signals.inc | 2
-rw-r--r--  lib/Target/ARM/ARM.h | 4
-rw-r--r--  lib/Target/ARM/ARMAddressingModes.h | 64
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 629
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 87
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 191
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 24
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 174
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 58
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 1
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 551
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 893
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 47
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 44
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 2
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 128
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 99
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 23
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 63
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 8
-rw-r--r--  lib/Target/ARM/ARMJITInfo.h | 3
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 194
-rw-r--r--  lib/Target/ARM/ARMMachineFunctionInfo.h | 12
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 87
-rw-r--r--  lib/Target/ARM/ARMScheduleA8.td | 84
-rw-r--r--  lib/Target/ARM/ARMScheduleA9.td | 364
-rw-r--r--  lib/Target/ARM/ARMScheduleV6.td | 2
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 30
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 23
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 59
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 24
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 5
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp | 113
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.h | 8
-rw-r--r--  lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h | 42
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 4
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 51
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 125
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 24
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 17
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 6
-rw-r--r--  lib/Target/ARM/Thumb2HazardRecognizer.cpp | 53
-rw-r--r--  lib/Target/ARM/Thumb2HazardRecognizer.h | 40
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 160
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 192
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 39
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 16
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 39
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.h | 2
-rw-r--r--  lib/Target/Alpha/AlphaInstrFormats.td | 4
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 109
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.h | 27
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.td | 34
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.h | 3
-rw-r--r--  lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.cpp | 13
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.h | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.cpp | 108
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.h | 15
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.td | 12
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 30
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 6
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 50
-rw-r--r--  lib/Target/CellSPU/SPUCallingConv.td | 82
-rw-r--r--  lib/Target/CellSPU/SPUFrameInfo.h | 4
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 15
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 126
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.h | 3
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 131
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 33
-rw-r--r--  lib/Target/CellSPU/SPUNodes.td | 2
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 57
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 13
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 3341
-rw-r--r--  lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.cpp | 88
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.h | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 65
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 25
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 16
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 3
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 23
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 3
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 43
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 3
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 39
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 12
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 455
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 46
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 3
-rw-r--r--  lib/Target/Mangler.cpp | 2
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 5
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 55
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 2
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 213
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 25
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 2
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 35
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 3
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.cpp | 46
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.h | 4
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.cpp | 37
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.h | 13
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.td | 18
-rw-r--r--  lib/Target/PIC16/PIC16MemSelOpt.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp | 24
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Cloner.h | 4
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.cpp | 7
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 124
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 9
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 184
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 30
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 166
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 3
-rw-r--r--  lib/Target/README.txt | 134
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 61
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.h | 2
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 98
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 25
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 2
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 7
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 3
-rw-r--r--  lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 4
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 29
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.h | 3
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFP.td | 4
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 96
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 12
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 17
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 16
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 3
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.td | 10
-rw-r--r--  lib/Target/TargetInstrInfo.cpp | 4
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 2
-rw-r--r--  lib/Target/TargetRegisterInfo.cpp | 12
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmLexer.cpp | 85
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 105
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 21
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h | 11
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 7
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp | 23
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h | 15
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 74
-rw-r--r--  lib/Target/X86/Disassembler/CMakeLists.txt | 4
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 15
-rw-r--r--  lib/Target/X86/README-SSE.txt | 184
-rw-r--r--  lib/Target/X86/README-X86-64.txt | 249
-rw-r--r--  lib/Target/X86/README.txt | 103
-rw-r--r--  lib/Target/X86/X86.h | 4
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp | 8
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 12
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 20
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 441
-rw-r--r--  lib/Target/X86/X86FixupKinds.h | 1
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 111
-rw-r--r--  lib/Target/X86/X86FloatingPointRegKill.cpp | 17
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 196
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 902
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 43
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 238
-rw-r--r--  lib/Target/X86/X86InstrBuilder.h | 39
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 16
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 90
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 336
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 642
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 164
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 369
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 14
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 6263
-rw-r--r--  lib/Target/X86/X86MCCodeEmitter.cpp | 543
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 117
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 6
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 32
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 69
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 47
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 8
-rw-r--r--  lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 1
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 61
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 8
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 70
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 15
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 2
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 3
-rw-r--r--  lib/Transforms/Hello/Hello.cpp | 4
-rw-r--r--  lib/Transforms/Hello/Hello.exports | 0
-rw-r--r--  lib/Transforms/Hello/Makefile | 8
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 15
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 7
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 51
-rw-r--r--  lib/Transforms/IPO/IPConstantPropagation.cpp | 7
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 2
-rw-r--r--  lib/Transforms/IPO/LowerSetJmp.cpp | 18
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 3
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 10
-rw-r--r--  lib/Transforms/IPO/PartialSpecialization.cpp | 48
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 133
-rw-r--r--  lib/Transforms/IPO/StructRetPromotion.cpp | 10
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 3
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 26
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 156
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 15
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 37
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 35
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp | 5
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 31
-rw-r--r--  lib/Transforms/InstCombine/InstCombineShifts.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 16
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 76
-rw-r--r--  lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp | 8
-rw-r--r--  lib/Transforms/Instrumentation/ProfilingUtils.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/ABCD.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/ADCE.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 59
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 61
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 16
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 73
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp | 28
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 143
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 28
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 53
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 213
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 96
-rw-r--r--  lib/Transforms/Utils/AddrModeMatcher.cpp | 21
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 118
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp | 23
-rw-r--r--  lib/Transforms/Utils/BuildLibCalls.cpp | 67
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 100
-rw-r--r--  lib/Transforms/Utils/CloneLoop.cpp | 33
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp | 56
-rw-r--r--  lib/Transforms/Utils/DemoteRegToStack.cpp | 22
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 23
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp | 7
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 113
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 49
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 20
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 140
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 16
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 15
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 10
-rw-r--r--  lib/Transforms/Utils/ValueMapper.h | 4
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 33
-rw-r--r--  lib/VMCore/AutoUpgrade.cpp | 48
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 13
-rw-r--r--  lib/VMCore/Core.cpp | 18
-rw-r--r--  lib/VMCore/Instruction.cpp | 20
-rw-r--r--  lib/VMCore/Instructions.cpp | 65
-rw-r--r--  lib/VMCore/IntrinsicInst.cpp | 6
-rw-r--r--  lib/VMCore/Metadata.cpp | 1
-rw-r--r--  lib/VMCore/Module.cpp | 9
-rw-r--r--  lib/VMCore/Pass.cpp | 45
-rw-r--r--  lib/VMCore/PassManager.cpp | 5
-rw-r--r--  lib/VMCore/Value.cpp | 10
-rw-r--r--  lib/VMCore/Verifier.cpp | 44
443 files changed, 23478 insertions, 20095 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 371dcaf..503fbbd 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -233,10 +233,12 @@ bool llvm::isNoAliasCall(const Value *V) {
/// NoAlias returns
///
bool llvm::isIdentifiedObject(const Value *V) {
- if (isa<AllocaInst>(V) || isNoAliasCall(V))
+ if (isa<AllocaInst>(V))
return true;
if (isa<GlobalValue>(V) && !isa<GlobalAlias>(V))
return true;
+ if (isNoAliasCall(V))
+ return true;
if (const Argument *A = dyn_cast<Argument>(V))
return A->hasNoAliasAttr() || A->hasByValAttr();
return false;
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index bfa3ff1..37ee9fc 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -25,7 +25,6 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Assembly/Writer.h"
-#include "llvm/Target/TargetData.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/CommandLine.h"
diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp
index 88c2875..bc2d9c55 100644
--- a/lib/Analysis/AliasDebugger.cpp
+++ b/lib/Analysis/AliasDebugger.cpp
@@ -45,8 +45,12 @@ namespace {
InitializeAliasAnalysis(this); // set up super class
for(Module::global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I)
+ E = M.global_end(); I != E; ++I) {
Vals.insert(&*I);
+ for (User::const_op_iterator OI = I->op_begin(),
+ OE = I->op_end(); OI != OE; ++OI)
+ Vals.insert(*OI);
+ }
for(Module::iterator I = M.begin(),
E = M.end(); I != E; ++I){
@@ -58,8 +62,12 @@ namespace {
for (Function::const_iterator FI = I->begin(), FE = I->end();
FI != FE; ++FI)
for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end();
- BI != BE; ++BI)
+ BI != BE; ++BI) {
Vals.insert(&*BI);
+ for (User::const_op_iterator OI = BI->op_begin(),
+ OE = BI->op_end(); OI != OE; ++OI)
+ Vals.insert(*OI);
+ }
}
}
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index cfe7a1c..4f53a6d 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -78,6 +78,20 @@ static bool isNonEscapingLocalObject(const Value *V) {
return false;
}
+/// isEscapeSource - Return true if the pointer is one which would have
+/// been considered an escape by isNonEscapingLocalObject.
+static bool isEscapeSource(const Value *V) {
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))
+ return true;
+
+ // The load case works because isNonEscapingLocalObject considers all
+ // stores to be escapes (it passes true for the StoreCaptures argument
+ // to PointerMayBeCaptured).
+ if (isa<LoadInst>(V))
+ return true;
+
+ return false;
+}
/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
@@ -94,7 +108,7 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size,
} else if (const CallInst* CI = extractMallocCall(V)) {
if (!isArrayMalloc(V, &TD))
// The size is the argument to the malloc call.
- if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getOperand(1)))
+ if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getArgOperand(0)))
return (C->getZExtValue() < Size);
return false;
} else if (const Argument *A = dyn_cast<Argument>(V)) {
@@ -177,9 +191,29 @@ static RegisterAnalysisGroup<AliasAnalysis> V(U);
ImmutablePass *llvm::createNoAAPass() { return new NoAA(); }
//===----------------------------------------------------------------------===//
-// BasicAA Pass
+// BasicAliasAnalysis Pass
//===----------------------------------------------------------------------===//
+#ifndef NDEBUG
+static const Function *getParent(const Value *V) {
+ if (const Instruction *inst = dyn_cast<Instruction>(V))
+ return inst->getParent()->getParent();
+
+ if (const Argument *arg = dyn_cast<Argument>(V))
+ return arg->getParent();
+
+ return NULL;
+}
+
+static bool notDifferentParent(const Value *O1, const Value *O2) {
+
+ const Function *F1 = getParent(O1);
+ const Function *F2 = getParent(O2);
+
+ return !F1 || !F2 || F1 == F2;
+}
+#endif
+
namespace {
/// BasicAliasAnalysis - This is the default alias analysis implementation.
/// Because it doesn't chain to a previous alias analysis (like -no-aa), it
@@ -187,11 +221,14 @@ namespace {
struct BasicAliasAnalysis : public NoAA {
static char ID; // Class identification, replacement for typeinfo
BasicAliasAnalysis() : NoAA(&ID) {}
+
AliasResult alias(const Value *V1, unsigned V1Size,
const Value *V2, unsigned V2Size) {
- assert(VisitedPHIs.empty() && "VisitedPHIs must be cleared after use!");
+ assert(Visited.empty() && "Visited must be cleared after use!");
+ assert(notDifferentParent(V1, V2) &&
+ "BasicAliasAnalysis doesn't support interprocedural queries.");
AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size);
- VisitedPHIs.clear();
+ Visited.clear();
return Alias;
}
@@ -213,8 +250,8 @@ namespace {
}
private:
- // VisitedPHIs - Track PHI nodes visited by a aliasCheck() call.
- SmallPtrSet<const Value*, 16> VisitedPHIs;
+ // Visited - Track instructions visited by aliasPHI(), aliasSelect(), and aliasGEP() calls.
+ SmallPtrSet<const Value*, 16> Visited;
// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP
// instruction against another.
@@ -268,6 +305,9 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) {
/// simple "address taken" analysis on local objects.
AliasAnalysis::ModRefResult
BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
+ assert(notDifferentParent(CS.getInstruction(), P) &&
+ "AliasAnalysis query involving multiple functions!");
+
const Value *Object = P->getUnderlyingObject();
// If this is a tail call and P points to a stack location, we know that
@@ -318,10 +358,10 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
case Intrinsic::memcpy:
case Intrinsic::memmove: {
unsigned Len = ~0U;
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3)))
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
Len = LenCI->getZExtValue();
- Value *Dest = II->getOperand(1);
- Value *Src = II->getOperand(2);
+ Value *Dest = II->getArgOperand(0);
+ Value *Src = II->getArgOperand(1);
if (isNoAlias(Dest, Len, P, Size)) {
if (isNoAlias(Src, Len, P, Size))
return NoModRef;
@@ -332,9 +372,9 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
case Intrinsic::memset:
// Since memset is 'accesses arguments' only, the AliasAnalysis base class
// will handle it for the variable length case.
- if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getOperand(3))) {
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
unsigned Len = LenCI->getZExtValue();
- Value *Dest = II->getOperand(1);
+ Value *Dest = II->getArgOperand(0);
if (isNoAlias(Dest, Len, P, Size))
return NoModRef;
}
@@ -352,7 +392,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
case Intrinsic::atomic_load_umax:
case Intrinsic::atomic_load_umin:
if (TD) {
- Value *Op1 = II->getOperand(1);
+ Value *Op1 = II->getArgOperand(0);
unsigned Op1Size = TD->getTypeStoreSize(Op1->getType());
if (isNoAlias(Op1, Op1Size, P, Size))
return NoModRef;
@@ -361,14 +401,14 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start: {
- unsigned PtrSize = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
- if (isNoAlias(II->getOperand(2), PtrSize, P, Size))
+ unsigned PtrSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+ if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size))
return NoModRef;
break;
}
case Intrinsic::invariant_end: {
- unsigned PtrSize = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
- if (isNoAlias(II->getOperand(3), PtrSize, P, Size))
+ unsigned PtrSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+ if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size))
return NoModRef;
break;
}
@@ -440,6 +480,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
const Value *V2, unsigned V2Size,
const Value *UnderlyingV1,
const Value *UnderlyingV2) {
+ // If this GEP has been visited before, we're on a use-def cycle.
+ // Such cycles are only valid when PHI nodes are involved or in unreachable
+ // code. The visitPHI function catches cycles containing PHIs, but there
+ // could still be a cycle without PHIs in unreachable code.
+ if (!Visited.insert(GEP1))
+ return MayAlias;
+
int64_t GEP1BaseOffset;
SmallVector<std::pair<const Value*, int64_t>, 4> GEP1VariableIndices;
@@ -550,6 +597,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size,
AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
const Value *V2, unsigned V2Size) {
+ // If this select has been visited before, we're on a use-def cycle.
+ // Such cycles are only valid when PHI nodes are involved or in unreachable
+ // code. The visitPHI function catches cycles containing PHIs, but there
+ // could still be a cycle without PHIs in unreachable code.
+ if (!Visited.insert(SI))
+ return MayAlias;
+
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
@@ -570,11 +624,17 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize,
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
AliasResult Alias =
- aliasCheck(SI->getTrueValue(), SISize, V2, V2Size);
+ aliasCheck(V2, V2Size, SI->getTrueValue(), SISize);
if (Alias == MayAlias)
return MayAlias;
+
+ // If V2 is visited, the recursive case will have been caught in the
+ // above aliasCheck call, so these subsequent calls to aliasCheck
+ // don't need to assume that V2 is being visited recursively.
+ Visited.erase(V2);
+
AliasResult ThisAlias =
- aliasCheck(SI->getFalseValue(), SISize, V2, V2Size);
+ aliasCheck(V2, V2Size, SI->getFalseValue(), SISize);
if (ThisAlias != Alias)
return MayAlias;
return Alias;
@@ -586,7 +646,7 @@ AliasAnalysis::AliasResult
BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
const Value *V2, unsigned V2Size) {
// The PHI node has already been visited, avoid recursion any further.
- if (!VisitedPHIs.insert(PN))
+ if (!Visited.insert(PN))
return MayAlias;
// If the values are PHIs in the same block, we can do a more precise
@@ -636,10 +696,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize,
for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {
Value *V = V1Srcs[i];
- // If V2 is a PHI, the recursive case will have been caught in the
+ // If V2 is visited, the recursive case will have been caught in the
// above aliasCheck call, so these subsequent calls to aliasCheck
// don't need to assume that V2 is being visited recursively.
- VisitedPHIs.erase(V2);
+ Visited.erase(V2);
AliasResult ThisAlias = aliasCheck(V2, V2Size, V, PNSize);
if (ThisAlias != Alias || ThisAlias == MayAlias)
@@ -693,17 +753,32 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
(isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
return NoAlias;
- // Arguments can't alias with local allocations or noalias calls.
- if ((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
- (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1))))
+ // Arguments can't alias with local allocations or noalias calls
+ // in the same function.
+ if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
+ (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))))
return NoAlias;
// Most objects can't alias null.
- if ((isa<ConstantPointerNull>(V2) && isKnownNonNull(O1)) ||
- (isa<ConstantPointerNull>(V1) && isKnownNonNull(O2)))
+ if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
+ (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
return NoAlias;
- }
+ // If one pointer is the result of a call/invoke or load and the other is a
+ // non-escaping local object within the same function, then we know the
+ // object couldn't escape to a point where the call could return it.
+ //
+ // Note that if the pointers are in different functions, there are a
+ // variety of complications. A call with a nocapture argument may still
+ // temporary store the nocapture argument's value in a temporary memory
+ // location if that memory location doesn't escape. Or it may pass a
+ // nocapture value to other functions as long as they don't capture it.
+ if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+ return NoAlias;
+ if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+ return NoAlias;
+ }
+
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
if (TD)
@@ -711,22 +786,6 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
(V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD)))
return NoAlias;
- // If one pointer is the result of a call/invoke or load and the other is a
- // non-escaping local object, then we know the object couldn't escape to a
- // point where the call could return it. The load case works because
- // isNonEscapingLocalObject considers all stores to be escapes (it
- // passes true for the StoreCaptures argument to PointerMayBeCaptured).
- if (O1 != O2) {
- if ((isa<CallInst>(O1) || isa<InvokeInst>(O1) || isa<LoadInst>(O1) ||
- isa<Argument>(O1)) &&
- isNonEscapingLocalObject(O2))
- return NoAlias;
- if ((isa<CallInst>(O2) || isa<InvokeInst>(O2) || isa<LoadInst>(O2) ||
- isa<Argument>(O2)) &&
- isNonEscapingLocalObject(O1))
- return NoAlias;
- }
-
// FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the
// GEP can't simplify, we don't even look at the PHI cases.
if (!isa<GEPOperator>(V1) && isa<GEPOperator>(V2)) {
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 5a37ce0..d9b670d 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -23,6 +23,7 @@ add_llvm_library(LLVMAnalysis
LibCallSemantics.cpp
Lint.cpp
LiveValues.cpp
+ Loads.cpp
LoopDependenceAnalysis.cpp
LoopInfo.cpp
LoopPass.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 37cda02..13d8f4d 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -208,7 +208,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,
i != e; ++i, ++GTI) {
ConstantInt *CI = dyn_cast<ConstantInt>(*i);
if (!CI) return false; // Index isn't a simple constant?
- if (CI->getZExtValue() == 0) continue; // Not adding anything.
+ if (CI->isZero()) continue; // Not adding anything.
if (const StructType *ST = dyn_cast<StructType>(*GTI)) {
// N = N + Offset
@@ -436,8 +436,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
unsigned StrLen = Str.length();
const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned NumBits = Ty->getPrimitiveSizeInBits();
- // Replace LI with immediate integer store.
- if ((NumBits >> 3) == StrLen + 1) {
+ // Replace load with immediate integer if the result is an integer or fp
+ // value.
+ if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 &&
+ (isa<IntegerType>(Ty) || Ty->isFloatingPointTy())) {
APInt StrVal(NumBits, 0);
APInt SingleChar(NumBits, 0);
if (TD->isLittleEndian()) {
@@ -454,7 +456,11 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
SingleChar = 0;
StrVal = (StrVal << 8) | SingleChar;
}
- return ConstantInt::get(CE->getContext(), StrVal);
+
+ Constant *Res = ConstantInt::get(CE->getContext(), StrVal);
+ if (Ty->isFloatingPointTy())
+ Res = ConstantExpr::getBitCast(Res, Ty);
+ return Res;
}
}
@@ -772,9 +778,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
case Instruction::ICmp:
case Instruction::FCmp: assert(0 && "Invalid for compares");
case Instruction::Call:
- if (Function *F = dyn_cast<Function>(Ops[0]))
+ if (Function *F = dyn_cast<Function>(Ops[CallInst::ArgOffset ? 0:NumOps-1]))
if (canConstantFoldCallTo(F))
- return ConstantFoldCall(F, Ops+1, NumOps-1);
+ return ConstantFoldCall(F, Ops+CallInst::ArgOffset, NumOps-1);
return 0;
case Instruction::PtrToInt:
// If the input is a inttoptr, eliminate the pair. This requires knowing
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index a7b6d2b..c8d0d22 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -73,6 +73,15 @@ GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
return 0;
}
+Function *DIDescriptor::getFunctionField(unsigned Elt) const {
+ if (DbgNode == 0)
+ return 0;
+
+ if (Elt < DbgNode->getNumOperands())
+ return dyn_cast_or_null<Function>(DbgNode->getOperand(Elt));
+ return 0;
+}
+
unsigned DIVariable::getNumAddrElements() const {
return DbgNode->getNumOperands()-6;
}
@@ -397,6 +406,8 @@ bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
/// information for the function F.
bool DISubprogram::describes(const Function *F) {
assert(F && "Invalid function");
+ if (F == getFunction())
+ return true;
StringRef Name = getLinkageName();
if (Name.empty())
Name = getName();
@@ -938,7 +949,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
unsigned VK, unsigned VIndex,
DIType ContainingType,
bool isArtificial,
- bool isOptimized) {
+ bool isOptimized,
+ Function *Fn) {
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_subprogram),
@@ -956,9 +968,15 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
ContainingType,
ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial),
- ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized)
+ ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized),
+ Fn
};
- return DISubprogram(MDNode::get(VMContext, &Elts[0], 16));
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], 17);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(Node);
+ return DISubprogram(Node);
}
/// CreateSubprogramDefinition - Create new subprogram descriptor for the
@@ -984,9 +1002,15 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration)
DeclNode->getOperand(12), // VIndex
DeclNode->getOperand(13), // Containing Type
DeclNode->getOperand(14), // isArtificial
- DeclNode->getOperand(15) // isOptimized
+ DeclNode->getOperand(15), // isOptimized
+ SPDeclaration.getFunction()
};
- return DISubprogram(MDNode::get(VMContext, &Elts[0], 16));
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], 16);
+
+ // Create a named metadata so that we do not lose this mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(Node);
+ return DISubprogram(Node);
}
/// CreateGlobalVariable - Create a new descriptor for the specified global.
@@ -1042,8 +1066,18 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
// The optimizer may remove local variable. If there is an interest
// to preserve variable info in such situation then stash it in a
// named mdnode.
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.lv");
- NMD->addOperand(Node);
+ DISubprogram Fn(getDISubprogram(Context));
+ StringRef FName = "fn";
+ if (Fn.getFunction())
+ FName = Fn.getFunction()->getName();
+ char One = '\1';
+ if (FName.startswith(StringRef(&One, 1)))
+ FName = FName.substr(1);
+ NamedMDNode *FnLocals = M.getNamedMetadata(Twine("llvm.dbg.lv.", FName));
+ if (!FnLocals)
+ FnLocals = NamedMDNode::Create(VMContext, Twine("llvm.dbg.lv.", FName),
+ NULL, 0, &M);
+ FnLocals->addOperand(Node);
}
return DIVariable(Node);
}
@@ -1110,18 +1144,6 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
return DILocation(MDNode::get(VMContext, &Elts[0], 4));
}
-/// CreateLocation - Creates a debug info location.
-DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
- DIScope S, MDNode *OrigLoc) {
- Value *Elts[] = {
- ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
- S,
- OrigLoc
- };
- return DILocation(MDNode::get(VMContext, &Elts[0], 4));
-}
-
//===----------------------------------------------------------------------===//
// DIFactory: Routines for inserting code into a function
//===----------------------------------------------------------------------===//
@@ -1218,17 +1240,19 @@ void DebugInfoFinder::processModule(Module &M) {
processLocation(DILocation(IA));
}
- NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
- if (!NMD)
- return;
-
- for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
- DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
- if (addGlobalVariable(DIG)) {
- addCompileUnit(DIG.getCompileUnit());
- processType(DIG.getType());
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIGlobalVariable DIG(cast<MDNode>(NMD->getOperand(i)));
+ if (addGlobalVariable(DIG)) {
+ addCompileUnit(DIG.getCompileUnit());
+ processType(DIG.getType());
+ }
}
}
+
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ processSubprogram(DISubprogram(NMD->getOperand(i)));
}
/// processLocation - Process DILocation.
diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp
index a1676e5..d95c376 100644
--- a/lib/Analysis/DomPrinter.cpp
+++ b/lib/Analysis/DomPrinter.cpp
@@ -43,10 +43,10 @@ struct DOTGraphTraits<DomTreeNode*> : public DefaultDOTGraphTraits {
if (isSimple())
return DOTGraphTraits<const Function*>
- ::getSimpleNodeLabel(BB, BB->getParent());
+ ::getSimpleNodeLabel(BB, BB->getParent());
else
return DOTGraphTraits<const Function*>
- ::getCompleteNodeLabel(BB, BB->getParent());
+ ::getCompleteNodeLabel(BB, BB->getParent());
}
};
diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp
index 2bde56d7..65c7c6e 100644
--- a/lib/Analysis/IPA/CallGraph.cpp
+++ b/lib/Analysis/IPA/CallGraph.cpp
@@ -126,13 +126,15 @@ private:
}
// Loop over all of the users of the function, looking for non-call uses.
- for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I)
- if ((!isa<CallInst>(I) && !isa<InvokeInst>(I))
- || !CallSite(cast<Instruction>(I)).isCallee(I)) {
+ for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){
+ User *U = *I;
+ if ((!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ || !CallSite(cast<Instruction>(U)).isCallee(I)) {
// Not a call, or being used as a parameter rather than as the callee.
ExternalCallingNode->addCalledFunction(CallSite(), Node);
break;
}
+ }
// If this function is not defined in this translation unit, it could call
// anything.
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index b14afa3..f13deea 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -233,33 +233,34 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
GlobalValue *OkayStoreDest) {
if (!V->getType()->isPointerTy()) return true;
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) {
+ User *U = *UI;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
Readers.push_back(LI->getParent()->getParent());
- } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (V == SI->getOperand(1)) {
Writers.push_back(SI->getParent()->getParent());
} else if (SI->getOperand(1) != OkayStoreDest) {
return true; // Storing the pointer
}
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true;
- } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest))
return true;
- } else if (isFreeCall(*UI)) {
- Writers.push_back(cast<Instruction>(*UI)->getParent()->getParent());
- } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
+ } else if (isFreeCall(U)) {
+ Writers.push_back(cast<Instruction>(U)->getParent()->getParent());
+ } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
// Make sure that this is just the function being called, not that it is
// being passed into the function.
- for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
- if (CI->getOperand(i) == V) return true;
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ if (CI->getArgOperand(i) == V) return true;
+ } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
// Make sure that this is just the function being called, not that it is
// being passed into the function.
- for (unsigned i = 0, e = II->getNumOperands() - 3; i != e; ++i)
- if (II->getOperand(i) == V) return true;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
+ for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i)
+ if (II->getArgOperand(i) == V) return true;
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
if (CE->getOpcode() == Instruction::GetElementPtr ||
CE->getOpcode() == Instruction::BitCast) {
if (AnalyzeUsesOfPointer(CE, Readers, Writers))
@@ -267,12 +268,14 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
} else {
return true;
}
- } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
+ } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return true; // Allow comparison against null.
} else {
return true;
}
+ }
+
return false;
}
@@ -291,7 +294,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
// Walk the user list of the global. If we find anything other than a direct
// load or store, bail out.
for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
- if (LoadInst *LI = dyn_cast<LoadInst>(*I)) {
+ User *U = *I;
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
// The pointer loaded from the global can only be used in simple ways:
// we allow addressing of it and loading storing to it. We do *not* allow
// storing the loaded pointer somewhere else or passing to a function.
@@ -299,7 +303,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
return false; // Loaded pointer escapes.
// TODO: Could try some IP mod/ref of the loaded pointer.
- } else if (StoreInst *SI = dyn_cast<StoreInst>(*I)) {
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// Storing the global itself.
if (SI->getOperand(0) == GV) return false;
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 98dbb69..b1df517 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -162,14 +162,14 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
if (Function *F = CS.getCalledFunction()) {
if (F->isDeclaration() &&
(F->getName() == "setjmp" || F->getName() == "_setjmp"))
- NeverInline = true;
+ callsSetJmp = true;
// If this call is to function itself, then the function is recursive.
// Inlining it into other functions is a bad idea, because this is
// basically just a form of loop peeling, and our metrics aren't useful
// for that case.
if (F == BB->getParent())
- NeverInline = true;
+ isRecursive = true;
}
if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
@@ -220,7 +220,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
// jump would jump from the inlined copy of the function into the original
// function which is extremely undefined behavior.
if (isa<IndirectBrInst>(BB->getTerminator()))
- NeverInline = true;
+ containsIndirectBr = true;
// Remember NumInsts for this BB.
NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
@@ -247,7 +247,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
// Don't bother calculating argument weights if we are never going to inline
// the function anyway.
- if (Metrics.NeverInline)
+ if (NeverInline())
return;
// Check out all of the arguments to the function, figuring out how much
@@ -258,6 +258,14 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
CountCodeReductionForAlloca(I)));
}
+/// NeverInline - Returns true if the function should never be inlined into
+/// any caller.
+bool InlineCostAnalyzer::FunctionInfo::NeverInline() {
+  return (Metrics.callsSetJmp || Metrics.isRecursive ||
+          Metrics.containsIndirectBr);
+}
// getInlineCost - The heuristic used to determine if we should inline the
// function call or not.
//
@@ -315,7 +323,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
CalleeFI->analyzeFunction(Callee);
// If we should never inline this, return a huge cost.
- if (CalleeFI->Metrics.NeverInline)
+ if (CalleeFI->NeverInline())
return InlineCost::getNever();
// FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
@@ -443,10 +451,15 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
}
// Since CalleeMetrics were already calculated, we know that the CallerMetrics
- // reference isn't invalidated: both were in the DenseMap.
- CallerMetrics.NeverInline |= CalleeMetrics.NeverInline;
+ // reference isn't invalidated: both were in the DenseMap.
CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
+ // FIXME: If any of these three are true for the callee, the callee was
+ // not inlined into the caller, so I think they're redundant here.
+ CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp;
+ CallerMetrics.isRecursive |= CalleeMetrics.isRecursive;
+ CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr;
+
CallerMetrics.NumInsts += CalleeMetrics.NumInsts;
CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks;
CallerMetrics.NumCalls += CalleeMetrics.NumCalls;
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index a031cbc..9f1b30d 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -19,7 +19,8 @@
//
// Another limitation is that it assumes all code will be executed. A store
// through a null pointer in a basic block which is never reached is harmless,
-// but this pass will warn about it anyway.
+// but this pass will warn about it anyway. This is the main reason why most
+// of these checks live here instead of in the Verifier pass.
//
// Optimization passes may make conditions that this pass checks for more or
// less obvious. If an optimization pass appears to be introducing a warning,
@@ -35,7 +36,11 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Lint.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
@@ -64,7 +69,8 @@ namespace {
void visitFunction(Function &F);
void visitCallSite(CallSite CS);
- void visitMemoryReference(Instruction &I, Value *Ptr, unsigned Align,
+ void visitMemoryReference(Instruction &I, Value *Ptr,
+ unsigned Size, unsigned Align,
const Type *Ty, unsigned Flags);
void visitCallInst(CallInst &I);
@@ -88,9 +94,14 @@ namespace {
void visitInsertElementInst(InsertElementInst &I);
void visitUnreachableInst(UnreachableInst &I);
+ Value *findValue(Value *V, bool OffsetOk) const;
+ Value *findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSet<Value *, 4> &Visited) const;
+
public:
Module *Mod;
AliasAnalysis *AA;
+ DominatorTree *DT;
TargetData *TD;
std::string Messages;
@@ -104,6 +115,7 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
}
virtual void print(raw_ostream &O, const Module *M) const {}
@@ -176,6 +188,7 @@ X("lint", "Statically lint-checks LLVM IR", false, true);
bool Lint::runOnFunction(Function &F) {
Mod = F.getParent();
AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
visit(F);
dbgs() << MessagesStr.str();
@@ -188,15 +201,17 @@ void Lint::visitFunction(Function &F) {
// fairly common mistake to neglect to name a function.
Assert1(F.hasName() || F.hasLocalLinkage(),
"Unusual: Unnamed function with non-local linkage", &F);
+
+ // TODO: Check for irreducible control flow.
}
void Lint::visitCallSite(CallSite CS) {
Instruction &I = *CS.getInstruction();
Value *Callee = CS.getCalledValue();
- visitMemoryReference(I, Callee, 0, 0, MemRef::Callee);
+ visitMemoryReference(I, Callee, ~0u, 0, 0, MemRef::Callee);
- if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) {
+ if (Function *F = dyn_cast<Function>(findValue(Callee, /*OffsetOk=*/false))) {
Assert1(CS.getCallingConv() == F->getCallingConv(),
"Undefined behavior: Caller and callee calling convention differ",
&I);
@@ -209,23 +224,53 @@ void Lint::visitCallSite(CallSite CS) {
FT->getNumParams() == NumActualArgs,
"Undefined behavior: Call argument count mismatches callee "
"argument count", &I);
-
- // TODO: Check argument types (in case the callee was casted)
-
- // TODO: Check ABI-significant attributes.
- // TODO: Check noalias attribute.
-
- // TODO: Check sret attribute.
+ Assert1(FT->getReturnType() == I.getType(),
+ "Undefined behavior: Call return type mismatches "
+ "callee return type", &I);
+
+ // Check argument types (in case the callee was casted) and attributes.
+ // TODO: Verify that caller and callee attributes are compatible.
+ Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ for (; AI != AE; ++AI) {
+ Value *Actual = *AI;
+ if (PI != PE) {
+ Argument *Formal = PI++;
+ Assert1(Formal->getType() == Actual->getType(),
+ "Undefined behavior: Call argument type mismatches "
+ "callee parameter type", &I);
+
+ // Check that noalias arguments don't alias other arguments. The
+ // AliasAnalysis API isn't expressive enough for what we really want
+ // to do. Known partial overlap is not distinguished from the case
+ // where nothing is known.
+ if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy())
+ for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) {
+ Assert1(AI == BI ||
+ AA->alias(*AI, ~0u, *BI, ~0u) != AliasAnalysis::MustAlias,
+ "Unusual: noalias argument aliases another argument", &I);
+ }
+
+ // Check that an sret argument points to valid memory.
+ if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
+ const Type *Ty =
+ cast<PointerType>(Formal->getType())->getElementType();
+ visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty),
+ TD ? TD->getABITypeAlignment(Ty) : 0,
+ Ty, MemRef::Read | MemRef::Write);
+ }
+ }
+ }
}
if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
AI != AE; ++AI) {
- Value *Obj = (*AI)->getUnderlyingObject();
- Assert1(!isa<AllocaInst>(Obj) && !isa<VAArgInst>(Obj),
+ Value *Obj = findValue(*AI, /*OffsetOk=*/true);
+ Assert1(!isa<AllocaInst>(Obj),
"Undefined behavior: Call with \"tail\" keyword references "
- "alloca or va_arg", &I);
+ "alloca", &I);
}
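A C++-level analogue of the new noalias check above, purely illustrative: the
lint inspects the IR noalias attribute (which Clang emits for __restrict
pointer parameters) and flags call sites where a noalias argument must-aliases
another argument.

    void copy(int *__restrict dst, const int *src, int n);

    void caller(int *p, int n) {
      copy(p, p, n);  // "Unusual: noalias argument aliases another argument"
    }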
@@ -237,9 +282,10 @@ void Lint::visitCallSite(CallSite CS) {
case Intrinsic::memcpy: {
MemCpyInst *MCI = cast<MemCpyInst>(&I);
- visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0,
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MCI->getDest(), ~0u, MCI->getAlignment(), 0,
MemRef::Write);
- visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0,
+ visitMemoryReference(I, MCI->getSource(), ~0u, MCI->getAlignment(), 0,
MemRef::Read);
// Check that the memcpy arguments don't overlap. The AliasAnalysis API
@@ -247,7 +293,8 @@ void Lint::visitCallSite(CallSite CS) {
// overlap is not distinguished from the case where nothing is known.
unsigned Size = 0;
if (const ConstantInt *Len =
- dyn_cast<ConstantInt>(MCI->getLength()->stripPointerCasts()))
+ dyn_cast<ConstantInt>(findValue(MCI->getLength(),
+ /*OffsetOk=*/false)))
if (Len->getValue().isIntN(32))
Size = Len->getValue().getZExtValue();
Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
@@ -257,15 +304,17 @@ void Lint::visitCallSite(CallSite CS) {
}
case Intrinsic::memmove: {
MemMoveInst *MMI = cast<MemMoveInst>(&I);
- visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0,
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MMI->getDest(), ~0u, MMI->getAlignment(), 0,
MemRef::Write);
- visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0,
+ visitMemoryReference(I, MMI->getSource(), ~0u, MMI->getAlignment(), 0,
MemRef::Read);
break;
}
case Intrinsic::memset: {
MemSetInst *MSI = cast<MemSetInst>(&I);
- visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0,
+ // TODO: If the size is known, use it.
+ visitMemoryReference(I, MSI->getDest(), ~0u, MSI->getAlignment(), 0,
MemRef::Write);
break;
}
@@ -275,15 +324,15 @@ void Lint::visitCallSite(CallSite CS) {
"Undefined behavior: va_start called in a non-varargs function",
&I);
- visitMemoryReference(I, CS.getArgument(0), 0, 0,
+ visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
MemRef::Read | MemRef::Write);
break;
case Intrinsic::vacopy:
- visitMemoryReference(I, CS.getArgument(0), 0, 0, MemRef::Write);
- visitMemoryReference(I, CS.getArgument(1), 0, 0, MemRef::Read);
+ visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, MemRef::Write);
+ visitMemoryReference(I, CS.getArgument(1), ~0u, 0, 0, MemRef::Read);
break;
case Intrinsic::vaend:
- visitMemoryReference(I, CS.getArgument(0), 0, 0,
+ visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
MemRef::Read | MemRef::Write);
break;
@@ -291,7 +340,7 @@ void Lint::visitCallSite(CallSite CS) {
// Stackrestore doesn't read or write memory, but it sets the
// stack pointer, which the compiler may read from or write to
// at any time, so check it for both readability and writeability.
- visitMemoryReference(I, CS.getArgument(0), 0, 0,
+ visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0,
MemRef::Read | MemRef::Write);
break;
}
@@ -310,17 +359,35 @@ void Lint::visitReturnInst(ReturnInst &I) {
Assert1(!F->doesNotReturn(),
"Unusual: Return statement in function with noreturn attribute",
&I);
+
+ if (Value *V = I.getReturnValue()) {
+ Value *Obj = findValue(V, /*OffsetOk=*/true);
+ Assert1(!isa<AllocaInst>(Obj),
+ "Unusual: Returning alloca value", &I);
+ }
}
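The new return-value check is the IR-level version of the classic
dangling-pointer mistake; a minimal C++ sketch of code it would flag (the
local variable becomes an alloca in unoptimized IR):

    int *dangling() {
      int local = 0;
      return &local;  // "Unusual: Returning alloca value"
    }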
-// TODO: Add a length argument and check that the reference is in bounds
+// TODO: Check that the reference is in bounds.
+// TODO: Check readnone/readonly function attributes.
void Lint::visitMemoryReference(Instruction &I,
- Value *Ptr, unsigned Align, const Type *Ty,
- unsigned Flags) {
- Value *UnderlyingObject = Ptr->getUnderlyingObject();
+ Value *Ptr, unsigned Size, unsigned Align,
+ const Type *Ty, unsigned Flags) {
+ // If no memory is being referenced, it doesn't matter if the pointer
+ // is valid.
+ if (Size == 0)
+ return;
+
+ Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
Assert1(!isa<ConstantPointerNull>(UnderlyingObject),
"Undefined behavior: Null pointer dereference", &I);
Assert1(!isa<UndefValue>(UnderlyingObject),
"Undefined behavior: Undef pointer dereference", &I);
+ Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isAllOnesValue(),
+ "Unusual: All-ones pointer dereference", &I);
+ Assert1(!isa<ConstantInt>(UnderlyingObject) ||
+ !cast<ConstantInt>(UnderlyingObject)->isOne(),
+ "Unusual: Address one pointer dereference", &I);
if (Flags & MemRef::Write) {
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
@@ -361,13 +428,16 @@ void Lint::visitMemoryReference(Instruction &I,
}
void Lint::visitLoadInst(LoadInst &I) {
- visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), I.getType(),
- MemRef::Read);
+ visitMemoryReference(I, I.getPointerOperand(),
+ AA->getTypeStoreSize(I.getType()), I.getAlignment(),
+ I.getType(), MemRef::Read);
}
void Lint::visitStoreInst(StoreInst &I) {
- visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(),
- I.getOperand(0)->getType(), MemRef::Write);
+ visitMemoryReference(I, I.getPointerOperand(),
+ AA->getTypeStoreSize(I.getOperand(0)->getType()),
+ I.getAlignment(),
+ I.getOperand(0)->getType(), MemRef::Write);
}
void Lint::visitXor(BinaryOperator &I) {
@@ -384,21 +454,21 @@ void Lint::visitSub(BinaryOperator &I) {
void Lint::visitLShr(BinaryOperator &I) {
if (ConstantInt *CI =
- dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts()))
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
"Undefined result: Shift count out of range", &I);
}
void Lint::visitAShr(BinaryOperator &I) {
if (ConstantInt *CI =
- dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts()))
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
"Undefined result: Shift count out of range", &I);
}
void Lint::visitShl(BinaryOperator &I) {
if (ConstantInt *CI =
- dyn_cast<ConstantInt>(I.getOperand(1)->stripPointerCasts()))
+ dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
Assert1(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
"Undefined result: Shift count out of range", &I);
}
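All three shift visitors diagnose constant shift amounts that are not strictly
less than the bit width; a C++ sketch of the undefined case being caught:

    unsigned bad_shift(unsigned x) {
      return x << 37;  // 37 >= 32: "Undefined result: Shift count out of range"
    }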
@@ -439,27 +509,31 @@ void Lint::visitAllocaInst(AllocaInst &I) {
// This isn't undefined behavior, it's just an obvious pessimization.
Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
"Pessimization: Static alloca outside of entry block", &I);
+
+ // TODO: Check for an unusual size (MSB set?)
}
void Lint::visitVAArgInst(VAArgInst &I) {
- visitMemoryReference(I, I.getOperand(0), 0, 0,
+ visitMemoryReference(I, I.getOperand(0), ~0u, 0, 0,
MemRef::Read | MemRef::Write);
}
void Lint::visitIndirectBrInst(IndirectBrInst &I) {
- visitMemoryReference(I, I.getAddress(), 0, 0, MemRef::Branchee);
+ visitMemoryReference(I, I.getAddress(), ~0u, 0, 0, MemRef::Branchee);
}
void Lint::visitExtractElementInst(ExtractElementInst &I) {
if (ConstantInt *CI =
- dyn_cast<ConstantInt>(I.getIndexOperand()->stripPointerCasts()))
+ dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
+ /*OffsetOk=*/false)))
Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),
"Undefined result: extractelement index out of range", &I);
}
void Lint::visitInsertElementInst(InsertElementInst &I) {
if (ConstantInt *CI =
- dyn_cast<ConstantInt>(I.getOperand(2)->stripPointerCasts()))
+ dyn_cast<ConstantInt>(findValue(I.getOperand(2),
+ /*OffsetOk=*/false)))
Assert1(CI->getValue().ult(I.getType()->getNumElements()),
"Undefined result: insertelement index out of range", &I);
}
@@ -472,6 +546,91 @@ void Lint::visitUnreachableInst(UnreachableInst &I) {
"side effects", &I);
}
+/// findValue - Look through bitcasts and simple memory reference patterns
+/// to identify an equivalent, but more informative, value. If OffsetOk
+/// is true, look through getelementptrs with non-zero offsets too.
+///
+/// Most analysis passes don't require this logic, because instcombine
+/// will simplify most of these kinds of things away. But it's a goal of
+/// this Lint pass to be useful even on non-optimized IR.
+Value *Lint::findValue(Value *V, bool OffsetOk) const {
+ SmallPtrSet<Value *, 4> Visited;
+ return findValueImpl(V, OffsetOk, Visited);
+}
+
+/// findValueImpl - Implementation helper for findValue.
+Value *Lint::findValueImpl(Value *V, bool OffsetOk,
+ SmallPtrSet<Value *, 4> &Visited) const {
+ // Detect self-referential values.
+ if (!Visited.insert(V))
+ return UndefValue::get(V->getType());
+
+ // TODO: Look through sext or zext cast, when the result is known to
+ // be interpreted as signed or unsigned, respectively.
+ // TODO: Look through eliminable cast pairs.
+ // TODO: Look through calls with unique return values.
+ // TODO: Look through vector insert/extract/shuffle.
+ V = OffsetOk ? V->getUnderlyingObject() : V->stripPointerCasts();
+ if (LoadInst *L = dyn_cast<LoadInst>(V)) {
+ BasicBlock::iterator BBI = L;
+ BasicBlock *BB = L->getParent();
+ SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
+ for (;;) {
+ if (!VisitedBlocks.insert(BB)) break;
+ if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(),
+ BB, BBI, 6, AA))
+ return findValueImpl(U, OffsetOk, Visited);
+ if (BBI != BB->begin()) break;
+ BB = BB->getUniquePredecessor();
+ if (!BB) break;
+ BBI = BB->end();
+ }
+ } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (Value *W = PN->hasConstantValue(DT))
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
+ if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) :
+ Type::getInt64Ty(V->getContext())))
+ return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
+ } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
+ if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
+ Ex->idx_begin(),
+ Ex->idx_end()))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ // Same as above, but for ConstantExpr instead of Instruction.
+ if (Instruction::isCast(CE->getOpcode())) {
+ if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),
+ CE->getOperand(0)->getType(),
+ CE->getType(),
+ TD ? TD->getIntPtrType(V->getContext()) :
+ Type::getInt64Ty(V->getContext())))
+ return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
+ } else if (CE->getOpcode() == Instruction::ExtractValue) {
+ const SmallVector<unsigned, 4> &Indices = CE->getIndices();
+ if (Value *W = FindInsertedValue(CE->getOperand(0),
+ Indices.begin(),
+ Indices.end()))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+ }
+
+ // As a last resort, try SimplifyInstruction or constant folding.
+ if (Instruction *Inst = dyn_cast<Instruction>(V)) {
+ if (Value *W = SimplifyInstruction(Inst, TD))
+ if (W != Inst)
+ return findValueImpl(W, OffsetOk, Visited);
+ } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+ if (Value *W = ConstantFoldConstantExpression(CE, TD))
+ if (W != V)
+ return findValueImpl(W, OffsetOk, Visited);
+ }
+
+ return V;
+}
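Through FindAvailableLoadedValue (added in Loads.cpp below), findValueImpl
also forwards stored values to nearby loads, so the checks keep working on
unoptimized IR. A trivial C++ analogue of the pattern it resolves:

    int forwarded() {
      int x;
      x = 42;     // store to the stack slot (an alloca in unoptimized IR)
      return x;   // load of the same slot; findValue resolves it to 42
    }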
+
//===----------------------------------------------------------------------===//
// Implement the public interfaces to this file...
//===----------------------------------------------------------------------===//
diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp
new file mode 100644
index 0000000..2ba1d86
--- /dev/null
+++ b/lib/Analysis/Loads.cpp
@@ -0,0 +1,235 @@
+//===- Loads.cpp - Local load analysis ------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines simple local analyses for load instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/GlobalAlias.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/IntrinsicInst.h"
+using namespace llvm;
+
+/// AreEquivalentAddressValues - Test if A and B will obviously have the same
+/// value. This includes recognizing that %t0 and %t1 will have the same
+/// value in code like this:
+/// %t0 = getelementptr \@a, 0, 3
+/// store i32 0, i32* %t0
+/// %t1 = getelementptr \@a, 0, 3
+/// %t2 = load i32* %t1
+///
+static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
+ // Test if the values are trivially equivalent.
+ if (A == B) return true;
+
+ // Test if the values come from identical arithmetic instructions.
+ // Use isIdenticalToWhenDefined instead of isIdenticalTo because
+ // this function is only used when one address use dominates the
+ // other, which means that they'll always either have the same
+ // value or one of them will have an undefined value.
+ if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
+ isa<PHINode>(A) || isa<GetElementPtrInst>(A))
+ if (const Instruction *BI = dyn_cast<Instruction>(B))
+ if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
+ return true;
+
+ // Otherwise they may not be equivalent.
+ return false;
+}
+
+/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
+/// bitcasts to get back to the underlying object being addressed, keeping
+/// track of the offset in bytes from the GEPs relative to the result.
+/// This is closely related to Value::getUnderlyingObject but is located
+/// here to avoid making VMCore depend on TargetData.
+static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
+ uint64_t &ByteOffset,
+ unsigned MaxLookup = 6) {
+ if (!V->getType()->isPointerTy())
+ return V;
+ for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
+ if (!GEP->hasAllConstantIndices())
+ return V;
+ SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
+ ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(),
+ &Indices[0], Indices.size());
+ V = GEP->getPointerOperand();
+ } else if (Operator::getOpcode(V) == Instruction::BitCast) {
+ V = cast<Operator>(V)->getOperand(0);
+ } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (GA->mayBeOverridden())
+ return V;
+ V = GA->getAliasee();
+ } else {
+ return V;
+ }
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
+ }
+ return V;
+}
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+ unsigned Align, const TargetData *TD) {
+ uint64_t ByteOffset = 0;
+ Value *Base = V;
+ if (TD)
+ Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset);
+
+ const Type *BaseType = 0;
+ unsigned BaseAlign = 0;
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
+ // An alloca is safe to load from as long as it is suitably aligned.
+ BaseType = AI->getAllocatedType();
+ BaseAlign = AI->getAlignment();
+ } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) {
+ // Global variables are safe to load from but their size cannot be
+ // guaranteed if they are overridden.
+ if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) {
+ BaseType = GV->getType()->getElementType();
+ BaseAlign = GV->getAlignment();
+ }
+ }
+
+ if (BaseType && BaseType->isSized()) {
+ if (TD && BaseAlign == 0)
+ BaseAlign = TD->getPrefTypeAlignment(BaseType);
+
+ if (Align <= BaseAlign) {
+ if (!TD)
+ return true; // Loading directly from an alloca or global is OK.
+
+ // Check if the load is within the bounds of the underlying object.
+ const PointerType *AddrTy = cast<PointerType>(V->getType());
+ uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
+ if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
+ (Align == 0 || (ByteOffset % Align) == 0))
+ return true;
+ }
+ }
+
+ // Otherwise, be a little bit aggressive by scanning the local block to see
+ // if the pointer is already being loaded or stored from/to. If so, the
+ // previous load or store would have already trapped, so there is no harm
+ // in doing an extra load (also, CSE will later eliminate the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might do
+ // a free), the pointer could be marked invalid.
+ if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true;
+ }
+ }
+ return false;
+}
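A sketch of the intended call pattern, using the signature above; LI, InsertPt,
and TD are hypothetical names for a load, a hoisting point, and an optional
TargetData:

    if (isSafeToLoadUnconditionally(LI->getPointerOperand(), InsertPt,
                                    LI->getAlignment(), TD)) {
      // The load cannot trap, so it may be speculated above a branch.
    }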
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
+/// instruction before ScanFrom) checking to see if we have the value at the
+/// memory address *Ptr locally available within a small number of instructions.
+/// If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through. If we scanned the entire block and didn't find
+/// something that invalidates *Ptr or provides it, ScanFrom would be left at
+/// begin() and this returns null.
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
+/// it is set to 0, it will scan the whole block. You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
+Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+ BasicBlock::iterator &ScanFrom,
+ unsigned MaxInstsToScan,
+ AliasAnalysis *AA) {
+ if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
+
+ // If we're using alias analysis to disambiguate, get the size of *Ptr.
+ unsigned AccessSize = 0;
+ if (AA) {
+ const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
+ AccessSize = AA->getTypeStoreSize(AccessTy);
+ }
+
+ while (ScanFrom != ScanBB->begin()) {
+ // We must ignore debug info directives when counting (otherwise they
+ // would affect codegen).
+ Instruction *Inst = --ScanFrom;
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ // Restore ScanFrom to expected value in case next test succeeds
+ ScanFrom++;
+
+ // Don't scan huge blocks.
+ if (MaxInstsToScan-- == 0) return 0;
+
+ --ScanFrom;
+ // If this is a load of Ptr, the loaded value is available.
+ if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
+ return LI;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // If this is a store through Ptr, the value is available!
+ if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
+ return SI->getOperand(0);
+
+ // If Ptr is an alloca and this is a store to a different alloca, ignore
+ // the store. This is a trivial form of alias analysis that is important
+ // for reg2mem'd code.
+ if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
+ (isa<AllocaInst>(SI->getOperand(1)) ||
+ isa<GlobalVariable>(SI->getOperand(1))))
+ continue;
+
+ // If we have alias analysis and it says the store won't modify the loaded
+ // value, ignore the store.
+ if (AA &&
+ (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+ // Otherwise the store may or may not alias the pointer; bail out.
+ ++ScanFrom;
+ return 0;
+ }
+
+ // If this is some other instruction that may clobber Ptr, bail out.
+ if (Inst->mayWriteToMemory()) {
+ // If alias analysis claims that it really won't modify the load,
+ // ignore it.
+ if (AA &&
+ (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
+ continue;
+
+ // May modify the pointer, bail out.
+ ++ScanFrom;
+ return 0;
+ }
+ }
+
+ // We got to the start of the block without finding the value; we're done
+ // scanning this block.
+ return 0;
+}
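A sketch of how a caller drives this scan (Lint's findValueImpl above uses the
same shape); LI and AA are hypothetical names for a load and an optional alias
analysis:

    BasicBlock::iterator ScanFrom = LI;  // scanning starts just before LI
    if (Value *Avail = FindAvailableLoadedValue(LI->getPointerOperand(),
                                                LI->getParent(), ScanFrom,
                                                /*MaxInstsToScan=*/6, AA))
      LI->replaceAllUsesWith(Avail);     // reuse the available value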
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 735e31f..818d0a9 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -266,15 +266,16 @@ unsigned Loop::getSmallConstantTripMultiple() const {
bool Loop::isLCSSAForm(DominatorTree &DT) const {
// Collect the loop's blocks in a set so that we can do quick membership
// lookups.
- SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
+ SmallPtrSet<BasicBlock*, 16> LoopBBs(block_begin(), block_end());
for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
BasicBlock *BB = *BI;
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
++UI) {
- BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(*UI))
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U))
UserBB = P->getIncomingBlock(UI);
// Check the current block, as a fast-path, before checking whether
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 89f9743..1ab18ca 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -101,9 +101,9 @@ static Value *computeArraySize(const CallInst *CI, const TargetData *TD,
if (const StructType *ST = dyn_cast<StructType>(T))
ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
- // If malloc calls' arg can be determined to be a multiple of ElementSize,
+ // If malloc call's arg can be determined to be a multiple of ElementSize,
// return the multiple. Otherwise, return NULL.
- Value *MallocArg = CI->getOperand(1);
+ Value *MallocArg = CI->getArgOperand(0);
Value *Multiple = NULL;
if (ComputeMultiple(MallocArg, ElementSize, Multiple,
LookThroughSExt))
@@ -120,7 +120,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) {
Value *ArraySize = computeArraySize(CI, TD);
if (ArraySize &&
- ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1))
+ ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
return CI;
// CI is a non-array malloc or we can't figure out that it is an array malloc.
@@ -183,25 +183,25 @@ Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
// free Call Utility Functions.
//
-/// isFreeCall - Returns true if the value is a call to the builtin free()
-bool llvm::isFreeCall(const Value *I) {
+/// isFreeCall - Returns non-null if the value is a call to the builtin free()
+const CallInst *llvm::isFreeCall(const Value *I) {
const CallInst *CI = dyn_cast<CallInst>(I);
if (!CI)
- return false;
+ return 0;
Function *Callee = CI->getCalledFunction();
if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "free")
- return false;
+ return 0;
// Check free prototype.
// FIXME: workaround for PR5130, this will be obsolete when a nobuiltin
// attribute will exist.
const FunctionType *FTy = Callee->getFunctionType();
if (!FTy->getReturnType()->isVoidTy())
- return false;
+ return 0;
if (FTy->getNumParams() != 1)
- return false;
+ return 0;
if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext()))
- return false;
+ return 0;
- return true;
+ return CI;
}
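Returning the CallInst instead of bool lets callers fetch the freed pointer
without re-casting; the MemoryDependenceAnalysis hunks below use exactly this
shape:

    if (const CallInst *CI = isFreeCall(Inst)) {
      Value *Freed = CI->getArgOperand(0);  // the pointer passed to free()
      // ... the entire object at Freed is invalidated ...
    }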
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 2aa2f17..1f54d74 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -116,8 +116,8 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
} else if (VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {
Pointer = V->getOperand(0);
PointerSize = AA->getTypeStoreSize(V->getType());
- } else if (isFreeCall(Inst)) {
- Pointer = Inst->getOperand(1);
+ } else if (const CallInst *CI = isFreeCall(Inst)) {
+ Pointer = CI->getArgOperand(0);
// calls to free() erase the entire structure
PointerSize = ~0ULL;
} else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
@@ -197,9 +197,9 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// pointer, not on query pointers that are indexed off of them. It'd
// be nice to handle that at some point.
AliasAnalysis::AliasResult R =
- AA->alias(II->getOperand(3), ~0U, MemPtr, ~0U);
+ AA->alias(II->getArgOperand(2), ~0U, MemPtr, ~0U);
if (R == AliasAnalysis::MustAlias) {
- InvariantTag = II->getOperand(1);
+ InvariantTag = II->getArgOperand(0);
continue;
}
@@ -210,7 +210,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// pointer, not on query pointers that are indexed off of them. It'd
// be nice to handle that at some point.
AliasAnalysis::AliasResult R =
- AA->alias(II->getOperand(2), ~0U, MemPtr, ~0U);
+ AA->alias(II->getArgOperand(1), ~0U, MemPtr, ~0U);
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(II);
}
@@ -365,25 +365,26 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
MemPtr = LI->getPointerOperand();
MemSize = AA->getTypeStoreSize(LI->getType());
}
- } else if (isFreeCall(QueryInst)) {
- MemPtr = QueryInst->getOperand(1);
+ } else if (const CallInst *CI = isFreeCall(QueryInst)) {
+ MemPtr = CI->getArgOperand(0);
// calls to free() erase the entire structure, not just a field.
MemSize = ~0UL;
} else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {
int IntrinsicID = 0; // Intrinsic IDs start at 1.
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst);
+ if (II)
IntrinsicID = II->getIntrinsicID();
switch (IntrinsicID) {
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
- MemPtr = QueryInst->getOperand(2);
- MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue();
+ MemPtr = II->getArgOperand(1);
+ MemSize = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
break;
case Intrinsic::invariant_end:
- MemPtr = QueryInst->getOperand(3);
- MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue();
+ MemPtr = II->getArgOperand(2);
+ MemSize = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
break;
default:
CallSite QueryCS = CallSite::get(QueryInst);
@@ -456,7 +457,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
// Okay, we have a cache entry. If we know it is not dirty, just return it
// with no computation.
if (!CacheP.second) {
- NumCacheNonLocal++;
+ ++NumCacheNonLocal;
return Cache;
}
@@ -478,7 +479,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
BasicBlock *QueryBB = QueryCS.getInstruction()->getParent();
for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI)
DirtyBlocks.push_back(*PI);
- NumUncacheNonLocal++;
+ ++NumUncacheNonLocal;
}
// isReadonlyCall - If this is a read-only call, we can be more aggressive.
diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp
index f0f3a05..7354afa 100644
--- a/lib/Analysis/PostDominators.cpp
+++ b/lib/Analysis/PostDominators.cpp
@@ -67,10 +67,11 @@ PostDominanceFrontier::calculate(const PostDominatorTree &DT,
if (BB)
for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB);
SI != SE; ++SI) {
+ BasicBlock *P = *SI;
// Does Node immediately dominate this predecessor?
- DomTreeNode *SINode = DT[*SI];
+ DomTreeNode *SINode = DT[P];
if (SINode && SINode->getIDom() != Node)
- S.insert(*SI);
+ S.insert(P);
}
// At this point, S is DFlocal. Now we union in DFup's of our children...
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 662576e..38dcd25 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -577,8 +577,6 @@ static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::s
template<>
bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
- bool hasNoSuccessors = false;
-
double inWeight = 0;
std::set<Edge> inMissing;
std::set<const BasicBlock*> ProcessedPreds;
@@ -596,10 +594,8 @@ bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *B
std::set<Edge> outMissing;
std::set<const BasicBlock*> ProcessedSuccs;
succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
- if (sbbi == sbbe) {
+ if (sbbi == sbbe)
readEdge(this,getEdge(BB,0),outWeight,outMissing);
- hasNoSuccessors = true;
- }
for ( ; sbbi != sbbe; ++sbbi ) {
if (ProcessedSuccs.insert(*sbbi).second) {
readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 6870268..413b3b4 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -822,7 +822,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
// Fold if the operand is constant.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
return getConstant(
- cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
+ cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
// trunc(trunc(x)) --> trunc(x)
if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
@@ -844,9 +845,9 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,
return getAddRecExpr(Operands, AddRec->getLoop());
}
- // The cast wasn't folded; create an explicit cast node.
- // Recompute the insert position, as it may have been invalidated.
- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+ // The cast wasn't folded; create an explicit cast node. We can reuse
+ // the existing insert position since if we get here, we won't have
+ // made any changes which would invalidate it.
SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator),
Op, Ty);
UniqueSCEVs.InsertNode(S, IP);
@@ -862,12 +863,10 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
Ty = getEffectiveSCEVType(Ty);
// Fold if the operand is constant.
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) {
- const Type *IntTy = getEffectiveSCEVType(Ty);
- Constant *C = ConstantExpr::getZExt(SC->getValue(), IntTy);
- if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty);
- return getConstant(cast<ConstantInt>(C));
- }
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
// zext(zext(x)) --> zext(x)
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
@@ -997,12 +996,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
Ty = getEffectiveSCEVType(Ty);
// Fold if the operand is constant.
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) {
- const Type *IntTy = getEffectiveSCEVType(Ty);
- Constant *C = ConstantExpr::getSExt(SC->getValue(), IntTy);
- if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty);
- return getConstant(cast<ConstantInt>(C));
- }
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
+ return getConstant(
+ cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(),
+ getEffectiveSCEVType(Ty))));
// sext(sext(x)) --> sext(x)
if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op))
@@ -1208,8 +1205,19 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
ScalarEvolution &SE) {
bool Interesting = false;
- // Iterate over the add operands.
- for (unsigned i = 0, e = NumOperands; i != e; ++i) {
+ // Iterate over the add operands. They are sorted, with constants first.
+ unsigned i = 0;
+ while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ ++i;
+ // Pull a buried constant out to the outside.
+ if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
+ Interesting = true;
+ AccumulatedConstant += Scale * C->getValue()->getValue();
+ }
+
+ // Next comes everything else. We're especially interested in multiplies
+ // here, but they're in the middle, so just visit the rest with one loop.
+ for (; i != NumOperands; ++i) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
APInt NewScale =
@@ -1237,11 +1245,6 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,
Interesting = true;
}
}
- } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
- // Pull a buried constant out to the outside.
- if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())
- Interesting = true;
- AccumulatedConstant += Scale * C->getValue()->getValue();
} else {
// An ordinary operand. Update the map.
std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair =
@@ -1275,9 +1278,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
+ const Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
- assert(getEffectiveSCEVType(Ops[i]->getType()) ==
- getEffectiveSCEVType(Ops[0]->getType()) &&
+ assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
"SCEVAddExpr operand types don't match!");
#endif
@@ -1400,8 +1403,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
// If we have an add, expand the add operands onto the end of the operands
// list.
- Ops.insert(Ops.end(), Add->op_begin(), Add->op_end());
Ops.erase(Ops.begin()+Idx);
+ Ops.append(Add->op_begin(), Add->op_end());
DeletedAdd = true;
}
@@ -1549,9 +1552,11 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
AddRec->op_end());
AddRecOps[0] = getAddExpr(LIOps);
- // It's tempting to propagate NUW/NSW flags here, but nuw/nsw addition
- // is not associative so this isn't necessarily safe.
- const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop);
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer add and the inner addrec are guaranteed to have no overflow.
+ const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop,
+ HasNUW && AddRec->hasNoUnsignedWrap(),
+ HasNSW && AddRec->hasNoSignedWrap());
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
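Requiring both flags is the conservative choice: the inner recurrence can be
wrap-free while the outer add still wraps. A small worked i8-style example,
not from the source; the cast makes the 8-bit wraparound explicit in C++:

    #include <cstdint>
    int8_t outer_add(int8_t rec) {
      // rec follows {100,+,1} without signed wrap, yet the outer "+ 30"
      // overflows 8 bits: 100 + 30 = 130, which wraps to -126.
      return static_cast<int8_t>(rec + 30);
    }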
@@ -1578,7 +1583,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
AddRec->op_end());
for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) {
if (i >= NewOps.size()) {
- NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i,
+ NewOps.append(OtherAddRec->op_begin()+i,
OtherAddRec->op_end());
break;
}
@@ -1711,8 +1716,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
// If we have an mul, expand the mul operands onto the end of the operands
// list.
- Ops.insert(Ops.end(), Mul->op_begin(), Mul->op_end());
Ops.erase(Ops.begin()+Idx);
+ Ops.append(Mul->op_begin(), Mul->op_end());
DeletedMul = true;
}
@@ -1747,23 +1752,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
- if (LIOps.size() == 1) {
- const SCEV *Scale = LIOps[0];
- for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
- NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
- } else {
- for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
- SmallVector<const SCEV *, 4> MulOps(LIOps.begin(), LIOps.end());
- MulOps.push_back(AddRec->getOperand(i));
- NewOps.push_back(getMulExpr(MulOps));
- }
- }
+ const SCEV *Scale = getMulExpr(LIOps);
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
+ NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
- // It's tempting to propagate the NSW flag here, but nsw multiplication
- // is not associative so this isn't necessarily safe.
+ // Build the new addrec. Propagate the NUW and NSW flags if both the
+ // outer mul and the inner addrec are guaranteed to have no overflow.
const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(),
HasNUW && AddRec->hasNoUnsignedWrap(),
- /*HasNSW=*/false);
+ HasNSW && AddRec->hasNoSignedWrap());
// If all of the other operands were loop invariant, we are done.
if (Ops.size() == 1) return NewRec;
@@ -1942,8 +1939,7 @@ const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start,
Operands.push_back(Start);
if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
if (StepChrec->getLoop() == L) {
- Operands.insert(Operands.end(), StepChrec->op_begin(),
- StepChrec->op_end());
+ Operands.append(StepChrec->op_begin(), StepChrec->op_end());
return getAddRecExpr(Operands, L);
}
@@ -2106,8 +2102,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
if (Idx < Ops.size()) {
bool DeletedSMax = false;
while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
- Ops.insert(Ops.end(), SMax->op_begin(), SMax->op_end());
Ops.erase(Ops.begin()+Idx);
+ Ops.append(SMax->op_begin(), SMax->op_end());
DeletedSMax = true;
}
@@ -2211,8 +2207,8 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
if (Idx < Ops.size()) {
bool DeletedUMax = false;
while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
- Ops.insert(Ops.end(), UMax->op_begin(), UMax->op_end());
Ops.erase(Ops.begin()+Idx);
+ Ops.append(UMax->op_begin(), UMax->op_end());
DeletedUMax = true;
}
@@ -2278,7 +2274,8 @@ const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
Constant *C = ConstantExpr::getSizeOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD);
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
@@ -2286,7 +2283,8 @@ const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) {
const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) {
Constant *C = ConstantExpr::getAlignOf(AllocTy);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD);
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
@@ -2302,7 +2300,8 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy,
Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD);
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
@@ -2311,7 +2310,8 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy,
Constant *FieldNo) {
Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, TD);
+ if (Constant *Folded = ConstantFoldConstantExpression(CE, TD))
+ C = Folded;
const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy));
return getTruncateOrZeroExtend(getSCEV(C), Ty);
}
@@ -2398,13 +2398,6 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
return S;
}
-/// getIntegerSCEV - Given a SCEVable type, create a constant for the
-/// specified signed integer value and return a SCEV for the constant.
-const SCEV *ScalarEvolution::getIntegerSCEV(int64_t Val, const Type *Ty) {
- const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
- return getConstant(ConstantInt::get(ITy, Val));
-}
-
/// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
///
const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) {
@@ -2772,7 +2765,11 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
///
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
- bool InBounds = GEP->isInBounds();
+ // Don't blindly transfer the inbounds flag from the GEP instruction to the
+ // Add expression, because the Instruction may be guarded by control flow
+ // and the no-overflow bits may not be valid for the expression in any
+ // context.
+
const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType());
Value *Base = GEP->getOperand(0);
// Don't attempt to analyze GEPs over unsized objects.
@@ -2788,23 +2785,30 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
if (const StructType *STy = dyn_cast<StructType>(*GTI++)) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
- TotalOffset = getAddExpr(TotalOffset,
- getOffsetOfExpr(STy, FieldNo),
- /*HasNUW=*/false, /*HasNSW=*/InBounds);
+ const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo);
+
+ // Add the field offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, FieldOffset);
} else {
// For an array, add the element offset, explicitly scaled.
- const SCEV *LocalOffset = getSCEV(Index);
+ const SCEV *ElementSize = getSizeOfExpr(*GTI);
+ const SCEV *IndexS = getSCEV(Index);
// Getelementptr indices are signed.
- LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
- // Lower "inbounds" GEPs to NSW arithmetic.
- LocalOffset = getMulExpr(LocalOffset, getSizeOfExpr(*GTI),
- /*HasNUW=*/false, /*HasNSW=*/InBounds);
- TotalOffset = getAddExpr(TotalOffset, LocalOffset,
- /*HasNUW=*/false, /*HasNSW=*/InBounds);
+ IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy);
+
+ // Multiply the index by the element size to compute the element offset.
+ const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize);
+
+ // Add the element offset to the running total offset.
+ TotalOffset = getAddExpr(TotalOffset, LocalOffset);
}
}
- return getAddExpr(getSCEV(Base), TotalOffset,
- /*HasNUW=*/false, /*HasNSW=*/InBounds);
+
+ // Get the SCEV for the GEP base.
+ const SCEV *BaseS = getSCEV(Base);
+
+ // Add the total offset from all the GEP indices to the base.
+ return getAddExpr(BaseS, TotalOffset);
}
/// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
@@ -2963,7 +2967,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))
if (!C->getValue()->isZero())
ConservativeResult =
- ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0));
+ ConservativeResult.intersectWith(
+ ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)));
// TODO: non-affine addrec
if (AddRec->isAffine()) {
@@ -3196,15 +3201,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
Operator *U = cast<Operator>(V);
switch (Opcode) {
case Instruction::Add:
- // Don't transfer the NSW and NUW bits from the Add instruction to the
- // Add expression, because the Instruction may be guarded by control
- // flow and the no-overflow bits may not be valid for the expression in
- // any context.
return getAddExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::Mul:
- // Don't transfer the NSW and NUW bits from the Mul instruction to the
- // Mul expression, as with Add.
return getMulExpr(getSCEV(U->getOperand(0)),
getSCEV(U->getOperand(1)));
case Instruction::UDiv:
@@ -3658,6 +3657,26 @@ void ScalarEvolution::forgetValue(Value *V) {
ConstantEvolutionLoopExitValue.erase(PN);
}
+ // If there's a SCEVUnknown tying this value into the SCEV
+ // space, remove it from the folding set map. The SCEVUnknown
+ // object and any other SCEV objects which reference it
+ // (transitively) remain allocated, effectively leaked until
+ // the underlying BumpPtrAllocator is freed.
+ //
+ // This permits SCEV pointers to be used as keys in maps
+ // such as the ValuesAtScopes map.
+ FoldingSetNodeID ID;
+ ID.AddInteger(scUnknown);
+ ID.AddPointer(I);
+ void *IP;
+ if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) {
+ UniqueSCEVs.RemoveNode(S);
+
+ // This isn't necessary, but we might as well remove the
+ // value from the ValuesAtScopes map too.
+ ValuesAtScopes.erase(S);
+ }
+
PushDefUseChildren(I, Worklist);
}
}
@@ -4139,8 +4158,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
// constant or derived from a PHI node themselves.
PHINode *PHI = 0;
for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op)
- if (!(isa<Constant>(I->getOperand(Op)) ||
- isa<GlobalValue>(I->getOperand(Op)))) {
+ if (!isa<Constant>(I->getOperand(Op))) {
PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L);
if (P == 0) return 0; // Not evolving from PHI
if (PHI == 0)
@@ -4161,11 +4179,9 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
const TargetData *TD) {
if (isa<PHINode>(V)) return PHIVal;
if (Constant *C = dyn_cast<Constant>(V)) return C;
- if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV;
Instruction *I = cast<Instruction>(V);
- std::vector<Constant*> Operands;
- Operands.resize(I->getNumOperands());
+ std::vector<Constant*> Operands(I->getNumOperands());
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
@@ -4207,8 +4223,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
return RetVal = 0; // Must be a constant.
Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
- PHINode *PN2 = getConstantEvolvingPHI(BEValue, L);
- if (PN2 != PN)
+ if (getConstantEvolvingPHI(BEValue, L) != PN &&
+ !isa<Constant>(BEValue))
return RetVal = 0; // Not derived from same PHI.
// Execute the loop symbolically to determine the exit value.
@@ -4243,8 +4259,11 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
PHINode *PN = getConstantEvolvingPHI(Cond, L);
if (PN == 0) return getCouldNotCompute();
- // Since the loop is canonicalized, the PHI node must have two entries. One
- // entry must be a constant (coming in from outside of the loop), and the
+ // If the loop is canonicalized, the PHI will have exactly two entries.
+ // That's the only form we support here.
+ if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();
+
+ // One entry must be a constant (coming in from outside of the loop), and the
// second must be derived from the same PHI.
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1));
Constant *StartCST =
@@ -4252,8 +4271,9 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
if (StartCST == 0) return getCouldNotCompute(); // Must be a constant.
Value *BEValue = PN->getIncomingValue(SecondIsBackedge);
- PHINode *PN2 = getConstantEvolvingPHI(BEValue, L);
- if (PN2 != PN) return getCouldNotCompute(); // Not derived from same PHI.
+ if (getConstantEvolvingPHI(BEValue, L) != PN &&
+ !isa<Constant>(BEValue))
+ return getCouldNotCompute(); // Not derived from same PHI.
// Okay, we found a PHI node that defines the trip count of this loop. Execute
// the loop symbolically to determine when the condition gets a value of
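The two-incoming-value requirement matches the shape loop canonicalization
produces; a C++ sketch of a loop the exhaustive evaluator can step through:

    int trip_count() {
      int i = 0;        // constant incoming value from outside the loop
      while (i != 10)   // exit condition computed from the PHI
        i = i + 2;      // backedge value derived from the same PHI
      return i;         // evaluates 0,2,4,6,8,10: five iterations
    }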
@@ -4341,54 +4361,51 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// the arguments into constants, and if so, try to constant propagate the
// result. This is particularly useful for computing loop exit values.
if (CanConstantFold(I)) {
- std::vector<Constant*> Operands;
- Operands.reserve(I->getNumOperands());
+ SmallVector<Constant *, 4> Operands;
+ bool MadeImprovement = false;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
Value *Op = I->getOperand(i);
if (Constant *C = dyn_cast<Constant>(Op)) {
Operands.push_back(C);
- } else {
- // If any of the operands is non-constant and if they are
- // non-integer and non-pointer, don't even try to analyze them
- // with scev techniques.
- if (!isSCEVable(Op->getType()))
- return V;
-
- const SCEV *OpV = getSCEVAtScope(Op, L);
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV)) {
- Constant *C = SC->getValue();
- if (C->getType() != Op->getType())
- C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- Op->getType(),
- false),
- C, Op->getType());
- Operands.push_back(C);
- } else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV)) {
- if (Constant *C = dyn_cast<Constant>(SU->getValue())) {
- if (C->getType() != Op->getType())
- C =
- ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
- Op->getType(),
- false),
- C, Op->getType());
- Operands.push_back(C);
- } else
- return V;
- } else {
- return V;
- }
+ continue;
}
+
+ // If any operand is non-constant and of a type that SCEV cannot
+ // analyze (neither integer nor pointer), don't even try to analyze
+ // it with SCEV techniques.
+ if (!isSCEVable(Op->getType()))
+ return V;
+
+ const SCEV *OrigV = getSCEV(Op);
+ const SCEV *OpV = getSCEVAtScope(OrigV, L);
+ MadeImprovement |= OrigV != OpV;
+
+ Constant *C = 0;
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OpV))
+ C = SC->getValue();
+ if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(OpV))
+ C = dyn_cast<Constant>(SU->getValue());
+ if (!C) return V;
+ if (C->getType() != Op->getType())
+ C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
+ Op->getType(),
+ false),
+ C, Op->getType());
+ Operands.push_back(C);
}
- Constant *C = 0;
- if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- Operands[0], Operands[1], TD);
- else
- C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size(), TD);
- if (C)
+ // Check to see if getSCEVAtScope actually made an improvement.
+ if (MadeImprovement) {
+ Constant *C = 0;
+ if (const CmpInst *CI = dyn_cast<CmpInst>(I))
+ C = ConstantFoldCompareInstOperands(CI->getPredicate(),
+ Operands[0], Operands[1], TD);
+ else
+ C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size(), TD);
+ if (!C) return V;
return getSCEV(C);
+ }
}
}
@@ -4438,7 +4455,29 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// If this is a loop recurrence for a loop that does not contain L, then we
// are dealing with the final value computed by the loop.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
- if (!L || !AddRec->getLoop()->contains(L)) {
+ // First, attempt to evaluate each operand.
+ // Avoid performing the look-up in the common case where the specified
+ // expression has no loop-variant portions.
+ for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+ const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
+ if (OpAtScope == AddRec->getOperand(i))
+ continue;
+
+ // Okay, at least one of these operands is loop variant but might be
+ // foldable. Build a new instance of the folded commutative expression.
+ SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
+ AddRec->op_begin()+i);
+ NewOps.push_back(OpAtScope);
+ for (++i; i != e; ++i)
+ NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
+
+ AddRec = cast<SCEVAddRecExpr>(getAddRecExpr(NewOps, AddRec->getLoop()));
+ break;
+ }
+
+ // If the scope is outside the addrec's loop, evaluate it by using the
+ // loop exit value of the addrec.
+ if (!AddRec->getLoop()->contains(L)) {
// To evaluate this recurrence, we need to know how many times the AddRec
// loop iterates. Compute this now.
const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
@@ -4447,6 +4486,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
// Then, evaluate the AddRec.
return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
}
+
return AddRec;
}
@@ -4696,23 +4736,6 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
return getCouldNotCompute();
}
-/// getLoopPredecessor - If the given loop's header has exactly one unique
-/// predecessor outside the loop, return it. Otherwise return null.
-/// This is less strict that the loop "preheader" concept, which requires
-/// the predecessor to have only one single successor.
-///
-BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) {
- BasicBlock *Header = L->getHeader();
- BasicBlock *Pred = 0;
- for (pred_iterator PI = pred_begin(Header), E = pred_end(Header);
- PI != E; ++PI)
- if (!L->contains(*PI)) {
- if (Pred && Pred != *PI) return 0; // Multiple predecessors.
- Pred = *PI;
- }
- return Pred;
-}
-
/// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
/// (which may not be an immediate predecessor) which has exactly one
/// successor from which BB is reachable, or null if no such block is
@@ -4730,7 +4753,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
// If the header has a unique predecessor outside the loop, it must be
// a block that has exactly one successor that can reach the loop.
if (Loop *L = LI->getLoopFor(BB))
- return std::make_pair(getLoopPredecessor(L), L->getHeader());
+ return std::make_pair(L->getLoopPredecessor(), L->getHeader());
return std::pair<BasicBlock *, BasicBlock *>();
}
@@ -5181,7 +5204,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
// as there are predecessors that can be found that have unique successors
// leading to the original header.
for (std::pair<BasicBlock *, BasicBlock *>
- Pair(getLoopPredecessor(L), L->getHeader());
+ Pair(L->getLoopPredecessor(), L->getHeader());
Pair.first;
Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 17b254f..58711b8 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -12,7 +12,7 @@
//
// This differs from traditional loop dependence analysis in that it tests
// for dependencies within a single iteration of a loop, rather than
-// dependences between different iterations.
+// dependencies between different iterations.
//
// ScalarEvolution has a more complete understanding of pointer arithmetic
// than BasicAliasAnalysis' collection of ad-hoc analyses.
@@ -106,6 +106,12 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
AliasAnalysis::AliasResult
ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
const Value *B, unsigned BSize) {
+ // If either of the memory references is empty, it doesn't matter what the
+ // pointer values are. This allows the code below to ignore this special
+ // case.
+ if (ASize == 0 || BSize == 0)
+ return NoAlias;
+
// This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
const SCEV *AS = SE->getSCEV(const_cast<Value *>(A));
const SCEV *BS = SE->getSCEV(const_cast<Value *>(B));
@@ -118,14 +124,32 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
if (SE->getEffectiveSCEVType(AS->getType()) ==
SE->getEffectiveSCEVType(BS->getType())) {
unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
- APInt AI(BitWidth, ASize);
+ APInt ASizeInt(BitWidth, ASize);
+ APInt BSizeInt(BitWidth, BSize);
+
+ // Compute the difference between the two pointers.
const SCEV *BA = SE->getMinusSCEV(BS, AS);
- if (AI.ule(SE->getUnsignedRange(BA).getUnsignedMin())) {
- APInt BI(BitWidth, BSize);
- const SCEV *AB = SE->getMinusSCEV(AS, BS);
- if (BI.ule(SE->getUnsignedRange(AB).getUnsignedMin()))
- return NoAlias;
- }
+
+ // Test whether the difference is known to be great enough that the memory
+ // regions of the given sizes don't overlap. This assumes that ASizeInt and
+ // BSizeInt are non-zero, which is special-cased above.
+ if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
+ (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+ return NoAlias;
+
+ // Folding the subtraction while preserving range information can be tricky
+ // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS
+ // and try again to see if things fold better that way.
+
+ // Compute the difference between the two pointers.
+ const SCEV *AB = SE->getMinusSCEV(AS, BS);
+
+ // Test whether the difference is known to be great enough that memory
+ // regions of the given sizes don't overlap. This assumes that ASizeInt
+ // and BSizeInt are non-zero, which is special-cased above.
+ if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
+ (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+ return NoAlias;
}
// If ScalarEvolution can find an underlying object, form a new query.
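
The interval test added above can be checked in isolation: with d = B - A computed modulo 2^n, the regions [A, A+ASize) and [B, B+BSize) are disjoint exactly when ASize <= d and d <= -BSize as unsigned values, for every d the range analysis allows. A minimal standalone sketch, not LLVM code, with an 8-bit width chosen only for readability:

    #include <cassert>
    #include <cstdint>

    // Given only the unsigned range [DMin, DMax] of d = B - A (mod 2^8),
    // mirror the test above: ASizeInt.ule(min) && (-BSizeInt).uge(max).
    static bool provablyNoAlias(uint8_t DMin, uint8_t DMax,
                                uint8_t ASize, uint8_t BSize) {
      return ASize <= DMin && uint8_t(-BSize) >= DMax;
    }

    int main() {
      assert(provablyNoAlias(0x10, 0x10, 4, 4));  // B == A + 16: disjoint
      assert(!provablyNoAlias(0x00, 0x10, 4, 4)); // d may be 0: can overlap
      return 0;
    }
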
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 0012b84..d4a4b26 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -21,6 +21,43 @@
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
+/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
+/// reusing an existing cast if a suitable one exists, moving an existing
+/// cast if a suitable one exists but isn't in the right place, or
+/// creating a new one.
+Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty,
+ Instruction::CastOps Op,
+ BasicBlock::iterator IP) {
+ // Check to see if there is already a cast!
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (U->getType() == Ty)
+ if (CastInst *CI = dyn_cast<CastInst>(U))
+ if (CI->getOpcode() == Op) {
+ // If the cast isn't where we want it, fix it.
+ if (BasicBlock::iterator(CI) != IP) {
+ // Create a new cast, and leave the old cast in place in case
+ // it is being used as an insert point. Clear its operand
+ // so that it doesn't hold anything live.
+ Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP);
+ NewCI->takeName(CI);
+ CI->replaceAllUsesWith(NewCI);
+ CI->setOperand(0, UndefValue::get(V->getType()));
+ rememberInstruction(NewCI);
+ return NewCI;
+ }
+ rememberInstruction(CI);
+ return CI;
+ }
+ }
+
+ // Create a new cast.
+ Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP);
+ rememberInstruction(I);
+ return I;
+}
+
/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
@@ -54,71 +91,29 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) {
return CE->getOperand(0);
}
+ // Fold a cast of a constant.
if (Constant *C = dyn_cast<Constant>(V))
return ConstantExpr::getCast(Op, C, Ty);
+ // Cast the argument at the beginning of the entry block, after
+ // any bitcasts of other arguments.
if (Argument *A = dyn_cast<Argument>(V)) {
- // Check to see if there is already a cast!
- for (Value::use_iterator UI = A->use_begin(), E = A->use_end();
- UI != E; ++UI)
- if ((*UI)->getType() == Ty)
- if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI)))
- if (CI->getOpcode() == Op) {
- // If the cast isn't the first instruction of the function, move it.
- if (BasicBlock::iterator(CI) !=
- A->getParent()->getEntryBlock().begin()) {
- // Recreate the cast at the beginning of the entry block.
- // The old cast is left in place in case it is being used
- // as an insert point.
- Instruction *NewCI =
- CastInst::Create(Op, V, Ty, "",
- A->getParent()->getEntryBlock().begin());
- NewCI->takeName(CI);
- CI->replaceAllUsesWith(NewCI);
- return NewCI;
- }
- return CI;
- }
-
- Instruction *I = CastInst::Create(Op, V, Ty, V->getName(),
- A->getParent()->getEntryBlock().begin());
- rememberInstruction(I);
- return I;
+ BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
+ while ((isa<BitCastInst>(IP) &&
+ isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
+ cast<BitCastInst>(IP)->getOperand(0) != A) ||
+ isa<DbgInfoIntrinsic>(IP))
+ ++IP;
+ return ReuseOrCreateCast(A, Ty, Op, IP);
}
+ // Otherwise, cast the instruction's result right after the instruction.
Instruction *I = cast<Instruction>(V);
-
- // Check to see if there is already a cast. If there is, use it.
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI) {
- if ((*UI)->getType() == Ty)
- if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI)))
- if (CI->getOpcode() == Op) {
- BasicBlock::iterator It = I; ++It;
- if (isa<InvokeInst>(I))
- It = cast<InvokeInst>(I)->getNormalDest()->begin();
- while (isa<PHINode>(It)) ++It;
- if (It != BasicBlock::iterator(CI)) {
- // Recreate the cast after the user.
- // The old cast is left in place in case it is being used
- // as an insert point.
- Instruction *NewCI = CastInst::Create(Op, V, Ty, "", It);
- NewCI->takeName(CI);
- CI->replaceAllUsesWith(NewCI);
- rememberInstruction(NewCI);
- return NewCI;
- }
- rememberInstruction(CI);
- return CI;
- }
- }
BasicBlock::iterator IP = I; ++IP;
if (InvokeInst *II = dyn_cast<InvokeInst>(I))
IP = II->getNormalDest()->begin();
- while (isa<PHINode>(IP)) ++IP;
- Instruction *CI = CastInst::Create(Op, V, Ty, V->getName(), IP);
- rememberInstruction(CI);
- return CI;
+ while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP)) ++IP;
+ return ReuseOrCreateCast(I, Ty, Op, IP);
}
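
Both branches above now funnel into ReuseOrCreateCast after computing an insert point that skips past PHI nodes and debug intrinsics. A condensed sketch of that convention, assuming LLVM headers of this vintage (simplified; the expander itself is shown in the hunk):

    #include "llvm/BasicBlock.h"
    #include "llvm/Instructions.h"
    #include "llvm/IntrinsicInst.h"
    using namespace llvm;

    static BasicBlock::iterator pickCastInsertPoint(Instruction *I) {
      // Start right after the defining instruction; an invoke's result is
      // only available on its normal-destination edge.
      BasicBlock::iterator IP = I; ++IP;
      if (InvokeInst *II = dyn_cast<InvokeInst>(I))
        IP = II->getNormalDest()->begin();
      // PHIs must stay grouped at the block head, and hopping over debug
      // intrinsics keeps the generated code independent of -g.
      while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP))
        ++IP;
      return IP;
    }
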
/// InsertBinop - Insert the specified binary operator, doing a small amount
@@ -295,11 +290,11 @@ static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
// the sum into a single value, so just use that.
Ops.clear();
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
- Ops.insert(Ops.end(), Add->op_begin(), Add->op_end());
+ Ops.append(Add->op_begin(), Add->op_end());
else if (!Sum->isZero())
Ops.push_back(Sum);
// Then append the addrecs.
- Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end());
+ Ops.append(AddRecs.begin(), AddRecs.end());
}
/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
@@ -322,7 +317,7 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
A->getLoop()));
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
Ops[i] = Zero;
- Ops.insert(Ops.end(), Add->op_begin(), Add->op_end());
+ Ops.append(Add->op_begin(), Add->op_end());
e += Add->getNumOperands();
} else {
Ops[i] = Start;
@@ -330,7 +325,7 @@ static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
}
if (!AddRecs.empty()) {
// Add the addrecs onto the end of the list.
- Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end());
+ Ops.append(AddRecs.begin(), AddRecs.end());
// Resort the operand list, moving any constants to the front.
SimplifyAddOperands(Ops, Ty, SE);
}
@@ -1070,7 +1065,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
BasicBlock::iterator NewInsertPt =
llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
- while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
+ while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt))
+ ++NewInsertPt;
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
NewInsertPt);
restoreInsertPoint(SaveInsertBB, SaveInsertPt);
@@ -1107,8 +1103,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
}
// {0,+,1} --> Insert a canonical induction variable into the loop!
- if (S->isAffine() &&
- S->getOperand(1) == SE.getConstant(Ty, 1)) {
+ if (S->isAffine() && S->getOperand(1)->isOne()) {
// If there's a canonical IV, just use it.
if (CanonicalIV) {
assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
@@ -1125,17 +1120,19 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Constant *One = ConstantInt::get(Ty, 1);
for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header);
- HPI != HPE; ++HPI)
- if (L->contains(*HPI)) {
+ HPI != HPE; ++HPI) {
+ BasicBlock *HP = *HPI;
+ if (L->contains(HP)) {
// Insert a unit add instruction right before the terminator
// corresponding to the back-edge.
Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next",
- (*HPI)->getTerminator());
+ HP->getTerminator());
rememberInstruction(Add);
- PN->addIncoming(Add, *HPI);
+ PN->addIncoming(Add, HP);
} else {
- PN->addIncoming(Constant::getNullValue(Ty), *HPI);
+ PN->addIncoming(Constant::getNullValue(Ty), HP);
}
+ }
}
// {0,+,F} --> {0,+,1} * F
@@ -1312,7 +1309,9 @@ Value *SCEVExpander::expand(const SCEV *S) {
}
void SCEVExpander::rememberInstruction(Value *I) {
- if (PostIncLoops.empty())
+ if (!PostIncLoops.empty())
+ InsertedPostIncValues.insert(I);
+ else
InsertedValues.insert(I);
// If we just claimed an existing instruction and that instruction had
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index 75c381d..563fd2f 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -105,22 +105,25 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
case NormalizeAutodetect:
if (Instruction *OI = dyn_cast<Instruction>(OperandValToReplace))
if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) {
- Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
+ const SCEV *TransformedStep =
+ TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
+ User, OperandValToReplace, Loops, SE, DT);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
Loops.insert(L);
}
break;
case Normalize:
- if (Loops.count(L))
- Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE));
- break;
- case Denormalize:
if (Loops.count(L)) {
const SCEV *TransformedStep =
TransformForPostIncUse(Kind, AR->getStepRecurrence(SE),
User, OperandValToReplace, Loops, SE, DT);
- Result = SE.getAddExpr(Result, TransformedStep);
+ Result = SE.getMinusSCEV(Result, TransformedStep);
}
break;
+ case Denormalize:
+ if (Loops.count(L))
+ Result = SE.getAddExpr(Result, AR->getStepRecurrence(SE));
+ break;
}
return Result;
}
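
To see the intended round trip, take the canonical counter {0,+,1} in a loop L with Loops containing L; for a constant step the transformed step equals the step itself, so the algebra is visible directly (illustrative, not a proof):

    Normalize:   {0,+,1}   ->  {0,+,1} - 1   =  {-1,+,1}
    Denormalize: {-1,+,1}  ->  {-1,+,1} + 1  =  {0,+,1}

so Denormalize(Normalize(S)) == S, which is the invariant the reordered cases above preserve: Normalize subtracts a step that is itself transformed, while Denormalize adds the raw step back.
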
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 7e8ec2e..b4c9884 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -953,7 +953,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
// sqrt(-0.0) = -0.0, no other negative results are possible.
if (II->getIntrinsicID() == Intrinsic::sqrt)
- return CannotBeNegativeZero(II->getOperand(1), Depth+1);
+ return CannotBeNegativeZero(II->getArgOperand(0), Depth+1);
if (const CallInst *CI = dyn_cast<CallInst>(I))
if (const Function *F = CI->getCalledFunction()) {
@@ -966,7 +966,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
if (F->getName() == "fabsl") return true;
if (F->getName() == "sqrt" || F->getName() == "sqrtf" ||
F->getName() == "sqrtl")
- return CannotBeNegativeZero(CI->getOperand(1), Depth+1);
+ return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1);
}
}
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index 21d4f65..7eeeb59 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -366,8 +366,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
// Check for errors opening or creating archive file.
if (!ArchiveFile.is_open() || ArchiveFile.bad()) {
- if (TmpArchive.exists())
- TmpArchive.eraseFromDisk();
+ TmpArchive.eraseFromDisk();
if (ErrMsg)
*ErrMsg = "Error opening archive file: " + archPath.str();
return true;
@@ -387,8 +386,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
for (MembersList::iterator I = begin(), E = end(); I != E; ++I) {
if (writeMember(*I, ArchiveFile, CreateSymbolTable,
TruncateNames, Compress, ErrMsg)) {
- if (TmpArchive.exists())
- TmpArchive.eraseFromDisk();
+ TmpArchive.eraseFromDisk();
ArchiveFile.close();
return true;
}
@@ -420,8 +418,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
std::ofstream FinalFile(FinalFilePath.c_str(), io_mode);
if (!FinalFile.is_open() || FinalFile.bad()) {
- if (TmpArchive.exists())
- TmpArchive.eraseFromDisk();
+ TmpArchive.eraseFromDisk();
if (ErrMsg)
*ErrMsg = "Error opening archive file: " + FinalFilePath.str();
return true;
@@ -438,8 +435,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress,
if (foreignST) {
if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) {
FinalFile.close();
- if (TmpArchive.exists())
- TmpArchive.eraseFromDisk();
+ TmpArchive.eraseFromDisk();
return true;
}
}
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 9b4370f..f4c0e50 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -492,6 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(private);
KEYWORD(linker_private);
+ KEYWORD(linker_private_weak);
KEYWORD(internal);
KEYWORD(available_externally);
KEYWORD(linkonce);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 226d8d3..6752181 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -196,19 +196,20 @@ bool LLParser::ParseTopLevelEntities() {
// optional leading prefixes, the production is:
// GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal
// OptionalAddrSpace ('constant'|'global') ...
- case lltok::kw_private : // OptionalLinkage
- case lltok::kw_linker_private: // OptionalLinkage
- case lltok::kw_internal: // OptionalLinkage
- case lltok::kw_weak: // OptionalLinkage
- case lltok::kw_weak_odr: // OptionalLinkage
- case lltok::kw_linkonce: // OptionalLinkage
- case lltok::kw_linkonce_odr: // OptionalLinkage
- case lltok::kw_appending: // OptionalLinkage
- case lltok::kw_dllexport: // OptionalLinkage
- case lltok::kw_common: // OptionalLinkage
- case lltok::kw_dllimport: // OptionalLinkage
- case lltok::kw_extern_weak: // OptionalLinkage
- case lltok::kw_external: { // OptionalLinkage
+ case lltok::kw_private: // OptionalLinkage
+ case lltok::kw_linker_private: // OptionalLinkage
+ case lltok::kw_linker_private_weak: // OptionalLinkage
+ case lltok::kw_internal: // OptionalLinkage
+ case lltok::kw_weak: // OptionalLinkage
+ case lltok::kw_weak_odr: // OptionalLinkage
+ case lltok::kw_linkonce: // OptionalLinkage
+ case lltok::kw_linkonce_odr: // OptionalLinkage
+ case lltok::kw_appending: // OptionalLinkage
+ case lltok::kw_dllexport: // OptionalLinkage
+ case lltok::kw_common: // OptionalLinkage
+ case lltok::kw_dllimport: // OptionalLinkage
+ case lltok::kw_extern_weak: // OptionalLinkage
+ case lltok::kw_external: { // OptionalLinkage
unsigned Linkage, Visibility;
if (ParseOptionalLinkage(Linkage) ||
ParseOptionalVisibility(Visibility) ||
@@ -629,7 +630,8 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
Linkage != GlobalValue::WeakODRLinkage &&
Linkage != GlobalValue::InternalLinkage &&
Linkage != GlobalValue::PrivateLinkage &&
- Linkage != GlobalValue::LinkerPrivateLinkage)
+ Linkage != GlobalValue::LinkerPrivateLinkage &&
+ Linkage != GlobalValue::LinkerPrivateWeakLinkage)
return Error(LinkageLoc, "invalid linkage type for alias");
Constant *Aliasee;
@@ -1013,11 +1015,13 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
/// ::= /*empty*/
/// ::= 'private'
/// ::= 'linker_private'
+/// ::= 'linker_private_weak'
/// ::= 'internal'
/// ::= 'weak'
/// ::= 'weak_odr'
/// ::= 'linkonce'
/// ::= 'linkonce_odr'
+/// ::= 'available_externally'
/// ::= 'appending'
/// ::= 'dllexport'
/// ::= 'common'
@@ -1030,6 +1034,9 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) {
default: Res=GlobalValue::ExternalLinkage; return false;
case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break;
case lltok::kw_linker_private: Res = GlobalValue::LinkerPrivateLinkage; break;
+ case lltok::kw_linker_private_weak:
+ Res = GlobalValue::LinkerPrivateWeakLinkage;
+ break;
case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break;
case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break;
case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break;
@@ -2704,6 +2711,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
break;
case GlobalValue::PrivateLinkage:
case GlobalValue::LinkerPrivateLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
case GlobalValue::InternalLinkage:
case GlobalValue::AvailableExternallyLinkage:
case GlobalValue::LinkOnceAnyLinkage:
@@ -3791,8 +3799,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
}
}
- if (Size && !Size->getType()->isIntegerTy(32))
- return Error(SizeLoc, "element count must be i32");
+ if (Size && !Size->getType()->isIntegerTy())
+ return Error(SizeLoc, "element count must have integer type");
if (isAlloca) {
Inst = new AllocaInst(Ty, Size, Alignment);
@@ -3801,6 +3809,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
// Autoupgrade old malloc instruction to malloc call.
// FIXME: Remove in LLVM 3.0.
+ if (Size && !Size->getType()->isIntegerTy(32))
+ return Error(SizeLoc, "element count must be i32");
const Type *IntPtrTy = Type::getInt32Ty(Context);
Constant *AllocSize = ConstantExpr::getSizeOf(Ty);
AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 5eed170..2703134 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -37,9 +37,9 @@ namespace lltok {
kw_declare, kw_define,
kw_global, kw_constant,
- kw_private, kw_linker_private, kw_internal, kw_linkonce, kw_linkonce_odr,
- kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common,
- kw_available_externally,
+ kw_private, kw_linker_private, kw_linker_private_weak, kw_internal,
+ kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending,
+ kw_dllimport, kw_dllexport, kw_common, kw_available_externally,
kw_default, kw_hidden, kw_protected,
kw_extern_weak,
kw_external, kw_thread_local,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 69adead..527ae49 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -75,6 +75,7 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) {
case 11: return GlobalValue::LinkOnceODRLinkage;
case 12: return GlobalValue::AvailableExternallyLinkage;
case 13: return GlobalValue::LinkerPrivateLinkage;
+ case 14: return GlobalValue::LinkerPrivateWeakLinkage;
}
}
@@ -252,17 +253,18 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
// at once.
while (!Placeholder->use_empty()) {
Value::use_iterator UI = Placeholder->use_begin();
+ User *U = *UI;
// If the using object isn't uniqued, just update the operands. This
// handles instructions and initializers for global variables.
- if (!isa<Constant>(*UI) || isa<GlobalValue>(*UI)) {
+ if (!isa<Constant>(U) || isa<GlobalValue>(U)) {
UI.getUse().set(RealVal);
continue;
}
// Otherwise, we have a constant that uses the placeholder. Replace that
// constant with a new constant that has *all* placeholder uses updated.
- Constant *UserC = cast<Constant>(*UI);
+ Constant *UserC = cast<Constant>(U);
for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end();
I != E; ++I) {
Value *NewOp;
@@ -2178,13 +2180,18 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, op, align]
- if (Record.size() < 3)
+ case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
+ // For backward compatibility, tolerate a missing opty and default to i32.
+ // LLVM 3.0: Remove this.
+ if (Record.size() < 3 || Record.size() > 4)
return Error("Invalid ALLOCA record");
+ unsigned OpNum = 0;
const PointerType *Ty =
- dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
- Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context));
- unsigned Align = Record[2];
+ dyn_cast_or_null<PointerType>(getTypeByID(Record[OpNum++]));
+ const Type *OpTy = Record.size() == 4 ? getTypeByID(Record[OpNum++]) :
+ Type::getInt32Ty(Context);
+ Value *Size = getFnValueByID(Record[OpNum++], OpTy);
+ unsigned Align = Record[OpNum++];
if (!Ty || !Size) return Error("Invalid ALLOCA record");
I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
InstructionList.push_back(I);
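
The reader above now accepts both the old 3-entry and the new 4-entry ALLOCA records. A minimal stand-in for that tolerant decoding, using a hypothetical record struct rather than the real bitcode machinery:

    #include <cstdint>
    #include <vector>

    struct AllocaRec { uint64_t InstTy, OpTy, Op, Align; };

    // Old records: [instty, op, align]; new: [instty, opty, op, align].
    // When opty is absent, substitute a default (the reader uses i32).
    static bool decodeAlloca(const std::vector<uint64_t> &R,
                             uint64_t DefaultTy, AllocaRec &Out) {
      if (R.size() < 3 || R.size() > 4)
        return false;                        // malformed record
      unsigned N = 0;
      Out.InstTy = R[N++];
      Out.OpTy   = R.size() == 4 ? R[N++] : DefaultTy;
      Out.Op     = R[N++];
      Out.Align  = R[N++];
      return true;
    }

    int main() {
      AllocaRec Rec;
      std::vector<uint64_t> OldStyle;
      OldStyle.push_back(7); OldStyle.push_back(9); OldStyle.push_back(3);
      return decodeAlloca(OldStyle, /*default type id*/1, Rec) ? 0 : 1;
    }
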
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 9bda6dc..fa1b2c4 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -313,6 +313,7 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) {
case GlobalValue::LinkOnceODRLinkage: return 11;
case GlobalValue::AvailableExternallyLinkage: return 12;
case GlobalValue::LinkerPrivateLinkage: return 13;
+ case GlobalValue::LinkerPrivateWeakLinkage: return 14;
}
}
@@ -577,10 +578,9 @@ static void WriteFunctionLocalMetadata(const Function &F,
BitstreamWriter &Stream) {
bool StartedMetadataBlock = false;
SmallVector<uint64_t, 64> Record;
- const ValueEnumerator::ValueList &Vals = VE.getMDValues();
-
+ const SmallVector<const MDNode *, 8> &Vals = VE.getFunctionLocalMDValues();
for (unsigned i = 0, e = Vals.size(); i != e; ++i)
- if (const MDNode *N = dyn_cast<MDNode>(Vals[i].first))
+ if (const MDNode *N = Vals[i])
if (N->isFunctionLocal() && N->getFunction() == &F) {
if (!StartedMetadataBlock) {
Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
@@ -588,7 +588,7 @@ static void WriteFunctionLocalMetadata(const Function &F,
}
WriteMDNode(N, VE, Stream, Record);
}
-
+
if (StartedMetadataBlock)
Stream.ExitBlock();
}
@@ -1114,6 +1114,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
case Instruction::Alloca:
Code = bitc::FUNC_CODE_INST_ALLOCA;
Vals.push_back(VE.getTypeID(I.getType()));
+ Vals.push_back(VE.getTypeID(I.getOperand(0)->getType()));
Vals.push_back(VE.getValueID(I.getOperand(0))); // size.
Vals.push_back(Log2_32(cast<AllocaInst>(I).getAlignment())+1);
break;
@@ -1134,26 +1135,25 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.push_back(cast<StoreInst>(I).isVolatile());
break;
case Instruction::Call: {
- const PointerType *PTy = cast<PointerType>(I.getOperand(0)->getType());
+ const CallInst &CI = cast<CallInst>(I);
+ const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
Code = bitc::FUNC_CODE_INST_CALL;
- const CallInst *CI = cast<CallInst>(&I);
- Vals.push_back(VE.getAttributeID(CI->getAttributes()));
- Vals.push_back((CI->getCallingConv() << 1) | unsigned(CI->isTailCall()));
- PushValueAndType(CI->getOperand(0), InstID, Vals, VE); // Callee
+ Vals.push_back(VE.getAttributeID(CI.getAttributes()));
+ Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
+ PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee
// Emit value #'s for the fixed parameters.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Vals.push_back(VE.getValueID(I.getOperand(i+1))); // fixed param.
+ Vals.push_back(VE.getValueID(CI.getArgOperand(i))); // fixed param.
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
- unsigned NumVarargs = I.getNumOperands()-1-FTy->getNumParams();
- for (unsigned i = I.getNumOperands()-NumVarargs, e = I.getNumOperands();
+ for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands();
i != e; ++i)
- PushValueAndType(I.getOperand(i), InstID, Vals, VE); // varargs
+ PushValueAndType(CI.getArgOperand(i), InstID, Vals, VE); // varargs
}
break;
}
@@ -1662,15 +1662,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
WriteBitcodeToStream( M, Stream );
- // If writing to stdout, set binary mode.
- if (&llvm::outs() == &Out)
- sys::Program::ChangeStdoutToBinary();
-
// Write the generated bitstream to "Out".
Out.write((char*)&Buffer.front(), Buffer.size());
-
- // Make sure it hits disk now.
- Out.flush();
}
/// WriteBitcodeToStream - Write the specified module to the specified output
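
The Call case above trades raw operand indices for argument accessors. In this era a CallInst stored its callee at operand 0 with the arguments following, so getArgOperand(i) == getOperand(i+1) and getNumArgOperands() == getNumOperands() - 1; a throwaway check of the loop-bound rewrite with made-up counts:

    #include <cassert>

    int main() {
      unsigned NumOperands = 5;  // callee + 4 arguments
      unsigned NumParams   = 2;  // fixed parameters of the callee's type
      // Old varargs bounds: [NumOperands - NumVarargs, NumOperands)
      unsigned NumVarargs  = NumOperands - 1 - NumParams;
      unsigned OldBegin    = NumOperands - NumVarargs;
      // New varargs bounds: [NumParams, NumArgOperands), one slot lower.
      unsigned NumArgOperands = NumOperands - 1;
      assert(OldBegin == NumParams + 1);
      assert(NumArgOperands == 4 && NumVarargs == 2);
      return 0;
    }
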
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index d2baec7..7fa425a 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -72,7 +72,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
// Enumerate types used by the type symbol table.
EnumerateTypeSymbolTable(M->getTypeSymbolTable());
- // Insert constants and metadata that are named at module level into the slot
+ // Insert constants and metadata that are named at module level into the slot
// pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
EnumerateMDSymbolTable(M->getMDSymbolTable());
@@ -257,6 +257,8 @@ void ValueEnumerator::EnumerateMetadata(const Value *MD) {
else
EnumerateType(Type::getVoidTy(MD->getContext()));
}
+ if (N->isFunctionLocal() && N->getFunction())
+ FunctionLocalMDs.push_back(N);
return;
}
@@ -414,7 +416,8 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
FirstInstID = Values.size();
- SmallVector<MDNode *, 8> FunctionLocalMDs;
+ FunctionLocalMDs.clear();
+ SmallVector<MDNode *, 8> FnLocalMDVector;
// Add all of the instructions.
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) {
@@ -423,7 +426,7 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
if (MDNode *MD = dyn_cast<MDNode>(*OI))
if (MD->isFunctionLocal() && MD->getFunction())
// Enumerate metadata after the instructions they might refer to.
- FunctionLocalMDs.push_back(MD);
+ FnLocalMDVector.push_back(MD);
}
if (!I->getType()->isVoidTy())
EnumerateValue(I);
@@ -431,8 +434,8 @@ void ValueEnumerator::incorporateFunction(const Function &F) {
}
// Add all of the function-local metadata.
- for (unsigned i = 0, e = FunctionLocalMDs.size(); i != e; ++i)
- EnumerateOperandType(FunctionLocalMDs[i]);
+ for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i)
+ EnumerateOperandType(FnLocalMDVector[i]);
}
void ValueEnumerator::purgeFunction() {
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 4f8ebf5..2b9b15f 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -15,6 +15,7 @@
#define VALUE_ENUMERATOR_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Attributes.h"
#include <vector>
@@ -26,7 +27,7 @@ class Instruction;
class BasicBlock;
class Function;
class Module;
-class MetadataBase;
+class MDNode;
class NamedMDNode;
class AttrListPtr;
class TypeSymbolTable;
@@ -49,6 +50,7 @@ private:
ValueMapType ValueMap;
ValueList Values;
ValueList MDValues;
+ SmallVector<const MDNode *, 8> FunctionLocalMDs;
ValueMapType MDValueMap;
typedef DenseMap<void*, unsigned> AttributeMapType;
@@ -105,6 +107,9 @@ public:
const ValueList &getValues() const { return Values; }
const ValueList &getMDValues() const { return MDValues; }
+ const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const {
+ return FunctionLocalMDs;
+ }
const TypeList &getTypes() const { return Types; }
const std::vector<const BasicBlock*> &getBasicBlocks() const {
return BasicBlocks;
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 4008a6a..a7189ac 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -114,6 +115,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi,
TargetSubtarget::RegClassVector& CriticalPathRCs) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
AllocatableSet(TRI->getAllocatableSet(MF)),
State(NULL) {
@@ -163,25 +165,27 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
DefIndices[AliasReg] = ~0u;
}
}
- } else {
- // In a non-return block, examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ }
+
+ // In a non-return block, examine the live-in regs of all successors.
+ // Note that a return block can have successors if the return instruction is
+ // predicated.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- State->UnionGroups(AliasReg, 0);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
- }
+ unsigned Reg = *I;
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ State->UnionGroups(AliasReg, 0);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
}
- }
+ }
// Mark live-out callee-saved registers. In a return block this is
// all callee-saved registers. In non-return this is any
@@ -390,7 +394,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// If MI's defs have a special allocation requirement, don't allow
// any def registers to be changed. Also assume all registers
// defined in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq()) {
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI)) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
}
@@ -443,6 +448,26 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
+ // If MI's uses have special allocation requirement, don't allow
+ // any use registers to be changed. Also assume all registers
+ // used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instructions is more complex. We are
+ // being conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->getDesc().isCall() ||
+ MI->getDesc().hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
// Scan the register uses for this instruction and update
// live-ranges, groups and RegRefs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -459,10 +484,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// for the register.
HandleLastUse(Reg, Count, "(last-use)");
- // If MI's uses have special allocation requirement, don't allow
- // any use registers to be changed. Also assume all registers
- // used in a call must not be changed (ABI).
- if (MI->getDesc().isCall() || MI->getDesc().hasExtraSrcRegAllocReq()) {
+ if (Special) {
DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)");
State->UnionGroups(Reg, 0);
}
@@ -604,8 +626,12 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
// order. If that register is available, and the corresponding
// registers are available for the other group subregisters, then we
// can use those registers to rename.
+
+ // FIXME: Using getMinimalPhysRegClass is very conservative. We should
+ // check every use of the register and find the largest register class
+ // that can be used in all of them.
const TargetRegisterClass *SuperRC =
- TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other);
+ TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
@@ -905,6 +931,19 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
AggressiveAntiDepState::RegisterReference>::iterator
Q = Range.first, QE = Range.second; Q != QE; ++Q) {
Q->second.Operand->setReg(NewReg);
+ // If the SU for the instruction being updated has debug
+ // information related to the anti-dependency register, make
+ // sure to update that as well.
+ const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()];
+ if (!SU) continue;
+ for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) {
+ MachineInstr *DI = SU->DbgInstrList[i];
+ assert(DI->getNumOperands() == 3 && DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() &&
+ "Non register dbg_value attached to SUnit!");
+ if (DI->getOperand(0).getReg() == AntiDepReg)
+ DI->getOperand(0).setReg(NewReg);
+ }
}
// We just went back in time and modified history; the
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 506d43e..91ebb85 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -115,6 +115,7 @@ namespace llvm {
class AggressiveAntiDepBreaker : public AntiDepBreaker {
MachineFunction& MF;
MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
/// AllocatableSet - The set of allocatable registers.
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 5a0c27b..d9387a8 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -199,7 +199,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
case GlobalValue::LinkOnceODRLinkage:
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
- case GlobalValue::LinkerPrivateLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
if (MAI->getWeakDefDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
@@ -225,6 +225,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
break;
case GlobalValue::PrivateLinkage:
case GlobalValue::InternalLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
break;
default:
llvm_unreachable("Unknown linkage type!");
@@ -330,7 +331,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
else if (GVKind.isThreadData()) {
OutStreamer.SwitchSection(TheSection);
- EmitLinkage(GV->getLinkage(), MangSym);
EmitAlignment(AlignLog, GV);
OutStreamer.EmitLabel(MangSym);
@@ -353,7 +353,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// - spare pointer, used when mapped by the runtime
// - pointer to mangled symbol above with initializer
unsigned PtrSize = TD->getPointerSizeInBits()/8;
- OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__tlv_bootstrap"),
+ OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"),
PtrSize, 0);
OutStreamer.EmitIntValue(0, PtrSize, 0);
OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
@@ -428,20 +428,12 @@ void AsmPrinter::EmitFunctionHeader() {
// Emit pre-function debug and/or EH information.
if (DE) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(EHTimerName, DWARFGroupName);
- DE->BeginFunction(MF);
- } else {
- DE->BeginFunction(MF);
- }
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->BeginFunction(MF);
}
if (DD) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- DD->beginFunction(MF);
- } else {
- DD->beginFunction(MF);
- }
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginFunction(MF);
}
}
@@ -458,14 +450,11 @@ void AsmPrinter::EmitFunctionEntryLabel() {
}
-/// EmitComments - Pretty-print comments for instructions.
-static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
- const MachineFunction *MF = MI.getParent()->getParent();
- const TargetMachine &TM = MF->getTarget();
-
- DebugLoc DL = MI.getDebugLoc();
+static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF,
+ raw_ostream &CommentOS) {
+ const LLVMContext &Ctx = MF->getFunction()->getContext();
if (!DL.isUnknown()) { // Print source line info.
- DIScope Scope(DL.getScope(MF->getFunction()->getContext()));
+ DIScope Scope(DL.getScope(Ctx));
// Omit the directory, because it's likely to be long and uninteresting.
if (Scope.Verify())
CommentOS << Scope.getFilename();
@@ -474,6 +463,23 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
CommentOS << ':' << DL.getLine();
if (DL.getCol() != 0)
CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << "[ ";
+ EmitDebugLoc(InlinedAtDL, MF, CommentOS);
+ CommentOS << " ]";
+ }
+ }
+}
+
+/// EmitComments - Pretty-print comments for instructions.
+static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
+ const MachineFunction *MF = MI.getParent()->getParent();
+ const TargetMachine &TM = MF->getTarget();
+
+ DebugLoc DL = MI.getDebugLoc();
+ if (!DL.isUnknown()) { // Print source line info.
+ EmitDebugLoc(DL, MF, CommentOS);
CommentOS << '\n';
}
@@ -611,12 +617,8 @@ void AsmPrinter::EmitFunctionBody() {
}
if (ShouldPrintDebugScopes) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- DD->beginScope(II);
- } else {
- DD->beginScope(II);
- }
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->beginScope(II);
}
if (isVerbose())
@@ -649,12 +651,8 @@ void AsmPrinter::EmitFunctionBody() {
}
if (ShouldPrintDebugScopes) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- DD->endScope(II);
- } else {
- DD->endScope(II);
- }
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endScope(II);
}
}
}
@@ -692,20 +690,12 @@ void AsmPrinter::EmitFunctionBody() {
// Emit post-function debug information.
if (DD) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- DD->endFunction(MF);
- } else {
- DD->endFunction(MF);
- }
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DD->endFunction(MF);
}
if (DE) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(EHTimerName, DWARFGroupName);
- DE->EndFunction();
- } else {
- DE->EndFunction();
- }
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
+ DE->EndFunction();
}
MMI->EndFunction();
@@ -730,19 +720,15 @@ bool AsmPrinter::doFinalization(Module &M) {
// Finalize debug and EH information.
if (DE) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(EHTimerName, DWARFGroupName);
- DE->EndModule();
- } else {
+ {
+ NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled);
DE->EndModule();
}
delete DE; DE = 0;
}
if (DD) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- DD->endModule();
- } else {
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
DD->endModule();
}
delete DD; DD = 0;
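
Each timer change above leans on NamedRegionTimer accepting the enable flag directly, so a disabled timer is a cheap no-op instead of an if/else around every call site. A generic stand-in for that RAII pattern; ScopedTimer is a made-up name, not the LLVM class:

    #include <chrono>
    #include <cstdio>

    class ScopedTimer {
      const char *Name;
      bool Enabled;
      std::chrono::steady_clock::time_point Start;
    public:
      ScopedTimer(const char *N, bool E)
        : Name(N), Enabled(E), Start(std::chrono::steady_clock::now()) {}
      ~ScopedTimer() {
        if (!Enabled) return;               // disabled: no bookkeeping at all
        long long US = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::steady_clock::now() - Start).count();
        std::printf("%s: %lld us\n", Name, US);
      }
    };

    int main() {
      bool TimePassesIsEnabled = true;      // stand-in for the LLVM flag
      ScopedTimer T("emit-function", TimePassesIsEnabled);
      // ... region being measured ...
      return 0;
    }
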
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index ba6fed2..f6f3bae 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -83,7 +83,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
- AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI);
+ AsmParser Parser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI);
OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(Parser));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
@@ -279,7 +279,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// Okay, we finally have a value number. Ask the target to print this
// operand!
if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
- unsigned OpNo = 1;
+ unsigned OpNo = 2;
bool Error = false;
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index b2c70d5..21396ca 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -201,6 +201,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
case dwarf::DW_FORM_data8: Size = 8; break;
case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+ case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break;
default: llvm_unreachable("DIE Value form not supported yet");
}
Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/);
@@ -221,6 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
case dwarf::DW_FORM_data8: return sizeof(int64_t);
case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer);
case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer);
+ case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize();
default: llvm_unreachable("DIE Value form not supported yet"); break;
}
return 0;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 890507c..65c1d19 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -44,7 +44,8 @@ using namespace llvm;
static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
cl::desc("Print DbgScope information for each machine instruction"));
-static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
+static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print",
+ cl::Hidden,
cl::desc("Disable debug info printing"));
static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
@@ -79,15 +80,13 @@ class CompileUnit {
/// IndexTyDie - An anonymous type for index type. Owned by CUDie.
DIE *IndexTyDie;
- /// GVToDieMap - Tracks the mapping of unit level debug informaton
+ /// MDNodeToDieMap - Tracks the mapping of unit level debug information
/// variables to debug information entries.
- /// FIXME : Rename GVToDieMap -> NodeToDieMap
- DenseMap<const MDNode *, DIE *> GVToDieMap;
+ DenseMap<const MDNode *, DIE *> MDNodeToDieMap;
- /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton
+ /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information
/// descriptors to debug information entries using a DIEEntry proxy.
- /// FIXME : Rename
- DenseMap<const MDNode *, DIEEntry *> GVToDIEEntryMap;
+ DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap;
/// Globals - A map of globally visible named entities for this unit.
///
@@ -123,25 +122,25 @@ public:
/// getDIE - Returns the debug information entry map slot for the
/// specified debug variable.
- DIE *getDIE(const MDNode *N) { return GVToDieMap.lookup(N); }
+ DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); }
/// insertDIE - Insert DIE into the map.
void insertDIE(const MDNode *N, DIE *D) {
- GVToDieMap.insert(std::make_pair(N, D));
+ MDNodeToDieMap.insert(std::make_pair(N, D));
}
/// getDIEEntry - Returns the debug information entry for the specified
/// debug variable.
DIEEntry *getDIEEntry(const MDNode *N) {
- DenseMap<const MDNode *, DIEEntry *>::iterator I = GVToDIEEntryMap.find(N);
- if (I == GVToDIEEntryMap.end())
+ DenseMap<const MDNode *, DIEEntry *>::iterator I = MDNodeToDIEEntryMap.find(N);
+ if (I == MDNodeToDIEEntryMap.end())
return NULL;
return I->second;
}
/// insertDIEEntry - Insert debug information entry into the map.
void insertDIEEntry(const MDNode *N, DIEEntry *E) {
- GVToDIEEntryMap.insert(std::make_pair(N, E));
+ MDNodeToDIEEntryMap.insert(std::make_pair(N, E));
}
/// addDie - Adds or interns the DIE to the compile unit.
@@ -321,12 +320,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+ DwarfDebugLineSectionSym = CurrentLineSectionSym = 0;
FunctionBeginSym = FunctionEndSym = 0;
- if (TimePassesIsEnabled) {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName);
- beginModule(M);
- } else {
- beginModule(M);
+ DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1);
+ {
+ NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ beginModule(M);
}
}
DwarfDebug::~DwarfDebug() {
@@ -378,7 +377,8 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
unsigned Form, uint64_t Integer) {
if (!Form) Form = DIEInteger::BestForm(false, Integer);
- DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
+ DIEValue *Value = Integer == 1 ?
+ DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer);
Die->addValue(Attribute, Form, Value);
}
@@ -866,6 +866,10 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
} else if (Context.isNameSpace()) {
DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
ContextDIE->addChild(Die);
+ } else if (Context.isSubprogram()) {
+ DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context),
+ /*MakeDecl=*/false);
+ ContextDIE->addChild(Die);
} else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
ContextDIE->addChild(Die);
else
@@ -1055,6 +1059,10 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (DIDescriptor(ContainingType).isCompositeType())
addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
getOrCreateTypeDIE(DIType(ContainingType)));
+ else {
+ DIDescriptor Context = CTy.getContext();
+ addToContextOwner(&Buffer, Context);
+ }
break;
}
default:
@@ -1065,8 +1073,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
if (!Name.empty())
addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type ||
- Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
+ if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) {
// Add size if non-zero (derived types might be zero-sized.)
if (Size)
addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size);
@@ -1329,6 +1338,9 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
// DW_TAG_inlined_subroutine may refer to this DIE.
SPCU->insertDIE(SP, SPDie);
+ // Add to context owner.
+ addToContextOwner(SPDie, SP.getContext());
+
return SPDie;
}
@@ -1379,6 +1391,7 @@ static bool isSubprogramContext(const MDNode *Context) {
DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
CompileUnit *SPCU = getCompileUnit(SPNode);
DIE *SPDie = SPCU->getDIE(SPNode);
+
assert(SPDie && "Unable to find subprogram DIE!");
DISubprogram SP(SPNode);
@@ -1412,6 +1425,14 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
SPCU->addDie(SPDie);
}
+ // Pick up abstract subprogram DIE.
+ if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) {
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsSPDIE);
+ SPCU->addDie(SPDie);
+ }
+
addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()));
addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
@@ -1483,7 +1504,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
- if (StartLabel == FunctionBeginSym || EndLabel == 0) {
+ if (StartLabel == 0 || EndLabel == 0) {
assert (0 && "Unexpected Start and End labels for a inlined scope!");
return 0;
}
@@ -1605,11 +1626,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
// FIXME : Handle getNumOperands != 3
if (DVInsn->getNumOperands() == 3) {
if (DVInsn->getOperand(0).isReg())
- updated = addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
+ updated =
+ addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isImm())
updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isFPImm())
- updated = addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+ updated =
+ addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
} else {
MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
if (Location.getReg()) {
@@ -1682,8 +1705,13 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
if (Scope->getInlinedAt())
ScopeDIE = constructInlinedScopeDIE(Scope);
else if (DS.isSubprogram()) {
- if (Scope->isAbstractScope())
+ ProcessedSPNodes.insert(DS);
+ if (Scope->isAbstractScope()) {
ScopeDIE = getCompileUnit(DS)->getDIE(DS);
+ // Note down abstract DIE.
+ if (ScopeDIE)
+ AbstractSPDies.insert(std::make_pair(DS, ScopeDIE));
+ }
else
ScopeDIE = updateSubprogramScopeDIE(DS);
}
@@ -1782,11 +1810,11 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
// Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This
// simplifies debug range entries.
- addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_data4, 0);
+ addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0);
// DW_AT_stmt_list is an offset of line number information for this
- // compile unit in debug_line section. It is always zero when only one
- // compile unit is emitted in one object file.
- addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
+ // compile unit in debug_line section. This offset is calculated
+ // during endModule().
+ addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
if (!Dir.empty())
addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
@@ -1996,6 +2024,40 @@ void DwarfDebug::beginModule(Module *M) {
///
void DwarfDebug::endModule() {
if (!FirstCU) return;
+ const Module *M = MMI->getModule();
+ if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) {
+ for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) {
+ if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue;
+ DISubprogram SP(AllSPs->getOperand(SI));
+ if (!SP.Verify()) continue;
+
+ // Collect info for variables that were optimized out.
+ StringRef FName = SP.getLinkageName();
+ if (FName.empty())
+ FName = SP.getName();
+ NamedMDNode *NMD =
+ M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName)));
+ if (!NMD) continue;
+ unsigned E = NMD->getNumOperands();
+ if (!E) continue;
+ DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL);
+ for (unsigned I = 0; I != E; ++I) {
+ DIVariable DV(NMD->getOperand(I));
+ if (!DV.Verify()) continue;
+ Scope->addVariable(new DbgVariable(DV));
+ }
+
+ // Construct subprogram DIE and add variables DIEs.
+ constructSubprogramDIE(SP);
+ DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP);
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
+ if (VariableDIE)
+ ScopeDIE->addChild(VariableDIE);
+ }
+ }
+ }
// Attach DW_AT_inline attribute with inlined subprogram DIEs.
for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
@@ -2037,15 +2099,15 @@ void DwarfDebug::endModule() {
// Compute DIE offsets and sizes.
computeSizeAndOffsets();
+ // Emit source line correspondence into a debug line section.
+ emitDebugLines();
+
// Emit all the DIEs into a debug info section
emitDebugInfo();
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
- // Emit source line correspondence into a debug line section.
- emitDebugLines();
-
// Emit info into a debug pubnames section.
emitDebugPubNames();
@@ -2150,8 +2212,9 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
}
/// collectVariableInfo - Populate DbgScope entries with variables' info.
-void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
- SmallPtrSet<const MDNode *, 16> Processed;
+void
+DwarfDebug::collectVariableInfo(const MachineFunction *MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
/// Collect info from the MMI table.
collectVariableInfoFromMMITable(MF, Processed);
@@ -2180,16 +2243,23 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
if (Processed.count(DV) != 0)
continue;
+ const MachineInstr *PrevMI = MInsn;
for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
ME = DbgValues.end(); MI != ME; ++MI) {
const MDNode *Var =
(*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
- if (Var == DV && isDbgValueInDefinedReg(*MI))
+ if (Var == DV && isDbgValueInDefinedReg(*MI) &&
+ !PrevMI->isIdenticalTo(*MI))
MultipleValues.push_back(*MI);
+ PrevMI = *MI;
}
DbgScope *Scope = findDbgScope(MInsn);
- if (!Scope && DV.getTag() == dwarf::DW_TAG_arg_variable)
+ bool CurFnArg = false;
+ if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
+ DISubprogram(DV.getContext()).describes(MF->getFunction()))
+ CurFnArg = true;
+ if (!Scope && CurFnArg)
Scope = CurrentFnDbgScope;
// If variable scope is not found then skip this variable.
if (!Scope)
@@ -2198,7 +2268,7 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
Processed.insert(DV);
DbgVariable *RegVar = new DbgVariable(DV);
Scope->addVariable(RegVar);
- if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ if (!CurFnArg)
DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
DbgVariableToDbgInstMap[AbsVar] = MInsn;
@@ -2217,7 +2287,8 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
const MachineInstr *Begin = NULL;
const MachineInstr *End = NULL;
for (SmallVector<const MachineInstr *, 4>::iterator
- MVI = MultipleValues.begin(), MVE = MultipleValues.end(); MVI != MVE; ++MVI) {
+ MVI = MultipleValues.begin(), MVE = MultipleValues.end();
+ MVI != MVE; ++MVI) {
if (!Begin) {
Begin = *MVI;
continue;
@@ -2241,8 +2312,11 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
}
// Collect info for variables that were optimized out.
+ const Function *F = MF->getFunction();
+ const Module *M = F->getParent();
if (NamedMDNode *NMD =
- MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) {
+ M->getNamedMetadata(Twine("llvm.dbg.lv.",
+ getRealLinkageName(F->getName())))) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
if (!DV || !Processed.insert(DV))
@@ -2319,7 +2393,8 @@ void DwarfDebug::endScope(const MachineInstr *MI) {
}
/// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt) {
+DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
+ const MDNode *InlinedAt) {
if (!InlinedAt) {
DbgScope *WScope = DbgScopeMap.lookup(Scope);
if (WScope)
@@ -2335,13 +2410,20 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl
if (!WScope->getParent()) {
StringRef SPName = DISubprogram(Scope).getLinkageName();
- if (SPName == Asm->MF->getFunction()->getName())
+ // We used to check only for a linkage name, but that fails
+ // since we began omitting the linkage name for private
+ // functions. The new way is to check for the name in metadata,
+ // but that's not supported in old .ll test cases. Ergo, we
+ // check both.
+ if (SPName == Asm->MF->getFunction()->getName() ||
+ DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
CurrentFnDbgScope = WScope;
}
return WScope;
}
+ getOrCreateAbstractScope(Scope);
DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
if (WScope)
return WScope;
@@ -2355,7 +2437,6 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl
Parent->addScope(WScope);
ConcreteScopes[InlinedAt] = WScope;
- getOrCreateAbstractScope(Scope);
return WScope;
}
@@ -2365,8 +2446,6 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl
static bool hasValidLocation(LLVMContext &Ctx,
const MachineInstr *MInsn,
const MDNode *&Scope, const MDNode *&InlinedAt) {
- if (MInsn->isDebugValue())
- return false;
DebugLoc DL = MInsn->getDebugLoc();
if (DL.isUnknown()) return false;
@@ -2488,7 +2567,8 @@ bool DwarfDebug::extractScopeInformation() {
// current instruction scope does not match scope of first instruction
// in this range then create a new instruction range.
DbgRange R(RangeBeginMI, PrevMI);
- MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
+ PrevInlinedAt);
MIRanges.push_back(R);
}
@@ -2565,7 +2645,6 @@ void DwarfDebug::identifyScopeMarkers() {
RE = Ranges.end(); RI != RE; ++RI) {
assert(RI->first && "DbgRange does not have first instruction!");
assert(RI->second && "DbgRange does not have second instruction!");
- InsnsBeginScopeSet.insert(RI->first);
InsnsEndScopeSet.insert(RI->second);
}
}
@@ -2616,6 +2695,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
recordSourceLine(Line, Col, Scope);
+ /// ProcessedArgs - Collection of arguments already processed.
+ SmallPtrSet<const MDNode *, 8> ProcessedArgs;
+
DebugLoc PrevLoc;
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I)
@@ -2624,14 +2706,19 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
const MachineInstr *MI = II;
DebugLoc DL = MI->getDebugLoc();
if (MI->isDebugValue()) {
- // DBG_VALUE needs a label if the variable is local variable or
- // an argument whose location is changing.
assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
if (!DV.Verify()) continue;
- if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ // If DBG_VALUE is for a local variable then it needs a label.
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable &&
+     !isDbgValueInUndefinedReg(MI))
InsnNeedsLabel.insert(MI);
- else if (!ProcessedArgs.insert(DV))
+ // A DBG_VALUE for an inlined function's argument needs a label.
+ else if (!DISubprogram(getDISubprogram(DV.getContext())).
+ describes(MF->getFunction()))
+ InsnNeedsLabel.insert(MI);
+ // A DBG_VALUE indicating an argument location change needs a label.
+ else if (!isDbgValueInUndefinedReg(MI) && !ProcessedArgs.insert(DV))
InsnNeedsLabel.insert(MI);
} else {
// If location is unknown then instruction needs a location only if
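The labeling policy in the hunk above reads as a single predicate. A distilled sketch, not the committed code, using the same helpers the patch relies on (isDbgValueInUndefinedReg, getDISubprogram) and the ProcessedArgs set declared earlier; the function name is invented:

  // Sketch: should this DBG_VALUE get a label?
  static bool dbgValueNeedsLabel(const MachineInstr *MI, DIVariable DV,
                                 const Function *CurFn,
                                 SmallPtrSet<const MDNode*, 8> &ProcessedArgs) {
    // A local variable with a defined location always needs a label.
    if (DV.getTag() != dwarf::DW_TAG_arg_variable &&
        !isDbgValueInUndefinedReg(MI))
      return true;
    // An argument of an inlined function needs a label.
    if (!DISubprogram(getDISubprogram(DV.getContext())).describes(CurFn))
      return true;
    // A current-function argument needs a label only when its location
    // changes, i.e. when the variable has already been seen once.
    return !isDbgValueInUndefinedReg(MI) && !ProcessedArgs.insert(DV);
  }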
@@ -2664,7 +2751,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Assumes in correct section after the entry point.
Asm->OutStreamer.EmitLabel(FunctionEndSym);
- collectVariableInfo(MF);
+ SmallPtrSet<const MDNode *, 16> ProcessedVars;
+ collectVariableInfo(MF, ProcessedVars);
// Get function line info.
if (!Lines.empty()) {
@@ -2679,9 +2767,31 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Construct abstract scopes.
for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
- AE = AbstractScopesList.end(); AI != AE; ++AI)
- constructScopeDIE(*AI);
-
+ AE = AbstractScopesList.end(); AI != AE; ++AI) {
+ DISubprogram SP((*AI)->getScopeNode());
+ if (SP.Verify()) {
+ // Collect info for variables that were optimized out.
+ StringRef FName = SP.getLinkageName();
+ if (FName.empty())
+ FName = SP.getName();
+ const Module *M = MF->getFunction()->getParent();
+ if (NamedMDNode *NMD =
+ M->getNamedMetadata(Twine("llvm.dbg.lv.",
+ getRealLinkageName(FName)))) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+ if (!DV || !ProcessedVars.insert(DV))
+ continue;
+ DbgScope *Scope = AbstractScopes.lookup(DV.getContext());
+ if (Scope)
+ Scope->addVariable(new DbgVariable(DV));
+ }
+ }
+ }
+ if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0)
+ constructScopeDIE(*AI);
+ }
+
DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope);
if (!DisableFramePointerElim(*MF))
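Variables that were optimized away leave no DBG_VALUE behind, so both collectVariableInfo and the abstract-scope walk above recover them from per-function named metadata. A sketch of the shared lookup, with names as in the patch (the destination scope map differs between the two call sites):

  // Sketch: recover optimized-out variables from "llvm.dbg.lv.<linkage name>".
  if (NamedMDNode *NMD =
        M->getNamedMetadata(Twine("llvm.dbg.lv.",
                                  getRealLinkageName(FName)))) {
    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
      DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
      if (!DV || !ProcessedVars.insert(DV))
        continue;                        // missing or already handled
      if (DbgScope *Scope = AbstractScopes.lookup(DV.getContext()))
        Scope->addVariable(new DbgVariable(DV));
    }
  }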
@@ -2696,13 +2806,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Clear debug info
CurrentFnDbgScope = NULL;
InsnNeedsLabel.clear();
- ProcessedArgs.clear();
DbgVariableToFrameIndexMap.clear();
VarToAbstractVarMap.clear();
DbgVariableToDbgInstMap.clear();
DbgVariableLabelsMap.clear();
DeleteContainerSeconds(DbgScopeMap);
- InsnsBeginScopeSet.clear();
InsnsEndScopeSet.clear();
ConcreteScopes.clear();
DeleteContainerSeconds(AbstractScopes);
@@ -2764,7 +2872,8 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) {
+MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
+ const MDNode *S) {
StringRef Dir;
StringRef Fn;
@@ -2790,6 +2899,16 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode
Src = GetOrCreateSourceID(Dir, Fn);
}
+#if 0
+ if (!Lines.empty()) {
+ SrcLineInfo lastSrcLineInfo = Lines.back();
+ // Emitting sequential line records with the same line number (but
+ // different addresses) seems to confuse GDB. Avoid this.
+ if (lastSrcLineInfo.getLine() == Line)
+ return NULL;
+ }
+#endif
+
MCSymbol *Label = MMI->getContext().CreateTempSymbol();
Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
@@ -2898,7 +3017,8 @@ void DwarfDebug::EmitSectionLabels() {
if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection())
EmitSectionSym(Asm, MacroInfo);
- EmitSectionSym(Asm, TLOF.getDwarfLineSection());
+ DwarfDebugLineSectionSym =
+ EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line");
EmitSectionSym(Asm, TLOF.getDwarfLocSection());
EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection());
EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection());
@@ -2961,6 +3081,11 @@ void DwarfDebug::emitDIE(DIE *Die) {
4);
break;
}
+ case dwarf::DW_AT_stmt_list: {
+ Asm->EmitLabelDifference(CurrentLineSectionSym,
+ DwarfDebugLineSectionSym, 4);
+ break;
+ }
case dwarf::DW_AT_location: {
if (UseDotDebugLocEntry.count(Die) != 0) {
DIELabel *L = cast<DIELabel>(Values[i]);
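DW_AT_stmt_list must hold the compile unit's byte offset into .debug_line. Rather than a hard-coded offset, the new case above computes it as the difference between two labels, one at the start of the section and one at this unit's line table (emitted in emitDebugLines below). In effect:

  // section_line       = DwarfDebugLineSectionSym  (start of .debug_line)
  // section_line_begin = CurrentLineSectionSym     (this CU's line table)
  Asm->EmitLabelDifference(CurrentLineSectionSym,    // later label
                           DwarfDebugLineSectionSym, // section start
                           4);                       // 4-byte DWARF offset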
@@ -3106,6 +3231,8 @@ void DwarfDebug::emitDebugLines() {
Asm->getObjFileLowering().getDwarfLineSection());
// Construct the section header.
+ CurrentLineSectionSym = Asm->GetTempSymbol("section_line_begin");
+ Asm->OutStreamer.EmitLabel(CurrentLineSectionSym);
Asm->OutStreamer.AddComment("Length of Source Line Info");
Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"),
Asm->GetTempSymbol("line_begin"), 4);
@@ -3491,8 +3618,9 @@ void DwarfDebug::emitDebugLoc() {
unsigned char Size = Asm->getTargetData().getPointerSize();
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
unsigned index = 1;
- for (SmallVector<DotDebugLocEntry, 4>::iterator I = DotDebugLocEntries.begin(),
- E = DotDebugLocEntries.end(); I != E; ++I, ++index) {
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I, ++index) {
DotDebugLocEntry Entry = *I;
if (Entry.isEmpty()) {
Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 0d6116f..5a281c8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -156,6 +156,9 @@ class DwarfDebug {
/// not included in DbgScopeMap. AbstractScopes owns its DbgScope*s.
DenseMap<const MDNode *, DbgScope *> AbstractScopes;
+ /// AbstractSPDies - Collection of abstract subprogram DIEs.
+ DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
/// AbstractScopesList - Tracks abstract scopes constructed while processing
/// a function. This list is cleared during endFunction().
SmallVector<DbgScope *, 4>AbstractScopesList;
@@ -210,7 +213,7 @@ class DwarfDebug {
DenseMap<DIE *, const MDNode *> ContainingTypeMap;
typedef SmallVector<DbgScope *, 2> ScopeVector;
- SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet;
+
SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet;
/// InlineInfo - Keep track of inlined functions and their location. This
@@ -219,6 +222,10 @@ class DwarfDebug {
DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
SmallVector<const MDNode *, 4> InlinedSPNodes;
+ // ProcessedSPNodes - This is a collection of subprogram MDNodes that
+ // are processed to create DIEs.
+ SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
/// LabelsBeforeInsn - Maps instruction with label emitted before
/// instruction.
DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
@@ -231,9 +238,6 @@ class DwarfDebug {
/// a debuggging information entity.
SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel;
- /// ProcessedArgs - Collection of arguments already processed.
- SmallPtrSet<const MDNode *, 8> ProcessedArgs;
-
SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
/// Previous instruction's location information. This is used to determine
@@ -257,7 +261,10 @@ class DwarfDebug {
MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
MCSymbol *DwarfDebugLocSectionSym;
+ MCSymbol *DwarfDebugLineSectionSym, *CurrentLineSectionSym;
MCSymbol *FunctionBeginSym, *FunctionEndSym;
+
+ DIEInteger *DIEIntegerOne;
private:
/// getSourceDirectoryAndFileIds - Return the directory and file ids that
@@ -593,7 +600,8 @@ private:
bool extractScopeInformation();
/// collectVariableInfo - Populate DbgScope entries with variables' info.
- void collectVariableInfo(const MachineFunction *);
+ void collectVariableInfo(const MachineFunction *,
+ SmallPtrSet<const MDNode *, 16> &ProcessedVars);
/// collectVariableInfoFromMMITable - Collect variable information from
/// side table maintained by MMI.
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index f92127f..c8a63cf 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -52,13 +52,13 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
SymName += "__";
SymName += Id;
-
+
// Capitalize the first letter of the module name.
SymName[Letter] = toupper(SymName[Letter]);
-
+
SmallString<128> TmpStr;
AP.Mang->getNameWithPrefix(TmpStr, SymName);
-
+
MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr);
AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global);
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 9dec22e..7f98df0 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -358,23 +358,10 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
}
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
-/// after it, replacing it with an unconditional branch to NewDest. This
-/// returns true if OldInst's block is modified, false if NewDest is modified.
+/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
- MachineBasicBlock *OldBB = OldInst->getParent();
-
- // Remove all the old successors of OldBB from the CFG.
- while (!OldBB->succ_empty())
- OldBB->removeSuccessor(OldBB->succ_begin());
-
- // Remove all the dead instructions from the end of OldBB.
- OldBB->erase(OldInst, OldBB->end());
-
- // If OldBB isn't immediately before OldBB, insert a branch to it.
- if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest))
- TII->InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 0>());
- OldBB->addSuccessor(NewDest);
+ TII->ReplaceTailWithBranchTo(OldInst, NewDest);
++NumTailMerge;
}
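The CFG surgery deleted above moves behind a TargetInstrInfo hook so targets can override it. A sketch of a generic implementation, reconstructed from the removed lines; the class hosting the default and the zero DebugLoc are assumptions, and the removed comment's "before OldBB" is corrected to "before NewDest" here:

  // Sketch: default implementation reconstructed from the code removed above.
  void TargetInstrInfoImpl::ReplaceTailWithBranchTo(
      MachineBasicBlock::iterator Tail, MachineBasicBlock *NewDest) const {
    MachineBasicBlock *MBB = Tail->getParent();
    // Remove all the old successors of MBB from the CFG.
    while (!MBB->succ_empty())
      MBB->removeSuccessor(MBB->succ_begin());
    // Remove the dead instructions from Tail to the end of MBB.
    MBB->erase(Tail, MBB->end());
    // If NewDest doesn't fall through from MBB, branch to it explicitly.
    DebugLoc dl;  // conservatively unknown
    if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
      InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(), dl);
    MBB->addSuccessor(NewDest);
  }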
@@ -383,6 +370,9 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
/// iterator. This returns the new MBB.
MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1) {
+ if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1))
+ return 0;
+
MachineFunction &MF = *CurMBB.getParent();
// Create the fall-through block.
@@ -443,18 +433,20 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB));
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
if (I != MF->end() &&
!TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
if (!TII->ReverseBranchCondition(Cond)) {
TII->RemoveBranch(*CurMBB);
- TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond);
+ TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl);
return;
}
}
}
- TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>());
+ TII->InsertBranch(*CurMBB, SuccBB, NULL,
+ SmallVector<MachineOperand, 0>(), dl);
}
bool
@@ -625,9 +617,10 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist
/// only of the common tail. Create a block that does by splitting one.
-unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
- unsigned maxCommonTailLength) {
- unsigned commonTailIndex = 0;
+bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex) {
+ commonTailIndex = 0;
unsigned TimeEstimate = ~0U;
for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
// Use PredBB if possible; that doesn't require a new branch.
@@ -655,6 +648,11 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
<< maxCommonTailLength);
MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
+ if (!newMBB) {
+ DEBUG(dbgs() << "... failed!");
+ return false;
+ }
+
SameTails[commonTailIndex].setBlock(newMBB);
SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
@@ -662,7 +660,7 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
if (PredBB == MBB)
PredBB = newMBB;
- return commonTailIndex;
+ return true;
}
// See if any of the blocks in MergePotentials (which all have a common single
@@ -757,7 +755,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
!SameTails[commonTailIndex].tailIsWholeBlock())) {
// None of the blocks consist entirely of the common tail.
// Split a block so that one does.
- commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
+ if (!CreateCommonTailOnlyBlock(PredBB,
+ maxCommonTailLength, commonTailIndex)) {
+ RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
+ continue;
+ }
}
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
@@ -874,10 +876,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
}
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
+ DebugLoc dl; // FIXME: this is nowhere
TII->RemoveBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
- TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond);
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl);
}
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
}
@@ -976,6 +979,7 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
MachineFunction &MF = *MBB->getParent();
+ DebugLoc dl; // FIXME: this is nowhere
ReoptimizeBlock:
MachineFunction::iterator FallThrough = MBB;
@@ -1027,7 +1031,7 @@ ReoptimizeBlock:
TII->RemoveBranch(PrevBB);
PriorCond.clear();
if (PriorTBB != MBB)
- TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1066,7 +1070,7 @@ ReoptimizeBlock:
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
+ TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1079,7 +1083,7 @@ ReoptimizeBlock:
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->ReverseBranchCondition(NewPriorCond)) {
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
+ TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1116,7 +1120,7 @@ ReoptimizeBlock:
<< "To make fallthrough to: " << *PriorTBB << "\n");
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
+ TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl);
// Move this block to the end of the function.
MBB->moveAfter(--MF.end());
@@ -1145,7 +1149,7 @@ ReoptimizeBlock:
SmallVector<MachineOperand, 4> NewCond(CurCond);
if (!TII->ReverseBranchCondition(NewCond)) {
TII->RemoveBranch(*MBB);
- TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
+ TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1200,7 +1204,7 @@ ReoptimizeBlock:
PriorFBB = MBB;
}
TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond);
+ TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl);
}
// Iterate through all the predecessors, revectoring each in-turn.
@@ -1226,7 +1230,7 @@ ReoptimizeBlock:
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
TII->RemoveBranch(*PMBB);
NewCurCond.clear();
- TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
+ TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl);
MadeChange = true;
++NumBranchOpts;
PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
@@ -1246,7 +1250,7 @@ ReoptimizeBlock:
}
// Add the branch back if the block is more than just an uncond branch.
- TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
+ TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl);
}
}
@@ -1286,7 +1290,7 @@ ReoptimizeBlock:
if (CurFallsThru) {
MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
CurCond.clear();
- TII->InsertBranch(*MBB, NextBB, 0, CurCond);
+ TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl);
}
MBB->moveAfter(PredBB);
MadeChange = true;
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index b087395..15dfa7f 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -102,8 +102,9 @@ namespace llvm {
MachineBasicBlock *PredBB);
void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
- unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
- unsigned maxCommonTailLength);
+ bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+ unsigned maxCommonTailLength,
+ unsigned &commonTailIndex);
bool OptimizeBranches(MachineFunction &MF);
bool OptimizeBlock(MachineBasicBlock *MBB);
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 3e38872..ffeff1e 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,19 +1,20 @@
add_llvm_library(LLVMCodeGen
- Analysis.cpp
AggressiveAntiDepBreaker.cpp
+ Analysis.cpp
BranchFolding.cpp
CalcSpillWeights.cpp
+ CallingConvLower.cpp
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
DwarfEHPrepare.cpp
ELFCodeEmitter.cpp
ELFWriter.cpp
- ExactHazardRecognizer.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCStrategy.cpp
IfConversion.cpp
+ InlineSpiller.cpp
IntrinsicLowering.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
@@ -45,6 +46,7 @@ add_llvm_library(LLVMCodeGen
OptimizePHIs.cpp
PHIElimination.cpp
Passes.cpp
+ PostRAHazardRecognizer.cpp
PostRASchedulerList.cpp
PreAllocSplitting.cpp
ProcessImplicitDefs.cpp
@@ -52,7 +54,6 @@ add_llvm_library(LLVMCodeGen
PseudoSourceValue.cpp
RegAllocFast.cpp
RegAllocLinearScan.cpp
- RegAllocLocal.cpp
RegAllocPBQP.cpp
RegisterCoalescer.cpp
RegisterScavenging.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index a328d0e..240a7b9 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -116,7 +116,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
SmallVector<LiveInterval*, 4> spillIs;
if (lis->isReMaterializable(li, spillIs, isLoad)) {
// If all of the definitions of the interval are re-materializable,
- // it is a preferred candidate for spilling. If non of the defs are
+ // it is a preferred candidate for spilling. If none of the defs are
// loads, then it's potentially very cheap to re-materialize.
// FIXME: this gets much more complicated once we support non-trivial
// re-materialization.
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 4e6c1fc..62ad817 100644
--- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -80,13 +80,12 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
/// CheckReturn - Analyze the return values of a function, returning true if
/// the return can be performed without sret-demotion, and false otherwise.
-bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
// Determine which register each value should be copied into.
- for (unsigned i = 0, e = OutTys.size(); i != e; ++i) {
- EVT VT = OutTys[i];
- ISD::ArgFlagsTy ArgFlags = ArgsFlags[i];
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ EVT VT = Outs[i].VT;
+ ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
return false;
}
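CheckReturn now pulls the value type and flags directly out of ISD::OutputArg instead of parallel arrays. Its usual caller is a target's CanLowerReturn, which decides whether sret demotion is needed. A hypothetical target's version would look roughly like this; XXXTargetLowering and RetCC_XXX are placeholders, and the signature follows this revision:

  // Sketch: hypothetical target hook; RetCC_XXX is a placeholder CCAssignFn.
  bool XXXTargetLowering::CanLowerReturn(
      CallingConv::ID CallConv, bool isVarArg,
      const SmallVectorImpl<ISD::OutputArg> &Outs,
      LLVMContext &Context) const {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, Context);
    // True if every return value can be assigned per RetCC_XXX.
    return CCInfo.CheckReturn(Outs, RetCC_XXX);
  }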
@@ -99,7 +98,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
// Determine which register each value should be copied into.
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].Val.getValueType();
+ EVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
@@ -111,14 +110,13 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
}
}
-
/// AnalyzeCallOperands - Analyze the outgoing arguments to a call,
/// incorporating info about the passed values into this state.
void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
unsigned NumOps = Outs.size();
for (unsigned i = 0; i != NumOps; ++i) {
- EVT ArgVT = Outs[i].Val.getValueType();
+ EVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 3ff2a04..e0e315c 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -178,6 +178,8 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
continue;
// Move the block.
+ DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber()
+ << " to top of loop.\n");
Changed = true;
// Move it and all the blocks that can reach it via fallthrough edges
@@ -297,6 +299,8 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
continue;
// Move the block.
+ DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber()
+ << " to be contiguous with loop.\n");
Changed = true;
// Process this block and all loop blocks contiguous with it, to keep
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index fd957b1..e3746a9 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -29,6 +30,7 @@ CriticalAntiDepBreaker::
CriticalAntiDepBreaker(MachineFunction& MFi) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
+ TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
AllocatableSet(TRI->getAllocatableSet(MF))
{
@@ -71,25 +73,27 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
DefIndices[AliasReg] = ~0u;
}
}
- } else {
- // In a non-return block, examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ }
+
+ // In a non-return block, examine the live-in regs of all successors.
+ // Note a return block can have successors if the return instruction is
+ // predicated.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
SE = BB->succ_end(); SI != SE; ++SI)
- for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
- }
+ unsigned Reg = *I;
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
+ // Repeat, for all aliases.
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
+ unsigned AliasReg = *Alias;
+ Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[AliasReg] = BB->size();
+ DefIndices[AliasReg] = ~0u;
}
- }
+ }
// Mark live-out callee-saved registers. In a return block this is
// all callee-saved registers. In non-return this is any
@@ -164,6 +168,26 @@ static const SDep *CriticalPathStep(const SUnit *SU) {
}
void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
+ // It's not safe to change register allocation for source operands of
+ // instructions that have special allocation requirements. Also assume all
+ // registers used in a call must not be changed (ABI).
+ // FIXME: The issue with predicated instructions is more complex. We are
+ // being conservative here because the kill markers cannot be trusted after
+ // if-conversion:
+ // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
+ // ...
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395]
+ // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12]
+ // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8)
+ //
+ // The first R6 kill is not really a kill since it's killed by a predicated
+ // instruction which may not be executed. The second R6 def may or may not
+ // re-define R6 so it's not safe to change it since the last R6 use cannot be
+ // changed.
+ bool Special = MI->getDesc().isCall() ||
+ MI->getDesc().hasExtraSrcRegAllocReq() ||
+ TII->isPredicated(MI);
+
// Scan the register operands for this instruction and update
// Classes and RegRefs.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -199,9 +223,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1))
RegRefs.insert(std::make_pair(Reg, &MO));
- // It's not safe to change register allocation for source operands of
- // that have special allocation requirements.
- if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) {
+ if (MO.isUse() && Special) {
if (KeepRegs.insert(Reg)) {
for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg)
@@ -216,38 +238,43 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// Update liveness.
// Proceeding upwards, registers that are defed but not used in this
// instruction are now dead.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
- if (!MO.isDef()) continue;
- // Ignore two-addr defs.
- if (MI->isRegTiedToUseOperand(i)) continue;
-
- DefIndices[Reg] = Count;
- KillIndices[Reg] = ~0u;
- assert(((KillIndices[Reg] == ~0u) !=
- (DefIndices[Reg] == ~0u)) &&
- "Kill and Def maps aren't consistent for Reg!");
- KeepRegs.erase(Reg);
- Classes[Reg] = 0;
- RegRefs.erase(Reg);
- // Repeat, for all subregs.
- for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
- *Subreg; ++Subreg) {
- unsigned SubregReg = *Subreg;
- DefIndices[SubregReg] = Count;
- KillIndices[SubregReg] = ~0u;
- KeepRegs.erase(SubregReg);
- Classes[SubregReg] = 0;
- RegRefs.erase(SubregReg);
- }
- // Conservatively mark super-registers as unusable.
- for (const unsigned *Super = TRI->getSuperRegisters(Reg);
- *Super; ++Super) {
- unsigned SuperReg = *Super;
- Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+
+ if (!TII->isPredicated(MI)) {
+ // Predicated defs are modeled as read + write, i.e. similar to two
+ // address updates.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ if (!MO.isDef()) continue;
+ // Ignore two-addr defs.
+ if (MI->isRegTiedToUseOperand(i)) continue;
+
+ DefIndices[Reg] = Count;
+ KillIndices[Reg] = ~0u;
+ assert(((KillIndices[Reg] == ~0u) !=
+ (DefIndices[Reg] == ~0u)) &&
+ "Kill and Def maps aren't consistent for Reg!");
+ KeepRegs.erase(Reg);
+ Classes[Reg] = 0;
+ RegRefs.erase(Reg);
+ // Repeat, for all subregs.
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg) {
+ unsigned SubregReg = *Subreg;
+ DefIndices[SubregReg] = Count;
+ KillIndices[SubregReg] = ~0u;
+ KeepRegs.erase(SubregReg);
+ Classes[SubregReg] = 0;
+ RegRefs.erase(SubregReg);
+ }
+ // Conservatively mark super-registers as unusable.
+ for (const unsigned *Super = TRI->getSuperRegisters(Reg);
+ *Super; ++Super) {
+ unsigned SuperReg = *Super;
+ Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ }
}
}
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -334,10 +361,15 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// so just duck out immediately if the block is empty.
if (SUnits.empty()) return 0;
+ // Keep a map of the MachineInstr*'s back to the SUnit representing them.
+ // This is used for updating debug information.
+ DenseMap<MachineInstr*,const SUnit*> MISUnitMap;
+
// Find the node at the bottom of the critical path.
const SUnit *Max = 0;
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
const SUnit *SU = &SUnits[i];
+ MISUnitMap[SU->getInstr()] = SU;
if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
Max = SU;
}
@@ -473,7 +505,11 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
PrescanInstruction(MI);
- if (MI->getDesc().hasExtraDefRegAllocReq())
+ // If MI's defs have a special allocation requirement, don't allow
+ // any def registers to be changed. Also assume all registers
+ // defined in a call must not be changed (ABI).
+ if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() ||
+ TII->isPredicated(MI))
// If this instruction's defs have special allocation requirement, don't
// break this anti-dependency.
AntiDepReg = 0;
@@ -485,7 +521,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
- if (MO.isUse() && AntiDepReg == Reg) {
+ if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
AntiDepReg = 0;
break;
}
@@ -519,8 +555,22 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
std::multimap<unsigned, MachineOperand *>::iterator>
Range = RegRefs.equal_range(AntiDepReg);
for (std::multimap<unsigned, MachineOperand *>::iterator
- Q = Range.first, QE = Range.second; Q != QE; ++Q)
+ Q = Range.first, QE = Range.second; Q != QE; ++Q) {
Q->second->setReg(NewReg);
+ // If the SU for the instruction being updated has debug information
+ // related to the anti-dependency register, make sure to update that
+ // as well.
+ const SUnit *SU = MISUnitMap[Q->second->getParent()];
+ if (!SU) continue;
+ for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) {
+ MachineInstr *DI = SU->DbgInstrList[i];
+ assert(DI->getNumOperands() == 3 && DI->getOperand(0).isReg() &&
+        DI->getOperand(0).getReg() &&
+        "Non-register dbg_value attached to SUnit!");
+ if (DI->getOperand(0).getReg() == AntiDepReg)
+ DI->getOperand(0).setReg(NewReg);
+ }
+ }
// We just went back in time and modified history; the
// liveness information for the anti-dependence reg is now
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index cc42dd2..5406300 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -22,15 +22,18 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include <map>
namespace llvm {
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
class CriticalAntiDepBreaker : public AntiDepBreaker {
MachineFunction& MF;
MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
/// AllocatableSet - The set of allocatable registers.
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index f6739f4..01b31b4 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -22,6 +22,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
@@ -87,10 +88,13 @@ namespace {
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
/// use the ".llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
- bool CleanupSelectors();
+ bool CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+
+ bool HasCatchAllInSelector(IntrinsicInst *);
/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
- void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+ void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+ SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels);
/// FindAllURoRInvokes - Find all URoR invokes in the function.
void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
@@ -150,7 +154,7 @@ namespace {
Changed = true;
}
- return false;
+ return Changed;
}
public:
@@ -186,25 +190,32 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) {
return new DwarfEHPrepare(tm, fast);
}
+/// HasCatchAllInSelector - Return true if the intrinsic instruction has a
+/// catch-all.
+bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) {
+ if (!EHCatchAllValue) return false;
+
+ unsigned ArgIdx = II->getNumArgOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(II->getArgOperand(ArgIdx));
+ return GV == EHCatchAllValue;
+}
+
/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
void DwarfEHPrepare::
-FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
+FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels,
+ SmallPtrSet<IntrinsicInst*, 32> &CatchAllSels) {
for (Value::use_iterator
I = SelectorIntrinsic->use_begin(),
E = SelectorIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *SI = cast<IntrinsicInst>(I);
- if (!SI || SI->getParent()->getParent() != F) continue;
-
- unsigned NumOps = SI->getNumOperands();
- if (NumOps > 4) continue;
- bool IsCleanUp = (NumOps == 3);
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
- if (!IsCleanUp)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getOperand(3)))
- IsCleanUp = (CI->getZExtValue() == 0);
+ if (II->getParent()->getParent() != F)
+ continue;
- if (IsCleanUp)
- Sels.insert(SI);
+ if (!HasCatchAllInSelector(II))
+ Sels.insert(II);
+ else
+ CatchAllSels.insert(II);
}
}
@@ -222,7 +233,7 @@ FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use
/// the ".llvm.eh.catch.all.value" call need to convert to using its
/// initializer instead.
-bool DwarfEHPrepare::CleanupSelectors() {
+bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
if (!EHCatchAllValue) return false;
if (!SelectorIntrinsic) {
@@ -232,17 +243,15 @@ bool DwarfEHPrepare::CleanupSelectors() {
}
bool Changed = false;
- for (Value::use_iterator
- I = SelectorIntrinsic->use_begin(),
- E = SelectorIntrinsic->use_end(); I != E; ++I) {
- IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(I);
- if (!Sel || Sel->getParent()->getParent() != F) continue;
+ for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+ I = Sels.begin(), E = Sels.end(); I != E; ++I) {
+ IntrinsicInst *Sel = *I;
// Index of the ".llvm.eh.catch.all.value" variable.
- unsigned OpIdx = Sel->getNumOperands() - 1;
- GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getOperand(OpIdx));
+ unsigned OpIdx = Sel->getNumArgOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getArgOperand(OpIdx));
if (GV != EHCatchAllValue) continue;
- Sel->setOperand(OpIdx, EHCatchAllValue->getInitializer());
+ Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer());
Changed = true;
}
@@ -293,8 +302,6 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
/// function. This is a candidate to merge the selector associated with the URoR
/// invoke with the one from the URoR's landing pad.
bool DwarfEHPrepare::HandleURoRInvokes() {
- if (!DT) return CleanupSelectors(); // We require DominatorTree information.
-
if (!EHCatchAllValue) {
EHCatchAllValue =
F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value");
@@ -307,14 +314,20 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
if (!SelectorIntrinsic) return false;
}
+ SmallPtrSet<IntrinsicInst*, 32> Sels;
+ SmallPtrSet<IntrinsicInst*, 32> CatchAllSels;
+ FindAllCleanupSelectors(Sels, CatchAllSels);
+
+ if (!DT)
+ // We require DominatorTree information.
+ return CleanupSelectors(CatchAllSels);
+
if (!URoR) {
URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
- if (!URoR) return CleanupSelectors();
+ if (!URoR) return CleanupSelectors(CatchAllSels);
}
- SmallPtrSet<IntrinsicInst*, 32> Sels;
SmallPtrSet<InvokeInst*, 32> URoRInvokes;
- FindAllCleanupSelectors(Sels);
FindAllURoRInvokes(URoRInvokes);
SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
@@ -340,7 +353,8 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
if (!ExceptionValueIntrinsic) {
ExceptionValueIntrinsic =
Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
- if (!ExceptionValueIntrinsic) return CleanupSelectors();
+ if (!ExceptionValueIntrinsic)
+ return CleanupSelectors(CatchAllSels);
}
for (Value::use_iterator
@@ -360,21 +374,9 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
// an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
// need to convert it to a 'catch-all'.
for (SmallPtrSet<IntrinsicInst*, 8>::iterator
- SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) {
- IntrinsicInst *II = *SI;
- unsigned NumOps = II->getNumOperands();
-
- if (NumOps <= 4) {
- bool IsCleanUp = (NumOps == 3);
-
- if (!IsCleanUp)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(3)))
- IsCleanUp = (CI->getZExtValue() == 0);
-
- if (IsCleanUp)
- SelsToConvert.insert(II);
- }
- }
+ SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI)
+ if (!HasCatchAllInSelector(*SI))
+ SelsToConvert.insert(*SI);
}
}
}
@@ -388,12 +390,22 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
SI = SelsToConvert.begin(), SE = SelsToConvert.end();
SI != SE; ++SI) {
IntrinsicInst *II = *SI;
- SmallVector<Value*, 8> Args;
// Use the exception object pointer and the personality function
// from the original selector.
- Args.push_back(II->getOperand(1)); // Exception object pointer.
- Args.push_back(II->getOperand(2)); // Personality function.
+ CallSite CS(II);
+ IntrinsicInst::op_iterator I = CS.arg_begin();
+ IntrinsicInst::op_iterator E = CS.arg_end();
+ IntrinsicInst::op_iterator B = prior(E);
+
+ // Exclude last argument if it is an integer.
+ if (isa<ConstantInt>(B)) E = B;
+
+ // Add exception object pointer (front).
+ // Add personality function (next).
+ // Add in any filter IDs (rest).
+ SmallVector<Value*, 8> Args(I, E);
+
Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
CallInst *NewSelector =
@@ -409,7 +421,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
}
}
- Changed |= CleanupSelectors();
+ Changed |= CleanupSelectors(CatchAllSels);
return Changed;
}
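Converting a clean-up selector into a catch-all keeps the exception pointer, the personality function, and any filter IDs, drops a trailing integer (the clean-up flag), and appends the catch-all value. A condensed sketch of the rewrite above; the result name "eh.sel.catchall" is illustrative:

  CallSite CS(II);
  CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
  if (isa<ConstantInt>(*prior(E)))        // trailing integer: clean-up flag
    E = prior(E);
  SmallVector<Value*, 8> Args(I, E);      // exn ptr, personality, filter IDs
  Args.push_back(EHCatchAllValue->getInitializer());  // catch-all indicator
  CallInst *NewSelector =
    CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(),
                     "eh.sel.catchall", II);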
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 8416d3b..36b0e65 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -90,7 +90,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
for (std::vector<MachineRelocation>::iterator MRI = JTRelocations.begin(),
MRE = JTRelocations.end(); MRI != MRE; ++MRI) {
MachineRelocation &MR = *MRI;
- unsigned MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
+ uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock());
MR.setResultPointer((void*)MBBOffset);
MR.setConstantVal(ES->SectionIdx);
JTSection.addRelocation(MR);
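The unsigned-to-uintptr_t change matters on LP64 hosts, where basic-block addresses are 64-bit; narrowing to unsigned before the (void*) cast would drop the upper half. A sketch of the hazard:

  uintptr_t Full = getMachineBasicBlockAddress(MR.getBasicBlock());
  unsigned Narrow = Full;                 // truncates to 32 bits on LP64
  void *Bad  = (void*)(uintptr_t)Narrow;  // may not equal the real address
  void *Good = (void*)Full;               // preserves all bits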
diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h
deleted file mode 100644
index 91c81a9..0000000
--- a/lib/CodeGen/ExactHazardRecognizer.h
+++ /dev/null
@@ -1,86 +0,0 @@
-//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the ExactHazardRecognizer class, which
-// implements hazard-avoidance heuristics for scheduling, based on the
-// scheduling itineraries specified for the target.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
-#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
-
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetInstrItineraries.h"
-
-namespace llvm {
- class ExactHazardRecognizer : public ScheduleHazardRecognizer {
- // ScoreBoard to track function unit usage. ScoreBoard[0] is a
- // mask of the FUs in use in the cycle currently being
- // schedule. ScoreBoard[1] is a mask for the next cycle. The
- // ScoreBoard is used as a circular buffer with the current cycle
- // indicated by Head.
- class ScoreBoard {
- unsigned *Data;
-
- // The maximum number of cycles monitored by the Scoreboard. This
- // value is determined based on the target itineraries to ensure
- // that all hazards can be tracked.
- size_t Depth;
- // Indices into the Scoreboard that represent the current cycle.
- size_t Head;
- public:
- ScoreBoard():Data(NULL), Depth(0), Head(0) { }
- ~ScoreBoard() {
- delete[] Data;
- }
-
- size_t getDepth() const { return Depth; }
- unsigned& operator[](size_t idx) const {
- assert(Depth && "ScoreBoard was not initialized properly!");
-
- return Data[(Head + idx) % Depth];
- }
-
- void reset(size_t d = 1) {
- if (Data == NULL) {
- Depth = d;
- Data = new unsigned[Depth];
- }
-
- memset(Data, 0, Depth * sizeof(Data[0]));
- Head = 0;
- }
-
- void advance() {
- Head = (Head + 1) % Depth;
- }
-
- // Print the scoreboard.
- void dump() const;
- };
-
- // Itinerary data for the target.
- const InstrItineraryData &ItinData;
-
- ScoreBoard ReservedScoreboard;
- ScoreBoard RequiredScoreboard;
-
- public:
- ExactHazardRecognizer(const InstrItineraryData &ItinData);
-
- virtual HazardType getHazardType(SUnit *SU);
- virtual void Reset();
- virtual void EmitInstruction(SUnit *SU);
- virtual void AdvanceCycle();
- };
-}
-
-#endif
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 790cb21..71506cc 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -271,7 +271,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
case Intrinsic::gcwrite:
if (LowerWr) {
// Replace a write barrier with a simple store.
- Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI);
+ Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI);
CI->replaceAllUsesWith(St);
CI->eraseFromParent();
}
@@ -279,7 +279,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
case Intrinsic::gcread:
if (LowerRd) {
// Replace a read barrier with a simple load.
- Value *Ld = new LoadInst(CI->getOperand(2), "", CI);
+ Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
Ld->takeName(CI);
CI->replaceAllUsesWith(Ld);
CI->eraseFromParent();
@@ -290,7 +290,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) {
// Initialize the GC root, but do not delete the intrinsic. The
// backend needs the intrinsic to flag the stack slot.
Roots.push_back(cast<AllocaInst>(
- CI->getOperand(1)->stripPointerCasts()));
+ CI->getArgOperand(0)->stripPointerCasts()));
}
break;
default:
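These three hunks migrate to CallInst::getArgOperand(i), which names the i-th call argument regardless of where the callee sits in the operand list; under the old layout the callee occupied operand 0, so argument indices drop by one. For example, for the gcwrite case above (gcwrite takes value, object, derived pointer, and the lowering stores the value through the derived pointer):

  Value *St = new StoreInst(CI->getArgOperand(0),   // was CI->getOperand(1)
                            CI->getArgOperand(2),   // was CI->getOperand(3)
                            CI);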
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index c61fd17..6b445e0 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -20,6 +20,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -33,20 +34,22 @@ using namespace llvm;
static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden);
static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden);
static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden);
-static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
+static cl::opt<bool> DisableSimple("disable-ifcvt-simple",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
+static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
+static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
+static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
+static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
+static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
cl::init(false), cl::Hidden);
-static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
+static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
cl::init(false), cl::Hidden);
+static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
+ cl::init(true), cl::Hidden);
STATISTIC(NumSimple, "Number of simple if-conversions performed");
STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed");
@@ -115,7 +118,7 @@ namespace {
BB(0), TrueBB(0), FalseBB(0) {}
};
- /// IfcvtToken - Record information about pending if-conversions to attemp:
+ /// IfcvtToken - Record information about pending if-conversions to attempt:
/// BBI - Corresponding BBInfo.
/// Kind - Type of block. See IfcvtKind.
/// NeedSubsumption - True if the to-be-predicated BB has already been
@@ -146,6 +149,7 @@ namespace {
const TargetLowering *TLI;
const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
bool MadeChange;
int FnNum;
public:
@@ -167,8 +171,7 @@ namespace {
std::vector<IfcvtToken*> &Tokens);
bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
bool isTriangle = false, bool RevBranch = false);
- bool AnalyzeBlocks(MachineFunction &MF,
- std::vector<IfcvtToken*> &Tokens);
+ void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens);
void InvalidatePreds(MachineBasicBlock *BB);
void RemoveExtraEdges(BBInfo &BBI);
bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
@@ -177,14 +180,22 @@ namespace {
unsigned NumDups1, unsigned NumDups2);
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
- SmallVectorImpl<MachineOperand> &Cond);
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs);
void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
bool IgnoreBr = false);
- void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI);
+ void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
- bool MeetIfcvtSizeLimit(unsigned Size) const {
- return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit();
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const {
+ return Size > 0 && TII->isProfitableToIfCvt(BB, Size);
+ }
+
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize,
+ MachineBasicBlock &FBB, unsigned FSize) const {
+ return TSize > 0 && FSize > 0 &&
+ TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize);
}
// blockAlwaysFallThrough - Block ends without a terminator.
@@ -227,8 +238,15 @@ FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = MF.getTarget().getTargetLowering();
TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
if (!TII) return false;
+ // Tail merging tends to expose more if-conversion opportunities.
+ BranchFolder BF(true);
+ bool BFChange = BF.OptimizeFunction(MF, TII,
+ MF.getTarget().getRegisterInfo(),
+ getAnalysisIfAvailable<MachineModuleInfo>());
+
DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
<< MF.getFunction()->getName() << "\'");
@@ -253,7 +271,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) {
// Do an initial analysis for each basic block and find all the potential
// candidates to perform if-conversion.
- bool Change = AnalyzeBlocks(MF, Tokens);
+ bool Change = false;
+ AnalyzeBlocks(MF, Tokens);
while (!Tokens.empty()) {
IfcvtToken *Token = Tokens.back();
Tokens.pop_back();
@@ -281,7 +300,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
case ICSimpleFalse: {
bool isFalse = Kind == ICSimpleFalse;
if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break;
- DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"")
+ DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ?
+ " false" : "")
<< "): BB#" << BBI.BB->getNumber() << " ("
<< ((Kind == ICSimpleFalse)
? BBI.FalseBB->getNumber()
@@ -289,8 +309,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
RetVal = IfConvertSimple(BBI, Kind);
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
- if (isFalse) NumSimpleFalse++;
- else NumSimple++;
+ if (isFalse) ++NumSimpleFalse;
+ else ++NumSimple;
}
break;
}
@@ -316,11 +336,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) {
if (isFalse) {
- if (isRev) NumTriangleFRev++;
- else NumTriangleFalse++;
+ if (isRev) ++NumTriangleFRev;
+ else ++NumTriangleFalse;
} else {
- if (isRev) NumTriangleRev++;
- else NumTriangle++;
+ if (isRev) ++NumTriangleRev;
+ else ++NumTriangle;
}
}
break;
@@ -332,7 +352,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
<< BBI.FalseBB->getNumber() << ") ");
RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
- if (RetVal) NumDiamonds++;
+ if (RetVal) ++NumDiamonds;
break;
}
}
@@ -361,13 +381,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
Roots.clear();
BBAnalysis.clear();
- if (MadeChange) {
+ if (MadeChange && IfCvtBranchFold) {
BranchFolder BF(false);
BF.OptimizeFunction(MF, TII,
MF.getTarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>());
}
+ MadeChange |= BFChange;
return MadeChange;
}
@@ -387,9 +408,10 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
/// ReverseBranchCondition - Reverse the condition of the end of the block
/// branch. Swap block's 'true' and 'false' successors.
bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+ DebugLoc dl; // FIXME: this is nowhere
if (!TII->ReverseBranchCondition(BBI.BrCond)) {
TII->RemoveBranch(*BBI.BB);
- TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond);
+ TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
std::swap(BBI.TrueBB, BBI.FalseBB);
return true;
}
@@ -420,7 +442,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
if (TrueBBI.BB->pred_size() > 1) {
if (TrueBBI.CannotBeCopied ||
- TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit())
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize))
return false;
Dups = TrueBBI.NonPredSize;
}
@@ -431,7 +453,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
/// with their common predecessor) forms a valid triangle shape for ifcvt.
/// If 'FalseBranch' is true, it checks if 'true' block's false branch
-/// branches to the false branch rather than the other way around. It also
+/// branches to the 'false' block rather than the other way around. It also
/// returns the number of instructions that the ifcvt would need to duplicate
/// if performed in 'Dups'.
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
@@ -457,7 +479,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
++Size;
}
}
- if (Size > TLI->getIfCvtDupBlockSizeLimit())
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size))
return false;
Dups = Size;
}
@@ -514,7 +536,27 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
- while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) {
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ // Skip dbg_value instructions
+ while (TI != TIE && TI->isDebugValue())
+ ++TI;
+ while (FI != FIE && FI->isDebugValue())
+ ++FI;
+ while (TI != TIE && FI != FIE) {
+ // Skip dbg_value instructions. These do not count.
+ if (TI->isDebugValue()) {
+ while (TI != TIE && TI->isDebugValue())
+ ++TI;
+ if (TI == TIE)
+ break;
+ }
+ if (FI->isDebugValue()) {
+ while (FI != FIE && FI->isDebugValue())
+ ++FI;
+ if (FI == FIE)
+ break;
+ }
if (!TI->isIdenticalTo(FI))
break;
++Dups1;
@@ -524,7 +566,27 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
TI = firstNonBranchInst(TrueBBI.BB, TII);
FI = firstNonBranchInst(FalseBBI.BB, TII);
- while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) {
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ // Skip dbg_value instructions at the end of the blocks.
+ while (TI != TIB && TI->isDebugValue())
+ --TI;
+ while (FI != FIB && FI->isDebugValue())
+ --FI;
+ while (TI != TIB && FI != FIB) {
+ // Skip dbg_value instructions. These do not count.
+ if (TI->isDebugValue()) {
+ while (TI != TIB && TI->isDebugValue())
+ --TI;
+ if (TI == TIB)
+ break;
+ }
+ if (FI->isDebugValue()) {
+ while (FI != FIB && FI->isDebugValue())
+ --FI;
+ if (FI == FIB)
+ break;
+ }
if (!TI->isIdenticalTo(FI))
break;
++Dups2;
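The paired skip loops above repeat one idea: dbg_value instructions must not count toward Dups1/Dups2 or break the identical-tail match. The forward scan distills to a helper like this (a sketch; the backward scan is symmetric, with --I and a begin() bound):

  // Sketch: advance past DBG_VALUEs, which carry no machine semantics.
  static MachineBasicBlock::iterator
  skipDebugValues(MachineBasicBlock::iterator I,
                  MachineBasicBlock::iterator E) {
    while (I != E && I->isDebugValue())
      ++I;
    return I;
  }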
@@ -556,7 +618,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
// No false branch. This BB must end with a conditional branch and a
// fallthrough.
if (!BBI.FalseBB)
- BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
+ BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
if (!BBI.FalseBB) {
// Malformed bcc? True and false blocks are the same?
BBI.IsUnpredicable = true;
@@ -569,6 +631,9 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
BBI.ClobbersPred = false;
for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
+
const TargetInstrDesc &TID = I->getDesc();
if (TID.isNotDuplicable())
BBI.CannotBeCopied = true;
@@ -702,8 +767,8 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
bool FNeedSub = FalseBBI.Predicate.size() > 0;
bool Enqueued = false;
if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
- MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) &&
- MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2),
+ *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
// Diamond:
@@ -720,7 +785,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
}
if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
- MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
// Triangle:
// EBB
@@ -732,23 +797,23 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups));
Enqueued = true;
}
-
+
if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
- MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
Enqueued = true;
}
if (ValidSimple(TrueBBI, Dups) &&
- MeetIfcvtSizeLimit(TrueBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
// Simple (split, no rejoin):
// EBB
// | \_
// | |
// | TBB---> exit
- // |
+ // |
// FBB
Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups));
Enqueued = true;
@@ -757,21 +822,21 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
if (CanRevCond) {
// Try the other path...
if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
- MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
Enqueued = true;
}
if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
- MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
Enqueued = true;
}
if (ValidSimple(FalseBBI, Dups) &&
- MeetIfcvtSizeLimit(FalseBBI.NonPredSize) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
Enqueued = true;
@@ -785,11 +850,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
}
/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
-/// candidates. It returns true if any CFG restructuring is done to expose more
-/// if-conversion opportunities.
-bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
+/// candidates.
+void IfConverter::AnalyzeBlocks(MachineFunction &MF,
std::vector<IfcvtToken*> &Tokens) {
- bool Change = false;
std::set<MachineBasicBlock*> Visited;
for (unsigned i = 0, e = Roots.size(); i != e; ++i) {
for (idf_ext_iterator<MachineBasicBlock*> I=idf_ext_begin(Roots[i],Visited),
@@ -801,20 +864,23 @@ bool IfConverter::AnalyzeBlocks(MachineFunction &MF,
// Sort to favor more complex ifcvt scheme.
std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
-
- return Change;
}
/// canFallThroughTo - Returns true if ToBB is the next block after BB, or if
/// all the intervening blocks are empty (given that BB can fall through to its
/// next block).
static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
- MachineFunction::iterator I = BB;
+ MachineFunction::iterator PI = BB;
+ MachineFunction::iterator I = llvm::next(PI);
MachineFunction::iterator TI = ToBB;
MachineFunction::iterator E = BB->getParent()->end();
- while (++I != TI)
- if (I == E || !I->empty())
+ while (I != TI) {
+ // Check isSuccessor to avoid the case where the next block is empty, but
+ // it's not a successor.
+ if (I == E || !I->empty() || !PI->isSuccessor(I))
return false;
+ PI = I++;
+ }
return true;
}
@@ -836,8 +902,9 @@ void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
///
static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
const TargetInstrInfo *TII) {
+ DebugLoc dl; // FIXME: this is nowhere
SmallVector<MachineOperand, 0> NoCond;
- TII->InsertBranch(*BB, ToBB, NULL, NoCond);
+ TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl);
}
/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
@@ -849,6 +916,66 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
}
+/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are
+/// modeled as read + write (sort of like two-address instructions). These
+/// routines track register liveness and add implicit uses to if-converted
+/// instructions to conform to the model.
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
+ E = BB->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ Redefs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Redefs.insert(*Subreg);
+ }
+}
+
+static void UpdatePredRedefs(MachineInstr *MI, SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI,
+ bool AddImpUse = false) {
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (MO.isKill()) {
+ Redefs.erase(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.erase(*SR);
+ }
+ }
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ unsigned Reg = Defs[i];
+ if (Redefs.count(Reg)) {
+ if (AddImpUse)
+ // Treat predicated update as read + write.
+ MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/,
+ true/*IsImp*/,false/*IsKill*/));
+ } else {
+ Redefs.insert(Reg);
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR)
+ Redefs.insert(*SR);
+ }
+ }
+}
+
+static void UpdatePredRedefs(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallSet<unsigned,4> &Redefs,
+ const TargetRegisterInfo *TRI) {
+ while (I != E) {
+ UpdatePredRedefs(I, Redefs, TRI);
+ ++I;
+ }
+}
+
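
A simplified model of the bookkeeping these helpers perform, using std::set in place of SmallSet and ignoring sub-registers; the struct names are invented for the sketch:

    #include <cstddef>
    #include <set>
    #include <vector>

    struct Operand { unsigned Reg; bool IsDef; bool IsKill; };
    struct Inst { std::vector<Operand> Ops; };

    // Kills drop registers from the live set; a def of an already-live
    // register is a redefinition, which the predicated model treats as
    // read + write, so the instruction grows an implicit use of it.
    static void updateRedefs(Inst &MI, std::set<unsigned> &Redefs) {
      std::vector<unsigned> Defs;
      for (std::size_t i = 0; i != MI.Ops.size(); ++i) {
        const Operand &MO = MI.Ops[i];
        if (MO.IsDef)
          Defs.push_back(MO.Reg);
        else if (MO.IsKill)
          Redefs.erase(MO.Reg);
      }
      for (std::size_t i = 0; i != Defs.size(); ++i) {
        unsigned Reg = Defs[i];
        if (Redefs.count(Reg)) {
          Operand ImpUse = { Reg, false, false }; // implicit use marker
          MI.Ops.push_back(ImpUse);
        } else {
          Redefs.insert(Reg);
        }
      }
    }
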
/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
///
bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
@@ -873,13 +1000,19 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (TII->ReverseBranchCondition(Cond))
assert(false && "Unable to reverse branch condition!");
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
if (CvtBBI->BB->pred_size() > 1) {
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
- CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs);
} else {
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
// Merge converted block into entry block.
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
@@ -922,6 +1055,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
BBInfo *CvtBBI = &TrueBBI;
BBInfo *NextBBI = &FalseBBI;
+ DebugLoc dl; // FIXME: this is nowhere
SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
@@ -957,21 +1091,26 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
}
}
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(CvtBBI->BB, Redefs, TRI);
+ InitPredRedefs(NextBBI->BB, Redefs, TRI);
+
bool HasEarlyExit = CvtBBI->FalseBB != NULL;
- bool DupBB = CvtBBI->BB->pred_size() > 1;
- if (DupBB) {
+ if (CvtBBI->BB->pred_size() > 1) {
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
- CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
+ CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true);
} else {
// Predicate the 'true' block after removing its branch.
CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs);
// Now merge the entry of the triangle with the true block.
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
- MergeBlocks(BBI, *CvtBBI);
+ MergeBlocks(BBI, *CvtBBI, false);
}
// If 'true' block has a 'false' successor, add an exit branch to it.
@@ -980,7 +1119,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
CvtBBI->BrCond.end());
if (TII->ReverseBranchCondition(RevCond))
assert(false && "Unable to reverse branch condition!");
- TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond);
+ TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl);
BBI.BB->addSuccessor(CvtBBI->FalseBB);
}
@@ -1009,7 +1148,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
RemoveExtraEdges(BBI);
// Update block info. BB can be iteratively if-converted.
- if (!IterIfcvt)
+ if (!IterIfcvt)
BBI.IsDone = true;
InvalidatePreds(BBI.BB);
CvtBBI->IsDone = true;
@@ -1044,9 +1183,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
return false;
}
- // Merge the 'true' and 'false' blocks by copying the instructions
- // from the 'false' block to the 'true' block. That is, unless the true
- // block would clobber the predicate, in that case, do the opposite.
+ // Put the predicated instructions from the 'true' block before the
+ // instructions from the 'false' block, unless the true block would clobber
+ // the predicate, in which case, do the opposite.
BBInfo *BBI1 = &TrueBBI;
BBInfo *BBI2 = &FalseBBI;
SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
@@ -1071,39 +1210,72 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// Remove the conditional branch from entry to the blocks.
BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ // Initialize liveins to the first BB. These are potentially redefined by
+ // predicated instructions.
+ SmallSet<unsigned, 4> Redefs;
+ InitPredRedefs(BBI1->BB, Redefs, TRI);
+
// Remove the duplicated instructions at the beginnings of both paths.
MachineBasicBlock::iterator DI1 = BBI1->BB->begin();
MachineBasicBlock::iterator DI2 = BBI2->BB->begin();
+ MachineBasicBlock::iterator DIE1 = BBI1->BB->end();
+ MachineBasicBlock::iterator DIE2 = BBI2->BB->end();
+ // Skip dbg_value instructions
+ while (DI1 != DIE1 && DI1->isDebugValue())
+ ++DI1;
+ while (DI2 != DIE2 && DI2->isDebugValue())
+ ++DI2;
BBI1->NonPredSize -= NumDups1;
BBI2->NonPredSize -= NumDups1;
+
+ // Skip past the dups on each side separately since there may be
+ // differing dbg_value entries.
+ for (unsigned i = 0; i < NumDups1; ++DI1) {
+ if (!DI1->isDebugValue())
+ ++i;
+ }
while (NumDups1 != 0) {
- ++DI1;
++DI2;
- --NumDups1;
+ if (!DI2->isDebugValue())
+ --NumDups1;
}
+
+ UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI);
BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
BBI2->BB->erase(BBI2->BB->begin(), DI2);
// Predicate the 'true' block after removing its branch.
BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
DI1 = BBI1->BB->end();
- for (unsigned i = 0; i != NumDups2; ++i)
+ for (unsigned i = 0; i != NumDups2; ) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert(DI1 != BBI1->BB->begin());
--DI1;
+ // Skip dbg_value instructions.
+ if (!DI1->isDebugValue())
+ ++i;
+ }
BBI1->BB->erase(DI1, BBI1->BB->end());
- PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1);
+ PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs);
// Predicate the 'false' block.
BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();
while (NumDups2 != 0) {
+ // NumDups2 only counted non-dbg_value instructions, so this won't
+ // run off the head of the list.
+ assert(DI2 != BBI2->BB->begin());
--DI2;
- --NumDups2;
+ // Skip dbg_value instructions.
+ if (!DI2->isDebugValue())
+ --NumDups2;
}
- PredicateBlock(*BBI2, DI2, *Cond2);
+ PredicateBlock(*BBI2, DI2, *Cond2, Redefs);
// Merge the true block into the entry of the diamond.
- MergeBlocks(BBI, *BBI1);
- MergeBlocks(BBI, *BBI2);
+ MergeBlocks(BBI, *BBI1, TailBB == 0);
+ MergeBlocks(BBI, *BBI2, TailBB == 0);
// If the if-converted block falls through or unconditionally branches into
// the tail block, and the tail block does not have other predecessors, then
@@ -1111,16 +1283,32 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// tail, add an unconditional branch to it.
if (TailBB) {
BBInfo TailBBI = BBAnalysis[TailBB->getNumber()];
- if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) {
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ bool CanMergeTail = !TailBBI.HasFallThrough;
+ // There may still be a fall-through edge from BBI1 or BBI2 to TailBB;
+ // check if there are any other predecessors besides those.
+ unsigned NumPreds = TailBB->pred_size();
+ if (NumPreds > 1)
+ CanMergeTail = false;
+ else if (NumPreds == 1 && CanMergeTail) {
+ MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
+ if (*PI != BBI1->BB && *PI != BBI2->BB)
+ CanMergeTail = false;
+ }
+ if (CanMergeTail) {
MergeBlocks(BBI, TailBBI);
TailBBI.IsDone = true;
} else {
+ BBI.BB->addSuccessor(TailBB);
InsertUncondBranch(BBI.BB, TailBB, TII);
BBI.HasFallThrough = false;
}
}
+ // RemoveExtraEdges won't work if the block has an unanalyzable branch,
+ // which can happen here if TailBB is unanalyzable and is merged, so
+ // explicitly remove BBI1 and BBI2 as successors.
+ BBI.BB->removeSuccessor(BBI1->BB);
+ BBI.BB->removeSuccessor(BBI2->BB);
RemoveExtraEdges(BBI);
// Update block info.
@@ -1135,9 +1323,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
/// specified end with the specified condition.
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
- SmallVectorImpl<MachineOperand> &Cond) {
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs) {
for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) {
- if (TII->isPredicated(I))
+ if (I->isDebugValue() || TII->isPredicated(I))
continue;
if (!TII->PredicateInstruction(I, Cond)) {
#ifndef NDEBUG
@@ -1145,6 +1334,10 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
#endif
llvm_unreachable(0);
}
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(I, Redefs, TRI, true);
}
std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate));
@@ -1152,48 +1345,55 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
BBI.IsAnalyzed = false;
BBI.NonPredSize = 0;
- NumIfConvBBs++;
+ ++NumIfConvBBs;
}
/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
/// the destination block. Skip end-of-block branches if IgnoreBr is true.
void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
+ SmallSet<unsigned, 4> &Redefs,
bool IgnoreBr) {
MachineFunction &MF = *ToBBI.BB->getParent();
for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
E = FromBBI.BB->end(); I != E; ++I) {
const TargetInstrDesc &TID = I->getDesc();
- bool isPredicated = TII->isPredicated(I);
// Do not copy the end-of-block branches.
- if (IgnoreBr && !isPredicated && TID.isBranch())
+ if (IgnoreBr && TID.isBranch())
break;
MachineInstr *MI = MF.CloneMachineInstr(I);
ToBBI.BB->insert(ToBBI.BB->end(), MI);
ToBBI.NonPredSize++;
- if (!isPredicated)
+ if (!TII->isPredicated(I) && !MI->isDebugValue()) {
if (!TII->PredicateInstruction(MI, Cond)) {
#ifndef NDEBUG
dbgs() << "Unable to predicate " << *I << "!\n";
#endif
llvm_unreachable(0);
}
+ }
+
+ // If the predicated instruction now redefines a register as the result of
+ // if-conversion, add an implicit kill.
+ UpdatePredRedefs(MI, Redefs, TRI, true);
}
- std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
- FromBBI.BB->succ_end());
- MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
- MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
+ if (!IgnoreBr) {
+ std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
+ FromBBI.BB->succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL;
- for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = Succs[i];
- // Fallthrough edge can't be transferred.
- if (Succ == FallThrough)
- continue;
- ToBBI.BB->addSuccessor(Succ);
+ for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = Succs[i];
+ // Fallthrough edge can't be transferred.
+ if (Succ == FallThrough)
+ continue;
+ ToBBI.BB->addSuccessor(Succ);
+ }
}
std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(),
@@ -1203,25 +1403,18 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
ToBBI.IsAnalyzed = false;
- NumDupBBs++;
+ ++NumDupBBs;
}
/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
-///
-void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) {
+/// This will leave FromBB as an empty block, so remove all of its
+/// successor edges except for the fall-through edge. If AddEdges is true,
+/// i.e., when FromBBI's branch is being moved, add those successor edges to
+/// ToBBI.
+void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
ToBBI.BB->splice(ToBBI.BB->end(),
FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end());
- // Redirect all branches to FromBB to ToBB.
- std::vector<MachineBasicBlock *> Preds(FromBBI.BB->pred_begin(),
- FromBBI.BB->pred_end());
- for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
- MachineBasicBlock *Pred = Preds[i];
- if (Pred == ToBBI.BB)
- continue;
- Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB);
- }
-
std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
FromBBI.BB->succ_end());
MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
@@ -1233,7 +1426,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) {
if (Succ == FallThrough)
continue;
FromBBI.BB->removeSuccessor(Succ);
- ToBBI.BB->addSuccessor(Succ);
+ if (AddEdges)
+ ToBBI.BB->addSuccessor(Succ);
}
// Now FromBBI always falls through to the next block!
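
MergeBlocks leans on MachineBasicBlock::splice, which has the same semantics as std::list::splice: the instructions are relinked, not copied, and the source block is left empty. A standalone illustration:

    #include <cassert>
    #include <list>

    int main() {
      std::list<int> To, From;
      To.push_back(1);
      From.push_back(2);
      From.push_back(3);
      // Relink all of From's nodes onto the end of To in O(1); nothing is
      // copied and From is left empty, which is why MergeBlocks only has
      // to patch up CFG edges afterwards.
      To.splice(To.end(), From, From.begin(), From.end());
      assert(To.size() == 3 && From.empty());
      return 0;
    }
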
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
new file mode 100644
index 0000000..12adcaa
--- /dev/null
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -0,0 +1,408 @@
+//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The inline spiller modifies the machine function directly instead of
+// inserting spills and restores in VirtRegMap.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spiller"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class InlineSpiller : public Spiller {
+ MachineFunction &mf_;
+ LiveIntervals &lis_;
+ VirtRegMap &vrm_;
+ MachineFrameInfo &mfi_;
+ MachineRegisterInfo &mri_;
+ const TargetInstrInfo &tii_;
+ const TargetRegisterInfo &tri_;
+ const BitVector reserved_;
+
+ // Variables that are valid during spill(), but used by multiple methods.
+ LiveInterval *li_;
+ std::vector<LiveInterval*> *newIntervals_;
+ const TargetRegisterClass *rc_;
+ int stackSlot_;
+ const SmallVectorImpl<LiveInterval*> *spillIs_;
+
+ // Values of the current interval that can potentially remat.
+ SmallPtrSet<VNInfo*, 8> reMattable_;
+
+ // Values in reMattable_ that failed to remat at some point.
+ SmallPtrSet<VNInfo*, 8> usedValues_;
+
+ ~InlineSpiller() {}
+
+public:
+ InlineSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
+ : mf_(*mf), lis_(*lis), vrm_(*vrm),
+ mfi_(*mf->getFrameInfo()),
+ mri_(mf->getRegInfo()),
+ tii_(*mf->getTarget().getInstrInfo()),
+ tri_(*mf->getTarget().getRegisterInfo()),
+ reserved_(tri_.getReservedRegs(mf_)) {}
+
+ void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestIndex);
+
+private:
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx);
+ bool reMaterializeFor(MachineBasicBlock::iterator MI);
+ void reMaterializeAll();
+
+ bool foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops);
+ void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
+ void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
+};
+}
+
+namespace llvm {
+Spiller *createInlineSpiller(MachineFunction *mf,
+ LiveIntervals *lis,
+ const MachineLoopInfo *mli,
+ VirtRegMap *vrm) {
+ return new InlineSpiller(mf, lis, vrm);
+}
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx) {
+ OrigIdx = OrigIdx.getUseIndex();
+ UseIdx = UseIdx.getUseIndex();
+ for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OrigMI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg)
+ continue;
+ // Reserved registers are OK.
+ if (MO.isUndef() || !lis_.hasInterval(MO.getReg()))
+ continue;
+ // We don't want to move any defs.
+ if (MO.isDef())
+ return false;
+ // We cannot depend on virtual registers in spillIs_. They will be spilled.
+ for (unsigned si = 0, se = spillIs_->size(); si != se; ++si)
+ if ((*spillIs_)[si]->reg == MO.getReg())
+ return false;
+
+ LiveInterval &LI = lis_.getInterval(MO.getReg());
+ const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+ if (OVNI != LI.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
+/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of
+/// reloading it.
+bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
+ SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
+ VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx);
+ if (!OrigVNI) {
+ DEBUG(dbgs() << "\tadding <undef> flags: ");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg)
+ MO.setIsUndef();
+ }
+ DEBUG(dbgs() << UseIdx << '\t' << *MI);
+ return true;
+ }
+ if (!reMattable_.count(OrigVNI)) {
+ DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": "
+ << UseIdx << '\t' << *MI);
+ return false;
+ }
+ MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def);
+ if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) {
+ usedValues_.insert(OrigVNI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // If the instruction also writes li_->reg, it had better not require the same
+ // register for uses and defs.
+ bool Reads, Writes;
+ SmallVector<unsigned, 8> Ops;
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops);
+ if (Writes) {
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) {
+ usedValues_.insert(OrigVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
+ }
+ }
+ }
+
+ // Allocate a new register for the remat.
+ unsigned NewVReg = mri_.createVirtualRegister(rc_);
+ vrm_.grow();
+ LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ NewLI.markNotSpillable();
+ newIntervals_->push_back(&NewLI);
+
+ // Finally we can rematerialize OrigMI before MI.
+ MachineBasicBlock &MBB = *MI->getParent();
+ tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_);
+ MachineBasicBlock::iterator RematMI = MI;
+ SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex();
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI);
+
+ // Replace operands
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) {
+ MO.setReg(NewVReg);
+ MO.setIsKill();
+ }
+ }
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
+
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true,
+ lis_.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ // Do a quick scan of the interval values to find if any are remattable.
+ reMattable_.clear();
+ usedValues_.clear();
+ for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
+ E = li_->vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused() || !VNI->isDefAccurate())
+ continue;
+ MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
+ if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
+ continue;
+ reMattable_.insert(VNI);
+ }
+
+ // Often, no defs are remattable.
+ if (reMattable_.empty())
+ return;
+
+ // Try to remat before all uses of li_->reg.
+ bool anyRemat = false;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ RI = mri_.use_nodbg_begin(li_->reg);
+ MachineInstr *MI = RI.skipInstruction();)
+ anyRemat |= reMaterializeFor(MI);
+
+ if (!anyRemat)
+ return;
+
+ // Remove any values that were completely rematted.
+ bool anyRemoved = false;
+ for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(),
+ E = reMattable_.end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->hasPHIKill() || usedValues_.count(VNI))
+ continue;
+ MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
+ DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
+ lis_.RemoveMachineInstrFromMaps(DefMI);
+ vrm_.RemoveMachineInstrFromMaps(DefMI);
+ DefMI->eraseFromParent();
+ li_->removeValNo(VNI);
+ anyRemoved = true;
+ }
+
+ if (!anyRemoved)
+ return;
+
+ // Removing values may cause debug uses where li_ is not live.
+ for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+ if (!MI->isDebugValue())
+ continue;
+ // Try to preserve the debug value if li_ is live immediately after it.
+ MachineBasicBlock::iterator NextMI = MI;
+ ++NextMI;
+ if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
+ SlotIndex NearIdx = lis_.getInstructionIndex(NextMI);
+ if (li_->liveAt(NearIdx))
+ continue;
+ }
+ DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI);
+ MI->eraseFromParent();
+ }
+}
+
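
Both loops in reMaterializeAll iterate with RI.skipInstruction(), which advances the use iterator before returning the instruction, so the current instruction can be erased without breaking the traversal. The standard-library analogue of that idiom:

    #include <list>

    struct MI { bool Dead; };

    // Advance the iterator past the current element first; erasing that
    // element then cannot invalidate the iteration.
    static void eraseDead(std::list<MI> &L) {
      for (std::list<MI>::iterator I = L.begin(); I != L.end();) {
        std::list<MI>::iterator Cur = I++; // advance before possibly erasing
        if (Cur->Dead)
          L.erase(Cur);
      }
    }
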
+/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
+/// Return true on success, and MI will be erased.
+bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
+ const SmallVectorImpl<unsigned> &Ops) {
+ // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
+ // operands.
+ SmallVector<unsigned, 8> FoldOps;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ unsigned Idx = Ops[i];
+ MachineOperand &MO = MI->getOperand(Idx);
+ if (MO.isImplicit())
+ continue;
+ // FIXME: Teach targets to deal with subregs.
+ if (MO.getSubReg())
+ return false;
+ // Tied use operands should not be passed to foldMemoryOperand.
+ if (!MI->isRegTiedToDefOperand(Idx))
+ FoldOps.push_back(Idx);
+ }
+
+ MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_);
+ if (!FoldMI)
+ return false;
+ lis_.ReplaceMachineInstrInMaps(MI, FoldMI);
+ vrm_.addSpillSlotUse(stackSlot_, FoldMI);
+ MI->eraseFromParent();
+ DEBUG(dbgs() << "\tfolded: " << *FoldMI);
+ return true;
+}
+
+/// insertReload - Insert a reload of NewLI.reg before MI.
+void InlineSpiller::insertReload(LiveInterval &NewLI,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
+ tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_);
+ --MI; // Point to load instruction.
+ SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+ vrm_.addSpillSlotUse(stackSlot_, MI);
+ DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true,
+ lis_.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
+}
+
+/// insertSpill - Insert a spill of NewLI.reg after MI.
+void InlineSpiller::insertSpill(LiveInterval &NewLI,
+ MachineBasicBlock::iterator MI) {
+ MachineBasicBlock &MBB = *MI->getParent();
+ SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
+ tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_);
+ --MI; // Point to store instruction.
+ SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
+ vrm_.addSpillSlotUse(stackSlot_, MI);
+ DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true,
+ lis_.getVNInfoAllocator());
+ NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
+}
+
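
The intervals built by insertReload and insertSpill are deliberately tiny: each covers only the half-open gap between the new memory instruction and MI, and the two abut at the instruction itself. A sketch with integers standing in for SlotIndexes:

    #include <cassert>

    struct ToyRange { int Start, End; }; // half-open: [Start, End)

    int main() {
      // Suppose the reload is inserted at index 10, the rewritten
      // instruction sits at 12, and the spill store lands at 14.
      ToyRange Reload = { 10, 12 }; // [LoadIdx, Idx): load to use
      ToyRange Spill  = { 12, 14 }; // [Idx, StoreIdx): def to store
      assert(Reload.End == Spill.Start); // the ranges meet at the instruction
      return 0;
    }
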
+void InlineSpiller::spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestIndex) {
+ DEBUG(dbgs() << "Inline spilling " << *li << "\n");
+ assert(li->isSpillable() && "Attempting to spill already spilled value.");
+ assert(!li->isStackSlot() && "Trying to spill a stack slot.");
+
+ li_ = li;
+ newIntervals_ = &newIntervals;
+ rc_ = mri_.getRegClass(li->reg);
+ spillIs_ = &spillIs;
+
+ reMaterializeAll();
+
+ // Remat may handle everything.
+ if (li_->empty())
+ return;
+
+ stackSlot_ = vrm_.assignVirt2StackSlot(li->reg);
+
+ // Iterate over instructions using register.
+ for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg);
+ MachineInstr *MI = RI.skipInstruction();) {
+
+ // Debug values are not allowed to affect codegen.
+ if (MI->isDebugValue()) {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr = MI->getOperand(2).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_,
+ Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ } else {
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ MI->eraseFromParent();
+ }
+ continue;
+ }
+
+ // Analyze instruction.
+ bool Reads, Writes;
+ SmallVector<unsigned, 8> Ops;
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops);
+
+ // Attempt to fold memory ops.
+ if (foldMemoryOperand(MI, Ops))
+ continue;
+
+ // Allocate interval around instruction.
+ // FIXME: Infer regclass from instruction alone.
+ unsigned NewVReg = mri_.createVirtualRegister(rc_);
+ vrm_.grow();
+ LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ NewLI.markNotSpillable();
+
+ if (Reads)
+ insertReload(NewLI, MI);
+
+ // Rewrite instruction operands.
+ bool hasLiveDef = false;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(Ops[i]);
+ MO.setReg(NewVReg);
+ if (MO.isUse()) {
+ if (!MI->isRegTiedToDefOperand(Ops[i]))
+ MO.setIsKill();
+ } else {
+ if (!MO.isDead())
+ hasLiveDef = true;
+ }
+ }
+
+ // FIXME: Use a second vreg if instruction has no tied ops.
+ if (Writes && hasLiveDef)
+ insertSpill(NewLI, MI);
+
+ DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
+ newIntervals.push_back(&NewLI);
+ }
+}
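
A hypothetical call site for the interface defined above; MF, LIS, MLI, VRM and LI are placeholders for analyses a register allocator would already hold, not names from this file:

    std::vector<LiveInterval*> NewIntervals;
    SmallVector<LiveInterval*, 8> SpillIs;
    Spiller *S = createInlineSpiller(&MF, &LIS, &MLI, &VRM);
    S->spill(&LI, NewIntervals, SpillIs, 0 /* earliestIndex is unused here */);
    // NewIntervals now holds the short, not-spillable intervals created
    // around each remaining use or def of LI.
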
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 63bb5f2..03ae214 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -16,6 +16,7 @@
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
@@ -314,21 +315,22 @@ static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) {
static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
const char *Dname,
const char *LDname) {
- switch (CI->getOperand(1)->getType()->getTypeID()) {
+ CallSite CS(CI);
+ switch (CI->getArgOperand(0)->getType()->getTypeID()) {
default: llvm_unreachable("Invalid type in intrinsic");
case Type::FloatTyID:
- ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(),
+ ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(),
Type::getFloatTy(CI->getContext()));
break;
case Type::DoubleTyID:
- ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(),
+ ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(),
Type::getDoubleTy(CI->getContext()));
break;
case Type::X86_FP80TyID:
case Type::FP128TyID:
case Type::PPC_FP128TyID:
- ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(),
- CI->getOperand(1)->getType());
+ ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(),
+ CI->getArgOperand(0)->getType());
break;
}
}
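
The getOperand(1)-to-getArgOperand(0) rewrites in this file all encode the same fact: at this revision a call's operand list stores the callee first and the arguments after it, and getArgOperand hides that offset. A toy model (not the real CallInst, just its indexing) showing the equivalence:

    #include <cassert>
    #include <string>
    #include <vector>

    struct ToyCall {
      std::vector<std::string> Operands; // [callee, arg0, arg1, ...]
      const std::string &getOperand(unsigned i) const { return Operands[i]; }
      const std::string &getArgOperand(unsigned i) const {
        return Operands[i + 1]; // argument-relative, like CallInst
      }
    };

    int main() {
      ToyCall CI;
      CI.Operands.push_back("memcpy");
      CI.Operands.push_back("dst");
      CI.Operands.push_back("src");
      CI.Operands.push_back("len");
      assert(CI.getArgOperand(0) == CI.getOperand(1)); // same value, clearer intent
      return 0;
    }
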
@@ -340,6 +342,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
const Function *Callee = CI->getCalledFunction();
assert(Callee && "Cannot lower an indirect call!");
+ CallSite CS(CI);
switch (Callee->getIntrinsicID()) {
case Intrinsic::not_intrinsic:
report_fatal_error("Cannot lower a call to a non-intrinsic function '"+
@@ -353,7 +356,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
// by the lowerinvoke pass. In both cases, the right thing to do is to
// convert the call to an explicit setjmp or longjmp call.
case Intrinsic::setjmp: {
- Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
+ Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(),
Type::getInt32Ty(Context));
if (!CI->getType()->isVoidTy())
CI->replaceAllUsesWith(V);
@@ -365,32 +368,32 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
break;
case Intrinsic::longjmp: {
- ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
+ ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(),
Type::getVoidTy(Context));
break;
}
case Intrinsic::siglongjmp: {
// Insert the call to abort
- ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
+ ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(),
Type::getVoidTy(Context));
break;
}
case Intrinsic::ctpop:
- CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI));
+ CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI));
break;
case Intrinsic::bswap:
- CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI));
+ CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI));
break;
case Intrinsic::ctlz:
- CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI));
+ CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI));
break;
case Intrinsic::cttz: {
// cttz(x) -> ctpop(~X & (X-1))
- Value *Src = CI->getOperand(1);
+ Value *Src = CI->getArgOperand(0);
Value *NotSrc = Builder.CreateNot(Src);
NotSrc->setName(Src->getName() + ".not");
Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
@@ -451,37 +454,37 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
case Intrinsic::memcpy: {
const IntegerType *IntPtr = TD.getIntPtrType(Context);
- Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
- Ops[0] = CI->getOperand(1);
- Ops[1] = CI->getOperand(2);
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
Ops[2] = Size;
- ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType());
+ ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
break;
}
case Intrinsic::memmove: {
const IntegerType *IntPtr = TD.getIntPtrType(Context);
- Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
- Ops[0] = CI->getOperand(1);
- Ops[1] = CI->getOperand(2);
+ Ops[0] = CI->getArgOperand(0);
+ Ops[1] = CI->getArgOperand(1);
Ops[2] = Size;
- ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType());
+ ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
break;
}
case Intrinsic::memset: {
const IntegerType *IntPtr = TD.getIntPtrType(Context);
- Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
+ Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr,
/* isSigned */ false);
Value *Ops[3];
- Ops[0] = CI->getOperand(1);
+ Ops[0] = CI->getArgOperand(0);
// Extend the amount to i32.
- Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context),
+ Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context),
/* isSigned */ false);
Ops[2] = Size;
- ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType());
+ ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType());
break;
}
case Intrinsic::sqrt: {
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index b584704..bf3137e 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -329,12 +329,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None)
PM.add(createOptimizePHIsPass());
- // Delete dead machine instructions regardless of optimization level.
- PM.add(createDeadMachineInstructionElimPass());
- printAndVerify(PM, "After codegen DCE pass",
- /* allowDoubleDefs= */ true);
-
if (OptLevel != CodeGenOpt::None) {
+ // With optimization, dead code should already be eliminated. However
+ // there is one known exception: lowered code for arguments that are only
+ // used by tail calls, where the tail calls reuse the incoming stack
+ // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
+ PM.add(createDeadMachineInstructionElimPass());
+ printAndVerify(PM, "After codegen DCE pass",
+ /* allowDoubleDefs= */ true);
+
PM.add(createOptimizeExtsPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
@@ -358,7 +361,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
/* allowDoubleDefs= */ true);
// Perform register allocation.
- PM.add(createRegisterAllocator());
+ PM.add(createRegisterAllocator(OptLevel));
printAndVerify(PM, "After Register Allocation");
// Perform stack slot coloring and post-ra machine LICM.
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 03b4eab..b9527fa 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -118,7 +118,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
SUnit *LatencyPriorityQueue::pop() {
if (empty()) return NULL;
std::vector<SUnit *>::iterator Best = Queue.begin();
- for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
E = Queue.end(); I != E; ++I)
if (Picker(*Best, *I))
Best = I;
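
Qualifying the call as llvm::next sidesteps an ambiguity with C++0x's std::next: for an iterator type defined in namespace std, argument-dependent lookup also finds std::next, and an unqualified call then has two viable candidates. A standalone reproduction with a made-up mylib namespace:

    #include <iterator>
    #include <list>

    namespace mylib {
      template <typename T> T next(T it) { ++it; return it; }
    }
    using namespace mylib;

    int main() {
      std::list<int> L;
      L.push_back(1);
      L.push_back(2);
      // An unqualified next(L.begin()) is ambiguous under C++0x: the
      // iterator's associated namespace is std, so ADL also finds
      // std::next. Qualifying the call names one candidate, which is
      // exactly the llvm::next fix above.
      std::list<int>::iterator I = mylib::next(L.begin());
      return *I == 2 ? 0 : 1;
    }
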
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 025ad05..21a9b7d 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -68,6 +68,37 @@ bool LiveInterval::liveBeforeAndAt(SlotIndex I) const {
return r->end == I;
}
+/// killedAt - Return true if a live range ends at the given index. Note that the kill
+/// point is not contained in the half-open live range. It is usually the
+/// getDefIndex() slot following its last use.
+bool LiveInterval::killedAt(SlotIndex I) const {
+ Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I);
+
+ // Now r points to the first interval with start >= I, or ranges.end().
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ // Now r points to the last interval with end <= I.
+ // r->end is the kill point.
+ return r->end == I;
+}
+
+/// killedInRange - Return true if the interval has kills in [Start,End).
+bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
+ Ranges::const_iterator r =
+ std::lower_bound(ranges.begin(), ranges.end(), End);
+
+ // Now r points to the first interval with start >= End, or ranges.end().
+ if (r == ranges.begin())
+ return false;
+
+ --r;
+ // Now r points to the last interval with end <= End.
+ // r->end is the kill point.
+ return r->end >= Start && r->end < End;
+}
+
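
Both new predicates follow the same recipe: binary-search for the first range starting at or after the query point, step back one, and test that range's exclusive end, which is the kill point. A self-contained sketch with plain structs:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    struct ToyRange { unsigned Start, End; }; // half-open: [Start, End)

    static bool startsBefore(const ToyRange &R, unsigned I) {
      return R.Start < I;
    }

    // Mirror of killedAt: r is the first range with Start >= I; the
    // candidate kill point is the exclusive end of the range before it.
    static bool toyKilledAt(const std::vector<ToyRange> &Rs, unsigned I) {
      std::vector<ToyRange>::const_iterator r =
          std::lower_bound(Rs.begin(), Rs.end(), I, startsBefore);
      if (r == Rs.begin())
        return false;
      --r;
      return r->End == I;
    }

    int main() {
      std::vector<ToyRange> Rs;
      ToyRange A = { 4, 8 }, B = { 12, 20 };
      Rs.push_back(A);
      Rs.push_back(B);
      assert(toyKilledAt(Rs, 8));   // a range ends exactly at 8
      assert(!toyKilledAt(Rs, 10)); // 10 is in a hole, not a kill point
      assert(!toyKilledAt(Rs, 4));  // 4 is a def point, not a kill point
      return 0;
    }
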
// overlaps - Return true if the intersection of the two live intervals is
// not empty.
//
@@ -149,7 +180,6 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
assert(I != ranges.end() && "Not a valid interval!");
VNInfo *ValNo = I->valno;
- SlotIndex OldEnd = I->end;
// Search for the first interval that we can't merge with.
Ranges::iterator MergeTo = next(I);
@@ -163,9 +193,6 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) {
// Erase any dead ranges.
ranges.erase(next(I), MergeTo);
- // Update kill info.
- ValNo->removeKills(OldEnd, I->end.getPrevSlot());
-
// If the newly formed range now touches the range after it and if they have
// the same value number, merge the two ranges into one range.
Ranges::iterator Next = next(I);
@@ -245,9 +272,6 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
// endpoint as well.
if (End > it->end)
extendIntervalEndTo(it, End);
- else if (End < it->end)
- // Overlapping intervals, there might have been a kill here.
- it->valno->removeKill(End);
return it;
}
} else {
@@ -288,7 +312,6 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
VNInfo *ValNo = I->valno;
if (I->start == Start) {
if (I->end == End) {
- ValNo->removeKills(Start, End);
if (RemoveDeadValNo) {
// Check if val# is dead.
bool isDead = true;
@@ -296,7 +319,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
if (II != I && II->valno == ValNo) {
isDead = false;
break;
- }
+ }
if (isDead) {
// Now that ValNo is dead, remove it. If it is the largest value
// number, just nuke it (and any other deleted values neighboring it),
@@ -320,7 +343,6 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
// Otherwise if the span we are removing is at the end of the LiveRange,
// adjust the other way.
if (I->end == End) {
- ValNo->removeKills(Start, End);
I->end = Start;
return;
}
@@ -529,6 +551,7 @@ void LiveInterval::MergeValueInAsValue(
SmallVector<VNInfo*, 4> ReplacedValNos;
iterator IP = begin();
for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
+ assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo");
if (I->valno != RHSValNo)
continue;
SlotIndex Start = I->start, End = I->end;
@@ -823,10 +846,12 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
else {
OS << " = ";
for (LiveInterval::Ranges::const_iterator I = ranges.begin(),
- E = ranges.end(); I != E; ++I)
- OS << *I;
+ E = ranges.end(); I != E; ++I) {
+ OS << *I;
+ assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo");
+ }
}
-
+
// Print value number info.
if (getNumValNums()) {
OS << " ";
@@ -843,21 +868,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
OS << "?";
else
OS << vni->def;
- unsigned ee = vni->kills.size();
- if (ee || vni->hasPHIKill()) {
- OS << "-(";
- for (unsigned j = 0; j != ee; ++j) {
- OS << vni->kills[j];
- if (j != ee-1)
- OS << " ";
- }
- if (vni->hasPHIKill()) {
- if (ee)
- OS << " ";
- OS << "phi";
- }
- OS << ")";
- }
}
}
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index a6d38ad..194d03d 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -50,9 +50,6 @@ using namespace llvm;
static cl::opt<bool> DisableReMat("disable-rematerialization",
cl::init(false), cl::Hidden);
-static cl::opt<bool> EnableFastSpilling("fast-spill",
- cl::init(false), cl::Hidden);
-
STATISTIC(numIntervals , "Number of original intervals");
STATISTIC(numFolds , "Number of loads/stores folded into instructions");
STATISTIC(numSplits , "Number of intervals split");
@@ -90,8 +87,8 @@ void LiveIntervals::releaseMemory() {
r2iMap_.clear();
- // Release VNInfo memroy regions after all VNInfo objects are dtor'd.
- VNInfoAllocator.DestroyAll();
+ // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
while (!CloneMIs.empty()) {
MachineInstr *MI = CloneMIs.back();
CloneMIs.pop_back();
@@ -195,6 +192,10 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
if (SrcReg == li.reg || DstReg == li.reg)
continue;
+ if (MI.isCopy())
+ if (MI.getOperand(0).getReg() == li.reg ||
+ MI.getOperand(1).getReg() == li.reg)
+ continue;
// Check for operands using reg
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
@@ -218,10 +219,7 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
return false;
}
-/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except
-/// it checks for sub-register reference and it can check use as well.
-bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li,
- unsigned Reg, bool CheckUse,
+bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
for (LiveInterval::Ranges::const_iterator
I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
@@ -239,12 +237,11 @@ bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li,
MachineOperand& MO = MI->getOperand(i);
if (!MO.isReg())
continue;
- if (MO.isUse() && !CheckUse)
- continue;
unsigned PhysReg = MO.getReg();
- if (PhysReg == 0 || TargetRegisterInfo::isVirtualRegister(PhysReg))
+ if (PhysReg == 0 || PhysReg == Reg ||
+ TargetRegisterInfo::isVirtualRegister(PhysReg))
continue;
- if (tri_->isSubRegister(Reg, PhysReg))
+ if (tri_->regsOverlap(Reg, PhysReg))
return true;
}
}
@@ -272,7 +269,7 @@ bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
if (MO.getReg() == Reg && MO.isDef()) {
assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() &&
MI.getOperand(MOIdx).getSubReg() &&
- MO.getSubReg());
+ (MO.getSubReg() || MO.isImplicit()));
return true;
}
}
@@ -328,9 +325,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineInstr *CopyMI = NULL;
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() ||
- tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (mi->isCopyLike() ||
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
CopyMI = mi;
+ }
VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true,
VNInfoAllocator);
@@ -356,7 +354,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
LiveRange LR(defIndex, killIdx, ValNo);
interval.addRange(LR);
DEBUG(dbgs() << " +" << LR << "\n");
- ValNo->addKill(killIdx);
return;
}
}
@@ -376,7 +373,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// valno in the killing blocks.
assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
DEBUG(dbgs() << " phi-join");
- ValNo->addKill(indexes_->getTerminatorGap(mbb));
ValNo->setHasPHIKill(true);
} else {
// Iterate over all of the blocks that the variable is completely
@@ -407,7 +403,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
}
LiveRange LR(Start, killIdx, ValNo);
interval.addRange(LR);
- ValNo->addKill(killIdx);
DEBUG(dbgs() << " +" << LR);
}
@@ -434,11 +429,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// are actually two values in the live interval. Because of this we
// need to take the LiveRegion that defines this register and split it
// into two values.
- // Two-address vregs should always only be redefined once. This means
- // that at this point, there should be exactly one value number in it.
- assert((PartReDef || interval.containsOneValue()) &&
- "Unexpected 2-addr liveint!");
- SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex();
SlotIndex RedefIndex = MIIdx.getDefIndex();
if (MO.isEarlyClobber())
RedefIndex = MIIdx.getUseIndex();
@@ -446,8 +436,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
const LiveRange *OldLR =
interval.getLiveRangeContaining(RedefIndex.getUseIndex());
VNInfo *OldValNo = OldLR->valno;
+ SlotIndex DefIndex = OldValNo->def.getDefIndex();
- // Delete the initial value, which should be short and continuous,
+ // Delete the previous value, which should be short and continuous,
// because the 2-addr copy must be in the same MBB as the redef.
interval.removeRange(DefIndex, RedefIndex);
@@ -464,15 +455,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (PartReDef &&
- tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (PartReDef && (mi->isCopyLike() ||
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)))
OldValNo->setCopy(&*mi);
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
DEBUG(dbgs() << " replace range with " << LR);
interval.addRange(LR);
- ValNo->addKill(RedefIndex);
// If this redefinition is dead, we need to add a dummy unit live
// range covering the def slot.
@@ -496,7 +486,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
VNInfo *ValNo;
MachineInstr *CopyMI = NULL;
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg()||
+ if (mi->isCopyLike() ||
tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
CopyMI = mi;
ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
@@ -504,7 +494,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
- ValNo->addKill(indexes_->getTerminatorGap(mbb));
ValNo->setHasPHIKill(true);
DEBUG(dbgs() << " phi-join +" << LR);
} else {
@@ -600,7 +589,6 @@ exit:
ValNo->setHasRedefByEC(true);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
- LR.valno->addKill(end);
DEBUG(dbgs() << " +" << LR << '\n');
}
@@ -615,7 +603,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
else if (allocatableRegs_[MO.getReg()]) {
MachineInstr *CopyMI = NULL;
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg() ||
+ if (MI->isCopyLike() ||
tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
CopyMI = MI;
handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
@@ -701,7 +689,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
LiveRange LR(start, end, vni);
interval.addRange(LR);
- LR.valno->addKill(end);
DEBUG(dbgs() << " +" << LR << '\n');
}
@@ -787,37 +774,6 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
return NewLI;
}
-/// getVNInfoSourceReg - Helper function that parses the specified VNInfo
-/// copy field and returns the source register that defines it.
-unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
- if (!VNI->getCopy())
- return 0;
-
- if (VNI->getCopy()->isExtractSubreg()) {
- // If it's extracting out of a physical register, return the sub-register.
- unsigned Reg = VNI->getCopy()->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- unsigned SrcSubReg = VNI->getCopy()->getOperand(2).getImm();
- unsigned DstSubReg = VNI->getCopy()->getOperand(0).getSubReg();
- if (SrcSubReg == DstSubReg)
- // %reg1034:3<def> = EXTRACT_SUBREG %EDX, 3
- // reg1034 can still be coalesced to EDX.
- return Reg;
- assert(DstSubReg == 0);
- Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm());
- }
- return Reg;
- } else if (VNI->getCopy()->isInsertSubreg() ||
- VNI->getCopy()->isSubregToReg())
- return VNI->getCopy()->getOperand(2).getReg();
-
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*VNI->getCopy(), SrcReg, DstReg, SrcSubReg, DstSubReg))
- return SrcReg;
- llvm_unreachable("Unrecognized copy instruction!");
- return 0;
-}
-
//===----------------------------------------------------------------------===//
// Register allocator hooks.
//
@@ -991,22 +947,22 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
if (DefMI && (MRInfo & VirtRegMap::isMod))
return false;
- MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(*mf_, MI, FoldOps, Slot)
- : tii_->foldMemoryOperand(*mf_, MI, FoldOps, DefMI);
+ MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
+ : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
if (fmi) {
// Remember this instruction uses the spill slot.
if (isSS) vrm.addSpillSlotUse(Slot, fmi);
// Attempt to fold the memory reference into the instruction. If
// we can do this, we don't need to insert spill code.
- MachineBasicBlock &MBB = *MI->getParent();
if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
vrm.transferSpillPts(MI, fmi);
vrm.transferRestorePts(MI, fmi);
vrm.transferEmergencySpills(MI, fmi);
ReplaceMachineInstrInMaps(MI, fmi);
- MI = MBB.insert(MBB.erase(MI), fmi);
+ MI->eraseFromParent();
+ MI = fmi;
++numFolds;
return true;
}
@@ -1098,7 +1054,6 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (!mop.isReg())
continue;
unsigned Reg = mop.getReg();
- unsigned RegI = Reg;
if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
if (Reg != li.reg)
@@ -1140,26 +1095,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
//
// Keep track of whether we replace a use and/or def so that we can
// create the spill interval with the appropriate range.
-
- HasUse = mop.isUse();
- HasDef = mop.isDef();
SmallVector<unsigned, 2> Ops;
- Ops.push_back(i);
- for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
- const MachineOperand &MOj = MI->getOperand(j);
- if (!MOj.isReg())
- continue;
- unsigned RegJ = MOj.getReg();
- if (RegJ == 0 || TargetRegisterInfo::isPhysicalRegister(RegJ))
- continue;
- if (RegJ == RegI) {
- Ops.push_back(j);
- if (!MOj.isUndef()) {
- HasUse |= MOj.isUse();
- HasDef |= MOj.isDef();
- }
- }
- }
+ tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
// Create a new virtual register for the spill interval.
// Create the new register now so we can map the fold instruction
@@ -1294,16 +1231,7 @@ bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
const VNInfo *VNI,
MachineBasicBlock *MBB,
SlotIndex Idx) const {
- SlotIndex End = getMBBEndIdx(MBB);
- for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) {
- if (VNI->kills[j].isPHI())
- continue;
-
- SlotIndex KillIdx = VNI->kills[j];
- if (KillIdx > Idx && KillIdx <= End)
- return true;
- }
- return false;
+ return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
}
/// RewriteInfo - Keep track of machine instrs that will be rewritten
@@ -1312,10 +1240,7 @@ namespace {
struct RewriteInfo {
SlotIndex Index;
MachineInstr *MI;
- bool HasUse;
- bool HasDef;
- RewriteInfo(SlotIndex i, MachineInstr *mi, bool u, bool d)
- : Index(i), MI(mi), HasUse(u), HasDef(d) {}
+ RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
};
struct RewriteInfoCompare {
@@ -1394,7 +1319,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
// easily see a situation where both registers are reloaded before
// the INSERT_SUBREG and both target registers that would overlap.
continue;
- RewriteMIs.push_back(RewriteInfo(index, MI, O.isUse(), O.isDef()));
+ RewriteMIs.push_back(RewriteInfo(index, MI));
}
std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
@@ -1404,18 +1329,11 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
RewriteInfo &rwi = RewriteMIs[i];
++i;
SlotIndex index = rwi.Index;
- bool MIHasUse = rwi.HasUse;
- bool MIHasDef = rwi.HasDef;
MachineInstr *MI = rwi.MI;
// If MI defs and/or uses the same register multiple times, then there
// are multiple entries.
- unsigned NumUses = MIHasUse;
while (i != e && RewriteMIs[i].MI == MI) {
assert(RewriteMIs[i].Index == index);
- bool isUse = RewriteMIs[i].HasUse;
- if (isUse) ++NumUses;
- MIHasUse |= isUse;
- MIHasDef |= RewriteMIs[i].HasDef;
++i;
}
MachineBasicBlock *MBB = MI->getParent();
@@ -1440,7 +1358,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
// = use
// It's better to start a new interval to avoid artificially
// extending the new interval.
- if (MIHasDef && !MIHasUse) {
+ if (MI->readsWritesVirtualRegister(li.reg) ==
+ std::make_pair(false,true)) {
MBBVRegsMap.erase(MBB->getNumber());
ThisVReg = 0;
}
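
readsWritesVirtualRegister folds the old two-flag operand scan into one call returning std::pair<bool, bool> (reads, writes); the earlier hunk unpacks it with tie(HasUse, HasDef) and collects operand indices through the optional &Ops argument. A standalone sketch of the one-pass scan it replaces:

    #include <utility>
    #include <vector>

    struct Operand { unsigned Reg; bool IsUse, IsDef; };

    // Report (reads, writes) of Reg across all operands in a single pass.
    static std::pair<bool, bool>
    readsWrites(const std::vector<Operand> &Ops, unsigned Reg) {
      bool Use = false, Def = false;
      for (size_t i = 0, e = Ops.size(); i != e; ++i)
        if (Ops[i].Reg == Reg) {
          Use |= Ops[i].IsUse;
          Def |= Ops[i].IsDef;
        }
      return std::make_pair(Use, Def);
    }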
@@ -1652,103 +1571,9 @@ LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
}
std::vector<LiveInterval*> LiveIntervals::
-addIntervalsForSpillsFast(const LiveInterval &li,
- const MachineLoopInfo *loopInfo,
- VirtRegMap &vrm) {
- unsigned slot = vrm.assignVirt2StackSlot(li.reg);
-
- std::vector<LiveInterval*> added;
-
- assert(li.isSpillable() && "attempt to spill already spilled interval!");
-
- DEBUG({
- dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
- li.dump();
- dbgs() << '\n';
- });
-
- const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
-
- MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg);
- while (RI != mri_->reg_end()) {
- MachineInstr* MI = &*RI;
-
- SmallVector<unsigned, 2> Indices;
- bool HasUse = false;
- bool HasDef = false;
-
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& mop = MI->getOperand(i);
- if (!mop.isReg() || mop.getReg() != li.reg) continue;
-
- HasUse |= MI->getOperand(i).isUse();
- HasDef |= MI->getOperand(i).isDef();
-
- Indices.push_back(i);
- }
-
- if (!tryFoldMemoryOperand(MI, vrm, NULL, getInstructionIndex(MI),
- Indices, true, slot, li.reg)) {
- unsigned NewVReg = mri_->createVirtualRegister(rc);
- vrm.grow();
- vrm.assignVirt2StackSlot(NewVReg, slot);
-
- // create a new register for this spill
- LiveInterval &nI = getOrCreateInterval(NewVReg);
- nI.markNotSpillable();
-
- // Rewrite register operands to use the new vreg.
- for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(),
- E = Indices.end(); I != E; ++I) {
- MI->getOperand(*I).setReg(NewVReg);
-
- if (MI->getOperand(*I).isUse())
- MI->getOperand(*I).setIsKill(true);
- }
-
- // Fill in the new live interval.
- SlotIndex index = getInstructionIndex(MI);
- if (HasUse) {
- LiveRange LR(index.getLoadIndex(), index.getUseIndex(),
- nI.getNextValue(SlotIndex(), 0, false,
- getVNInfoAllocator()));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- vrm.addRestorePoint(NewVReg, MI);
- }
- if (HasDef) {
- LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
- nI.getNextValue(SlotIndex(), 0, false,
- getVNInfoAllocator()));
- DEBUG(dbgs() << " +" << LR);
- nI.addRange(LR);
- vrm.addSpillPoint(NewVReg, true, MI);
- }
-
- added.push_back(&nI);
-
- DEBUG({
- dbgs() << "\t\t\t\tadded new interval: ";
- nI.dump();
- dbgs() << '\n';
- });
- }
-
-
- RI = mri_->reg_begin(li.reg);
- }
-
- return added;
-}
-
-std::vector<LiveInterval*> LiveIntervals::
addIntervalsForSpills(const LiveInterval &li,
SmallVectorImpl<LiveInterval*> &SpillIs,
const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
-
- if (EnableFastSpilling)
- return addIntervalsForSpillsFast(li, loopInfo, vrm);
-
assert(li.isSpillable() && "attempt to spill already spilled interval!");
DEBUG({
@@ -2184,7 +2009,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
startInst, true, getVNInfoAllocator());
VN->setHasPHIKill(true);
- VN->kills.push_back(indexes_->getTerminatorGap(startInst->getParent()));
LiveRange LR(
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
getMBBEndIdx(startInst->getParent()), VN);
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index 798b9b9..709e2c6 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -35,8 +35,8 @@ void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
}
void LiveStacks::releaseMemory() {
- // Release VNInfo memroy regions after all VNInfo objects are dtor'd.
- VNInfoAllocator.DestroyAll();
+ // Release VNInfo memory regions; VNInfo objects don't need to be dtor'd.
+ VNInfoAllocator.Reset();
S2IMap.clear();
S2RCMap.clear();
}
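
Reset() is valid here only because VNInfo is trivially destructible: a bump allocator rewinds its pointer without running destructors, whereas DestroyAll() also invoked them. A minimal sketch of the distinction (not the real BumpPtrAllocator interface):

    #include <cstddef>

    class BumpAlloc {
      char Buf[4096];
      size_t Off;
    public:
      BumpAlloc() : Off(0) {}
      // No overflow handling; illustration only.
      void *alloc(size_t N) { void *P = Buf + Off; Off += N; return P; }
      // Rewind: memory is reclaimed, destructors are NOT run, so only
      // trivially destructible objects may live here.
      void reset() { Off = 0; }
    };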
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 079684e..41b891d 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -286,7 +286,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
MachineInstr *LastDef = PhysRegDef[Reg];
MachineInstr *LastUse = PhysRegUse[Reg];
if (!LastDef && !LastUse)
- return false;
+ return 0;
MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
@@ -609,7 +609,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Finally, if the last instruction in the block is a return, make sure to
// mark it as using all of the live-out values in the function.
- if (!MBB->empty() && MBB->back().getDesc().isReturn()) {
+ // Things marked both call and return are tail calls; do not do this for
+ // them. The tail callee need not take the same registers as input that
+ // it produces as output, and the dependencies for its input registers
+ // are tracked elsewhere.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn()
+ && !MBB->back().getDesc().isCall()) {
MachineInstr *Ret = &MBB->back();
for (MachineRegisterInfo::liveout_iterator
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index b0348a5..dfd4eae 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -53,15 +53,15 @@ namespace {
bool runOnMachineFunction(MachineFunction&);
private:
- bool LowerExtract(MachineInstr *MI);
- bool LowerInsert(MachineInstr *MI);
bool LowerSubregToReg(MachineInstr *MI);
+ bool LowerCopy(MachineInstr *MI);
void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
const TargetRegisterInfo *TRI);
void TransferKillFlag(MachineInstr *MI, unsigned SrcReg,
const TargetRegisterInfo *TRI,
bool AddIfNotFound = false);
+ void TransferImplicitDefs(MachineInstr *MI);
};
char LowerSubregsInstructionPass::ID = 0;
@@ -83,7 +83,7 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
if (MII->addRegisterDead(DstReg, TRI))
break;
assert(MII != MI->getParent()->begin() &&
- "copyRegToReg output doesn't reference destination register!");
+ "copyPhysReg output doesn't reference destination register!");
}
}
@@ -100,64 +100,24 @@ LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound))
break;
assert(MII != MI->getParent()->begin() &&
- "copyRegToReg output doesn't reference source register!");
+ "copyPhysReg output doesn't reference source register!");
}
}
-bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
- MachineBasicBlock *MBB = MI->getParent();
-
- assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
- MI->getOperand(1).isReg() && MI->getOperand(1).isUse() &&
- MI->getOperand(2).isImm() && "Malformed extract_subreg");
-
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SuperReg = MI->getOperand(1).getReg();
- unsigned SubIdx = MI->getOperand(2).getImm();
- unsigned SrcReg = TRI->getSubReg(SuperReg, SubIdx);
-
- assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) &&
- "Extract supperg source must be a physical register");
- assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
- "Extract destination must be in a physical register");
- assert(SrcReg && "invalid subregister index for register");
-
- DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
-
- if (SrcReg == DstReg) {
- // No need to insert an identity copy instruction.
- if (MI->getOperand(1).isKill()) {
- // We must make sure the super-register gets killed. Replace the
- // instruction with KILL.
- MI->setDesc(TII->get(TargetOpcode::KILL));
- MI->RemoveOperand(2); // SubIdx
- DEBUG(dbgs() << "subreg: replace by: " << *MI);
- return true;
- }
-
- DEBUG(dbgs() << "subreg: eliminated!");
- } else {
- // Insert copy
- const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg);
- const TargetRegisterClass *TRCD = TRI->getPhysicalRegisterRegClass(SrcReg);
- bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS,
- MI->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Subreg and Dst must be of compatible register class");
- // Transfer the kill/dead flags, if needed.
- if (MI->getOperand(0).isDead())
- TransferDeadFlag(MI, DstReg, TRI);
- if (MI->getOperand(1).isKill())
- TransferKillFlag(MI, SuperReg, TRI, true);
- DEBUG({
- MachineBasicBlock::iterator dMI = MI;
- dbgs() << "subreg: " << *(--dMI);
- });
+/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it. Copy any implicit-def
+/// operands from MI to the replacement instruction.
+void
+LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) {
+ MachineBasicBlock::iterator CopyMI = MI;
+ --CopyMI;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+ continue;
+ CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
}
-
- DEBUG(dbgs() << '\n');
- MBB->erase(MI);
- return true;
}
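
For example, a COPY pseudo that carries an implicit super-register def keeps that information on the lowered instruction. A hedged before/after sketch (opcode and register names illustrative):

    //   Before lowering:
    //     %EAX<def> = COPY %EBX, %RAX<imp-def>
    //   After copyPhysReg emits the real move, TransferImplicitDefs
    //   re-attaches the implicit operand to it:
    //     %EAX<def> = MOV32rr %EBX, %RAX<imp-def>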
bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
@@ -166,10 +126,10 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
MI->getOperand(1).isImm() &&
(MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
-
+
unsigned DstReg = MI->getOperand(0).getReg();
unsigned InsReg = MI->getOperand(2).getReg();
- unsigned InsSIdx = MI->getOperand(2).getSubReg();
+ assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
unsigned SubIdx = MI->getOperand(3).getImm();
assert(SubIdx != 0 && "Invalid index for insert_subreg");
@@ -182,27 +142,25 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
- if (DstSubReg == InsReg && InsSIdx == 0) {
+ if (DstSubReg == InsReg) {
// No need to insert an identity copy instruction.
// Watch out for case like this:
- // %RAX<def> = ...
- // %RAX<def> = SUBREG_TO_REG 0, %EAX:3<kill>, 3
- // The first def is defining RAX, not EAX so the top bits were not
- // zero extended.
+ // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3
+ // We must leave %RAX live.
+ if (DstReg != InsReg) {
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ MI->RemoveOperand(3); // SubIdx
+ MI->RemoveOperand(1); // Imm
+ DEBUG(dbgs() << "subreg: replace by: " << *MI);
+ return true;
+ }
DEBUG(dbgs() << "subreg: eliminated!");
} else {
- // Insert sub-register copy
- const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
- const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
- bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
- MI->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Subreg and Dst must be of compatible register class");
+ TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+ MI->getOperand(2).isKill());
// Transfer the kill/dead flags, if needed.
if (MI->getOperand(0).isDead())
TransferDeadFlag(MI, DstSubReg, TRI);
- if (MI->getOperand(2).isKill())
- TransferKillFlag(MI, InsReg, TRI);
DEBUG({
MachineBasicBlock::iterator dMI = MI;
dbgs() << "subreg: " << *(--dMI);
@@ -214,87 +172,39 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
return true;
}
-bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
- MachineBasicBlock *MBB = MI->getParent();
- assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
- (MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) &&
- (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
- MI->getOperand(3).isImm() && "Invalid insert_subreg");
-
- unsigned DstReg = MI->getOperand(0).getReg();
-#ifndef NDEBUG
- unsigned SrcReg = MI->getOperand(1).getReg();
-#endif
- unsigned InsReg = MI->getOperand(2).getReg();
- unsigned SubIdx = MI->getOperand(3).getImm();
+bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) {
+ MachineOperand &DstMO = MI->getOperand(0);
+ MachineOperand &SrcMO = MI->getOperand(1);
- assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?");
- assert(SubIdx != 0 && "Invalid index for insert_subreg");
- unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
- assert(DstSubReg && "invalid subregister index for register");
- assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- "Insert superreg source must be in a physical register");
- assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
- "Inserted value must be in a physical register");
-
- DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
-
- if (DstSubReg == InsReg) {
- // No need to insert an identity copy instruction. If the SrcReg was
- // <undef>, we need to make sure it is alive by inserting a KILL
- if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) {
- MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::KILL), DstReg);
- if (MI->getOperand(2).isUndef())
- MIB.addReg(InsReg, RegState::Undef);
- else
- MIB.addReg(InsReg, RegState::Kill);
- } else {
- DEBUG(dbgs() << "subreg: eliminated!\n");
- MBB->erase(MI);
+ if (SrcMO.getReg() == DstMO.getReg()) {
+ DEBUG(dbgs() << "identity copy: " << *MI);
+ // No need to insert an identity copy instruction, but replace with a KILL
+ // if liveness is changed.
+ if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+ // We must make sure the super-register gets killed. Replace the
+ // instruction with KILL.
+ MI->setDesc(TII->get(TargetOpcode::KILL));
+ DEBUG(dbgs() << "replaced by: " << *MI);
return true;
}
- } else {
- // Insert sub-register copy
- const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
- const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
- if (MI->getOperand(2).isUndef())
- // If the source register being inserted is undef, then this becomes a
- // KILL.
- BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::KILL), DstSubReg);
- else {
- bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
- MI->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Subreg and Dst must be of compatible register class");
- }
- MachineBasicBlock::iterator CopyMI = MI;
- --CopyMI;
-
- // INSERT_SUBREG is a two-address instruction so it implicitly kills SrcReg.
- if (!MI->getOperand(1).isUndef())
- CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true));
-
- // Transfer the kill/dead flags, if needed.
- if (MI->getOperand(0).isDead()) {
- TransferDeadFlag(MI, DstSubReg, TRI);
- } else {
- // Make sure the full DstReg is live after this replacement.
- CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true));
- }
-
- // Make sure the inserted register gets killed
- if (MI->getOperand(2).isKill() && !MI->getOperand(2).isUndef())
- TransferKillFlag(MI, InsReg, TRI);
+ // Vanilla identity copy.
+ MI->eraseFromParent();
+ return true;
}
- DEBUG({
- MachineBasicBlock::iterator dMI = MI;
- dbgs() << "subreg: " << *(--dMI) << "\n";
- });
+ DEBUG(dbgs() << "real copy: " << *MI);
+ TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+ DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
- MBB->erase(MI);
+ if (DstMO.isDead())
+ TransferDeadFlag(MI, DstMO.getReg(), TRI);
+ if (MI->getNumOperands() > 2)
+ TransferImplicitDefs(MI);
+ DEBUG({
+ MachineBasicBlock::iterator dMI = MI;
+ dbgs() << "replaced by: " << *(--dMI);
+ });
+ MI->eraseFromParent();
return true;
}
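
The identity-copy branch above erases the instruction only when doing so is liveness-neutral; otherwise it degrades to a KILL. A compact standalone restatement of that decision:

    enum CopyAction { Erase, ReplaceWithKill, EmitRealCopy };

    // Sketch of LowerCopy's classification: extra implicit operands, a
    // dead def, or an undef source all carry liveness information that
    // must survive, so the identity copy becomes a KILL instead.
    static CopyAction classifyCopy(bool SameReg, bool DstDead,
                                   bool SrcUndef, bool HasExtraOps) {
      if (!SameReg)
        return EmitRealCopy;
      if (DstDead || SrcUndef || HasExtraOps)
        return ReplaceWithKill;
      return Erase;
    }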
@@ -317,12 +227,13 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
mi != me;) {
MachineBasicBlock::iterator nmi = llvm::next(mi);
MachineInstr *MI = mi;
- if (MI->isExtractSubreg()) {
- MadeChange |= LowerExtract(MI);
- } else if (MI->isInsertSubreg()) {
- MadeChange |= LowerInsert(MI);
- } else if (MI->isSubregToReg()) {
+ assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear");
+ assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+ "EXTRACT_SUBREG should no longer appear");
+ if (MI->isSubregToReg()) {
MadeChange |= LowerSubregToReg(MI);
+ } else if (MI->isCopy()) {
+ MadeChange |= LowerCopy(MI);
}
mi = nmi;
}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index eaaa1f8..a27ee47 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -13,7 +13,10 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -136,6 +139,13 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
Parent->getParent()->DeleteMachineInstr(MI);
}
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
+ iterator I = begin();
+ while (I != end() && I->isPHI())
+ ++I;
+ return I;
+}
+
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
iterator I = end();
while (I != begin() && (--I)->getDesc().isTerminator())
@@ -245,6 +255,7 @@ void MachineBasicBlock::updateTerminator() {
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
+ DebugLoc dl; // FIXME: this is nowhere
bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
(void) B;
assert(!B && "UpdateTerminators requires analyzable predecessors!");
@@ -259,7 +270,7 @@ void MachineBasicBlock::updateTerminator() {
// its layout successor, insert a branch.
TBB = *succ_begin();
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, 0, Cond);
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
}
} else {
if (FBB) {
@@ -270,10 +281,10 @@ void MachineBasicBlock::updateTerminator() {
if (TII->ReverseBranchCondition(Cond))
return;
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FBB, 0, Cond);
+ TII->InsertBranch(*this, FBB, 0, Cond, dl);
} else if (isLayoutSuccessor(FBB)) {
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, 0, Cond);
+ TII->InsertBranch(*this, TBB, 0, Cond, dl);
}
} else {
// The block has a fallthrough conditional branch.
@@ -284,14 +295,14 @@ void MachineBasicBlock::updateTerminator() {
if (TII->ReverseBranchCondition(Cond)) {
// We can't reverse the condition, add an unconditional branch.
Cond.clear();
- TII->InsertBranch(*this, MBBA, 0, Cond);
+ TII->InsertBranch(*this, MBBA, 0, Cond, dl);
return;
}
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, MBBA, 0, Cond);
+ TII->InsertBranch(*this, MBBA, 0, Cond, dl);
} else if (!isLayoutSuccessor(MBBA)) {
TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, MBBA, Cond);
+ TII->InsertBranch(*this, TBB, MBBA, Cond, dl);
}
}
}
@@ -331,12 +342,32 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
if (this == fromMBB)
return;
- for (MachineBasicBlock::succ_iterator I = fromMBB->succ_begin(),
- E = fromMBB->succ_end(); I != E; ++I)
- addSuccessor(*I);
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ addSuccessor(Succ);
+ fromMBB->removeSuccessor(Succ);
+ }
+}
+
+void
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
+ if (this == fromMBB)
+ return;
- while (!fromMBB->succ_empty())
- fromMBB->removeSuccessor(fromMBB->succ_begin());
+ while (!fromMBB->succ_empty()) {
+ MachineBasicBlock *Succ = *fromMBB->succ_begin();
+ addSuccessor(Succ);
+ fromMBB->removeSuccessor(Succ);
+
+ // Fix up any PHI nodes in the successor.
+ for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
+ MI != ME && MI->isPHI(); ++MI)
+ for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.getMBB() == fromMBB)
+ MO.setMBB(this);
+ }
+ }
}
bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
@@ -395,6 +426,82 @@ bool MachineBasicBlock::canFallThrough() {
return FBB == 0;
}
+MachineBasicBlock *
+MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+ MachineFunction *MF = getParent();
+ DebugLoc dl; // FIXME: this is nowhere
+
+ // We may need to update this block's terminator, but we can't do that if
+ // AnalyzeBranch fails. If this block uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
+ return NULL;
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
+ DEBUG(dbgs() << "PHIElimination splitting critical edge:"
+ " BB#" << getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << Succ->getNumber() << '\n');
+
+ ReplaceUsesOfBlockWith(Succ, NMBB);
+ updateTerminator();
+
+ // Insert unconditional "jump Succ" instruction in NMBB if necessary.
+ NMBB->addSuccessor(Succ);
+ if (!NMBB->isLayoutSuccessor(Succ)) {
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+ }
+
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this
+ for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+ i != e && i->isPHI(); ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == this)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ if (LiveVariables *LV =
+ P->getAnalysisIfAvailable<LiveVariables>())
+ LV->addNewBlock(NMBB, this, Succ);
+
+ if (MachineDominatorTree *MDT =
+ P->getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->addNewBlock(NMBB, this);
+
+ if (MachineLoopInfo *MLI =
+ P->getAnalysisIfAvailable<MachineLoopInfo>())
+ if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+ // If one or the other blocks were not in a loop, the new block is not
+ // either, and thus LI doesn't need to be updated.
+ if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+ if (TIL == DestLoop) {
+ // Both in the same loop, the NMBB joins loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (TIL->contains(DestLoop)) {
+ // Edge from an outer loop to an inner loop. Add to the outer loop.
+ TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else if (DestLoop->contains(TIL)) {
+ // Edge from an inner loop to an outer loop. Add to the outer loop.
+ DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+ } else {
+ // Edge from two loops with no containment relation. Because these
+ // are natural loops, we know that the destination block must be the
+ // header of its loop (adding a branch into a loop elsewhere would
+ // create an irreducible loop).
+ assert(DestLoop->getHeader() == Succ &&
+ "Should not create irreducible loops!");
+ if (MachineLoop *P = DestLoop->getParentLoop())
+ P->addBasicBlockToLoop(NMBB, MLI->getBase());
+ }
+ }
+ }
+
+ return NMBB;
+}
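
The first in-tree caller appears later in this patch: MachineLICM::getCurPreheader manufactures a preheader on demand. A hedged usage fragment (not standalone; Pred and Header stand for the loop predecessor and header, and the second argument is the running Pass):

    // Split the Pred -> Header critical edge to obtain a hoisting point.
    // SplitCriticalEdge returns NULL when AnalyzeBranch fails (e.g. the
    // block ends in a jump table), so the result must be checked.
    if (MachineBasicBlock *NMBB = Pred->SplitCriticalEdge(Header, this)) {
      // NMBB now sits between Pred and Header; PHIs are rewritten, and
      // dominator/loop analyses are updated when available.
    }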
+
/// removeFromParent - This method unlinks 'this' from the containing function,
/// and returns it, but does not delete it.
MachineBasicBlock *MachineBasicBlock::removeFromParent() {
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 6f4f7a8..833cc00 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -30,9 +30,7 @@ using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
-
-static cl::opt<bool> CSEPhysDef("machine-cse-phys-defs",
- cl::init(false), cl::Hidden);
+STATISTIC(NumPhysCSEs, "Number of physreg-defining common subexprs eliminated");
namespace {
class MachineCSE : public MachineFunctionPass {
@@ -128,6 +126,28 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
++NumCoalesces;
Changed = true;
}
+
+ if (!DefMI->isCopy())
+ continue;
+ SrcReg = DefMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ continue;
+ if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+ continue;
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
+ if (!NewRC)
+ continue;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MO.setReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
+ if (NewRC != SRC)
+ MRI->setRegClass(SrcReg, NewRC);
+ DefMI->eraseFromParent();
+ ++NumCoalesces;
+ Changed = true;
}
return Changed;
@@ -172,7 +192,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
/// hasLivePhysRegDefUse - Return true if the specified instruction reads/writes
/// physical registers (except for dead defs of physical registers). It also
-/// returns the physical register def by reference if it's the only one.
+/// returns the physical register def by reference if it's the only one and the
+/// instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
const MachineBasicBlock *MBB,
unsigned &PhysDef) const {
@@ -186,9 +207,11 @@ bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- if (MO.isUse())
+ if (MO.isUse()) {
// Can't touch anything that reads a physical register.
+ PhysDef = 0;
return true;
+ }
if (MO.isDead())
// If the def is dead, it's ok.
continue;
@@ -240,8 +263,8 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
- MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg();
+ return MI->isCopyLike() ||
+ TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
}
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
@@ -356,6 +379,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
if (!isCSECandidate(MI))
continue;
+ bool DefPhys = false;
bool FoundCSE = VNT.count(MI);
if (!FoundCSE) {
// Look for trivial copy coalescing opportunities.
@@ -376,11 +400,13 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// ... Unless the CS is local and it also defines the physical register
// which is not clobbered in between.
- if (PhysDef && CSEPhysDef) {
+ if (PhysDef) {
unsigned CSVN = VNT.lookup(MI);
MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefReaches(CSMI, MI, PhysDef))
+ if (PhysRegDefReaches(CSMI, MI, PhysDef)) {
FoundCSE = true;
+ DefPhys = true;
+ }
}
}
@@ -426,6 +452,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
}
MI->eraseFromParent();
++NumCSEs;
+ if (DefPhys)
+ ++NumPhysCSEs;
} else {
DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
VNT.insert(MI, CurrVN++);
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 4088739..b5f8fbb 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -46,7 +46,6 @@ MachineDominatorTree::MachineDominatorTree()
}
MachineDominatorTree::~MachineDominatorTree() {
- DT->releaseMemory();
delete DT;
}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index a38c881..666120f 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -378,7 +378,7 @@ void MachineFunction::viewCFG() const
#ifndef NDEBUG
ViewGraph(this, "mf" + getFunction()->getNameStr());
#else
- errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ errs() << "MachineFunction::viewCFG is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
@@ -388,7 +388,7 @@ void MachineFunction::viewCFGOnly() const
#ifndef NDEBUG
ViewGraph(this, "mf" + getFunction()->getNameStr(), true);
#else
- errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+ errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
<< "systems with Graphviz or gv!\n";
#endif // NDEBUG
}
@@ -438,10 +438,16 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
/// index with a negative value.
///
int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
- bool Immutable, bool isSS) {
+ bool Immutable) {
assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
- Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable,
- isSS));
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = TFI.getStackAlignment();
+ unsigned Align = MinAlign(SPOffset, StackAlign);
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+ /*isSS*/false));
return -++NumFixedObjects;
}
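
The inference in the comment is pure arithmetic: an object at offset O from a boundary aligned to S bytes is aligned to the largest power of two dividing both O and S, i.e. the lowest set bit of O | S. A standalone model of MinAlign (the real definition lives in Support/MathExtras.h; this reimplementation is for illustration):

    #include <cassert>
    #include <stdint.h>

    // Largest power of two dividing both A and B.
    static uint64_t minAlign(uint64_t A, uint64_t B) {
      uint64_t V = A | B;
      return V & (~V + 1); // isolate the lowest set bit
    }

    int main() {
      assert(minAlign(32, 16) == 16); // the example from the comment
      assert(minAlign(20, 16) == 4);  // offset 20 only guarantees 4 bytes
      return 0;
    }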
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index e54cd5c..6b2e985 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -111,6 +111,26 @@ void MachineOperand::setReg(unsigned Reg) {
Contents.Reg.RegNo = Reg;
}
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+ const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ if (SubIdx && getSubReg())
+ SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+ setReg(Reg);
+ if (SubIdx)
+ setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (getSubReg()) {
+ Reg = TRI.getSubReg(Reg, getSubReg());
+ assert(Reg && "Invalid SubReg for physical register");
+ setSubReg(0);
+ }
+ setReg(Reg);
+}
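
The two helpers keep one invariant: an operand's (register, subindex) pair always names the same physical lane. A hedged before/after sketch of each (index and register names hypothetical):

    //   substVirtReg(%vreg2048, sub_hi, TRI) on an operand holding
    //   %vreg1024:sub_lo
    //     -> reg = %vreg2048,
    //        subreg = TRI.composeSubRegIndices(sub_hi, sub_lo)
    //   substPhysReg(%EAX, TRI) on an operand holding subreg sub_16bit
    //     -> reg = TRI.getSubReg(%EAX, sub_16bit) == %AX, subreg = 0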
+
/// ChangeToImmediate - Replace this operand with a new immediate operand of
/// the specified value. If an operand is known to be an immediate already,
/// the setImm method should be used.
@@ -861,14 +881,14 @@ int MachineInstr::findFirstPredOperandIdx() const {
bool MachineInstr::
isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
if (isInlineAsm()) {
- assert(DefOpIdx >= 2);
+ assert(DefOpIdx >= 3);
const MachineOperand &MO = getOperand(DefOpIdx);
if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
return false;
// Determine the actual operand index that corresponds to this index.
unsigned DefNo = 0;
unsigned DefPart = 0;
- for (unsigned i = 1, e = getNumOperands(); i < e; ) {
+ for (unsigned i = 2, e = getNumOperands(); i < e; ) {
const MachineOperand &FMO = getOperand(i);
// After the normal asm operands there may be additional imp-def regs.
if (!FMO.isImm())
@@ -883,7 +903,7 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
}
++DefNo;
}
- for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+ for (unsigned i = 2, e = getNumOperands(); i != e; ++i) {
const MachineOperand &FMO = getOperand(i);
if (!FMO.isImm())
continue;
@@ -926,7 +946,7 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
// Find the flag operand corresponding to UseOpIdx
unsigned FlagIdx, NumOps=0;
- for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+ for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
const MachineOperand &UFMO = getOperand(FlagIdx);
// After the normal asm operands there may be additional imp-def regs.
if (!UFMO.isImm())
@@ -944,9 +964,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
if (!DefOpIdx)
return true;
- unsigned DefIdx = 1;
- // Remember to adjust the index. First operand is asm string, then there
- // is a flag for each.
+ unsigned DefIdx = 2;
+ // Remember to adjust the index. First operand is asm string, second is
+ // the AlignStack bit, then there is a flag for each.
while (DefNo) {
const MachineOperand &FMO = getOperand(DefIdx);
assert(FMO.isImm());
@@ -1017,6 +1037,29 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) {
}
}
+void MachineInstr::substituteRegister(unsigned FromReg,
+ unsigned ToReg,
+ unsigned SubIdx,
+ const TargetRegisterInfo &RegInfo) {
+ if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (SubIdx)
+ ToReg = RegInfo.getSubReg(ToReg, SubIdx);
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substPhysReg(ToReg, RegInfo);
+ }
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != FromReg)
+ continue;
+ MO.substVirtReg(ToReg, SubIdx, RegInfo);
+ }
+ }
+}
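
substituteRegister simply dispatches the right helper over every operand that mentions FromReg. A hedged call-site fragment (a coalescer folding a virtual register into its physical assignment; names hypothetical):

    // Rewrite all mentions of VirtReg in MI to PhysReg; any operand
    // subindices are resolved through getSubReg on the physical side.
    MI->substituteRegister(VirtReg, PhysReg, /*SubIdx=*/0, *TRI);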
+
/// isSafeToMove - Return true if it is safe to move this instruction. If
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
@@ -1168,6 +1211,28 @@ void MachineInstr::dump() const {
dbgs() << " " << *this;
}
+static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
+ raw_ostream &CommentOS) {
+ const LLVMContext &Ctx = MF->getFunction()->getContext();
+ if (!DL.isUnknown()) { // Print source line info.
+ DIScope Scope(DL.getScope(Ctx));
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (Scope.Verify())
+ CommentOS << Scope.getFilename();
+ else
+ CommentOS << "<unknown>";
+ CommentOS << ':' << DL.getLine();
+ if (DL.getCol() != 0)
+ CommentOS << ':' << DL.getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ CommentOS << " @[ ";
+ printDebugLoc(InlinedAtDL, MF, CommentOS);
+ CommentOS << " ]";
+ }
+ }
+}
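
The recursion prints the innermost location first and then walks outward through the inlined-at chain, so a location inlined through two calls renders roughly as (file names hypothetical):

    //   inline.h:12:5 @[ helper.c:88:3 @[ main.c:42 ] ]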
+
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
const MachineFunction *MF = 0;
@@ -1240,6 +1305,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << "!\"" << MDS->getString() << '\"';
else
MO.print(OS, TM);
+ } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+ OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
} else
MO.print(OS, TM);
}
@@ -1265,19 +1332,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";";
-
- // TODO: print InlinedAtLoc information
-
- DIScope Scope(debugLoc.getScope(MF->getFunction()->getContext()));
OS << " dbg:";
- // Omit the directory, since it's usually long and uninteresting.
- if (Scope.Verify())
- OS << Scope.getFilename();
- else
- OS << "<unknown>";
- OS << ':' << debugLoc.getLine();
- if (debugLoc.getCol() != 0)
- OS << ':' << debugLoc.getCol();
+ printDebugLoc(debugLoc, MF, OS);
}
OS << "\n";
@@ -1418,6 +1474,25 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg,
true /*IsImp*/));
}
+void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+ const TargetRegisterInfo &TRI) {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+ bool Dead = true;
+ for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(),
+ E = UsedRegs.end(); I != E; ++I)
+ if (TRI.regsOverlap(*I, Reg)) {
+ Dead = false;
+ break;
+ }
+ // If there are no uses, including partial uses, the def is dead.
+ if (Dead) MO.setIsDead();
+ }
+}
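
Each def stays live only if some register in UsedRegs overlaps it; regsOverlap is what lets a partial use such as %AX keep a def of %EAX alive. A standalone model with a pluggable overlap predicate:

    #include <vector>

    struct Def { unsigned Reg; bool Dead; };

    // Mark every def dead unless it overlaps a used register. 'overlaps'
    // stands in for TargetRegisterInfo::regsOverlap, which also matches
    // aliasing sub/super-registers.
    static void markDeadExcept(std::vector<Def> &Defs,
                               const std::vector<unsigned> &Used,
                               bool (*overlaps)(unsigned, unsigned)) {
      for (size_t i = 0, e = Defs.size(); i != e; ++i) {
        bool Dead = true;
        for (size_t j = 0, f = Used.size(); j != f; ++j)
          if (overlaps(Used[j], Defs[i].Reg)) { Dead = false; break; }
        if (Dead) Defs[i].Dead = true;
      }
    }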
+
unsigned
MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
unsigned Hash = MI->getOpcode() * 37;
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 6120617..956d21c 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -62,6 +62,7 @@ namespace {
// State that is updated as we process loops
bool Changed; // True if a loop is changed.
+ bool FirstInLoop; // True if it's the first LICM in the loop.
MachineLoop *CurLoop; // The current loop we are working on.
MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
@@ -82,7 +83,6 @@ namespace {
const char *getPassName() const { return "Machine Instruction LICM"; }
- // FIXME: Loop preheaders?
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineLoopInfo>();
@@ -127,8 +127,8 @@ namespace {
void AddToLiveIns(unsigned Reg);
/// IsLICMCandidate - Returns true if the instruction may be a suitable
- /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
- /// not safe to hoist it.
+ /// candidate for LICM. e.g. If the instruction is a call, then it's
+ /// obviously not safe to hoist it.
bool IsLICMCandidate(MachineInstr &I);
/// IsLoopInvariantInst - Returns true if the instruction is loop
@@ -181,6 +181,10 @@ namespace {
/// current loop preheader that may become duplicates of instructions that
/// are hoisted out of the loop.
void InitCSEMap(MachineBasicBlock *BB);
+
+ /// getCurPreheader - Get the preheader for the current loop, splitting
+ /// a critical edge if needed.
+ MachineBasicBlock *getCurPreheader();
};
} // end anonymous namespace
@@ -192,12 +196,17 @@ FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
return new MachineLICM(PreRegAlloc);
}
-/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most
-/// loop that has a preheader.
-static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
+/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
+/// loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+ // Check whether this loop even has a unique predecessor.
+ if (!CurLoop->getLoopPredecessor())
+ return false;
+ // Ok, now check to see if any of its outer loops do.
for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
- if (L->getLoopPreheader())
+ if (L->getLoopPredecessor())
return false;
+ // None of them did, so this is the outermost with a unique predecessor.
return true;
}
@@ -207,7 +216,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
else
DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n");
- Changed = false;
+ Changed = FirstInLoop = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
TRI = TM->getRegisterInfo();
@@ -220,23 +229,17 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
DT = &getAnalysis<MachineDominatorTree>();
AA = &getAnalysis<AliasAnalysis>();
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){
- CurLoop = *I;
+ SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+ while (!Worklist.empty()) {
+ CurLoop = Worklist.pop_back_val();
+ CurPreheader = 0;
// If this is done before regalloc, only visit outer-most preheader-sporting
// loops.
- if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop))
- continue;
-
- // Determine the block to which to hoist instructions. If we can't find a
- // suitable loop preheader, we can't do any hoisting.
- //
- // FIXME: We are only hoisting if the basic block coming into this loop
- // has only one successor. This isn't the case in general because we haven't
- // broken critical edges or added preheaders.
- CurPreheader = CurLoop->getLoopPreheader();
- if (!CurPreheader)
+ if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+ Worklist.append(CurLoop->begin(), CurLoop->end());
continue;
+ }
if (!PreRegAlloc)
HoistRegionPostRA();
@@ -244,6 +247,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
// CSEMap is initialized for loop header when the first instruction is
// being hoisted.
MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+ FirstInLoop = true;
HoistRegion(N);
CSEMap.clear();
}
@@ -436,13 +440,16 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
/// operands that is safe to hoist, this instruction is called to do the
/// dirty work.
void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader) return;
+
// Now move the instructions to the predecessor, inserting it before any
// terminator instructions.
DEBUG({
dbgs() << "Hoisting " << *MI;
- if (CurPreheader->getBasicBlock())
+ if (Preheader->getBasicBlock())
dbgs() << " to MachineBasicBlock "
- << CurPreheader->getName();
+ << Preheader->getName();
if (MI->getParent()->getBasicBlock())
dbgs() << " from MachineBasicBlock "
<< MI->getParent()->getName();
@@ -451,7 +458,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
// Splice the instruction to the preheader.
MachineBasicBlock *MBB = MI->getParent();
- CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI);
+ Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
// Add register to livein list to all the BBs in the current loop since a
// loop invariant must be kept live throughout the whole loop. This is
@@ -490,26 +497,16 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
/// not safe to hoist it.
bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+ // It is not profitable to hoist implicitdefs. FIXME: Why not? What if they
+ // are an argument to some other otherwise-hoistable instruction?
if (I.isImplicitDef())
return false;
-
- const TargetInstrDesc &TID = I.getDesc();
- // Ignore stuff that we obviously can't hoist.
- if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
- TID.hasUnmodeledSideEffects())
+ // Check if it's safe to move the instruction.
+ bool DontMoveAcrossStore = true;
+ if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
return false;
-
- if (TID.mayLoad()) {
- // Okay, this instruction does a load. As a refinement, we allow the target
- // to decide whether the loaded value is actually a constant. If so, we can
- // actually use it as a load.
- if (!I.isInvariantLoad(AA))
- // FIXME: we should be able to hoist loads with no other side effects if
- // there are no other instructions which can change memory in this loop.
- // This is a trivial form of alias analysis.
- return false;
- }
+
return true;
}
@@ -754,6 +751,9 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
/// that are safe to hoist, this instruction is called to do the dirty work.
///
void MachineLICM::Hoist(MachineInstr *MI) {
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader) return;
+
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
// If not, try unfolding a hoistable load.
@@ -765,9 +765,9 @@ void MachineLICM::Hoist(MachineInstr *MI) {
// terminator instructions.
DEBUG({
dbgs() << "Hoisting " << *MI;
- if (CurPreheader->getBasicBlock())
+ if (Preheader->getBasicBlock())
dbgs() << " to MachineBasicBlock "
- << CurPreheader->getName();
+ << Preheader->getName();
if (MI->getParent()->getBasicBlock())
dbgs() << " from MachineBasicBlock "
<< MI->getParent()->getName();
@@ -776,7 +776,10 @@ void MachineLICM::Hoist(MachineInstr *MI) {
// If this is the first instruction being hoisted to the preheader,
// initialize the CSE map with potential common expressions.
- InitCSEMap(CurPreheader);
+ if (FirstInLoop) {
+ InitCSEMap(Preheader);
+ FirstInLoop = false;
+ }
// Look for opportunity to CSE the hoisted instruction.
unsigned Opcode = MI->getOpcode();
@@ -784,7 +787,7 @@ void MachineLICM::Hoist(MachineInstr *MI) {
CI = CSEMap.find(Opcode);
if (!EliminateCSE(MI, CI)) {
// Otherwise, splice the instruction to the preheader.
- CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI);
+ Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
// Clear the kill flags of any register this instruction defines,
// since they may need to be live throughout the entire loop
@@ -808,3 +811,30 @@ void MachineLICM::Hoist(MachineInstr *MI) {
++NumHoisted;
Changed = true;
}
+
+MachineBasicBlock *MachineLICM::getCurPreheader() {
+ // Determine the block to which to hoist instructions. If we can't find a
+ // suitable loop predecessor, we can't do any hoisting.
+
+ // If we've tried to get a preheader and failed, don't try again.
+ if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+ return 0;
+
+ if (!CurPreheader) {
+ CurPreheader = CurLoop->getLoopPreheader();
+ if (!CurPreheader) {
+ MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+ if (!Pred) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+
+ CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
+ if (!CurPreheader) {
+ CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+ return 0;
+ }
+ }
+ }
+ return CurPreheader;
+}
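
The cache packs three states into one pointer: 0 means "not computed yet", a real block is a success, and (MachineBasicBlock*)-1 means "tried and failed", so a loop without a usable predecessor is probed only once. A standalone sketch of the same memoization pattern:

    // Tri-state cached pointer: unset / failed / valid.
    template <typename T> class TriStateCache {
      T *Ptr;
      static T *failedTag() { return reinterpret_cast<T *>(-1); }
    public:
      TriStateCache() : Ptr(0) {}
      bool computed() const { return Ptr != 0; }
      void set(T *P) { Ptr = P ? P : failedTag(); } // record failure too
      T *get() const { return Ptr == failedTag() ? 0 : Ptr; }
    };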
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 70bf7e5..5d852f2 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1.
+ RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()];
UsedPhysRegs.resize(TRI.getNumRegs());
// Create the physreg use/def lists.
@@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() {
"PhysRegUseDefLists has entries after all instructions are deleted");
#endif
delete [] PhysRegUseDefLists;
+ delete [] RegClass2VRegMap;
}
/// setRegClass - Set the register class of the specified virtual register.
@@ -52,7 +53,7 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
// Remove from old register class's vregs list. This may be slow but
// fortunately this operation is rarely needed.
std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
- std::vector<unsigned>::iterator I=std::find(VRegs.begin(), VRegs.end(), VR);
+ std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR);
VRegs.erase(I);
// Add to new register class's vregs list.
@@ -174,115 +175,36 @@ unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
return 0;
}
-static cl::opt<bool>
-SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
- cl::desc("Schedule copies of livein registers"),
- cl::init(false));
-
-/// EmitLiveInCopy - Emit a copy for a live in physical register. If the
-/// physical register has only a single copy use, then coalesced the copy
-/// if possible.
-static void EmitLiveInCopy(MachineBasicBlock *MBB,
- MachineBasicBlock::iterator &InsertPos,
- unsigned VirtReg, unsigned PhysReg,
- const TargetRegisterClass *RC,
- DenseMap<MachineInstr*, unsigned> &CopyRegMap,
- const MachineRegisterInfo &MRI,
- const TargetRegisterInfo &TRI,
- const TargetInstrInfo &TII) {
- unsigned NumUses = 0;
- MachineInstr *UseMI = NULL;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
- UE = MRI.use_end(); UI != UE; ++UI) {
- UseMI = &*UI;
- if (++NumUses > 1)
- break;
- }
-
- // If the number of uses is not one, or the use is not a move instruction,
- // don't coalesce. Also, only coalesce away a virtual register to virtual
- // register copy.
- bool Coalesced = false;
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (NumUses == 1 &&
- TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- TargetRegisterInfo::isVirtualRegister(DstReg)) {
- VirtReg = DstReg;
- Coalesced = true;
- }
-
- // Now find an ideal location to insert the copy.
- MachineBasicBlock::iterator Pos = InsertPos;
- while (Pos != MBB->begin()) {
- MachineInstr *PrevMI = prior(Pos);
- DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
- // copyRegToReg might emit multiple instructions to do a copy.
- unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
- if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
- // This is what the BB looks like right now:
- // r1024 = mov r0
- // ...
- // r1 = mov r1024
- //
- // We want to insert "r1025 = mov r1". Inserting this copy below the
- // move to r1024 makes it impossible for that move to be coalesced.
- //
- // r1025 = mov r1
- // r1024 = mov r0
- // ...
- // r1 = mov 1024
- // r2 = mov 1025
- break; // Woot! Found a good location.
- --Pos;
- }
-
- bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC,
- DebugLoc());
- assert(Emitted && "Unable to issue a live-in copy instruction!\n");
- (void) Emitted;
-
- CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
- if (Coalesced) {
- if (&*InsertPos == UseMI) ++InsertPos;
- MBB->erase(UseMI);
- }
-}
-
/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
/// into the given entry block.
void
MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
const TargetRegisterInfo &TRI,
const TargetInstrInfo &TII) {
- if (SchedLiveInCopies) {
- // Emit the copies at a heuristically-determined location in the block.
- DenseMap<MachineInstr*, unsigned> CopyRegMap;
- MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
- for (MachineRegisterInfo::livein_iterator LI = livein_begin(),
- E = livein_end(); LI != E; ++LI)
- if (LI->second) {
- const TargetRegisterClass *RC = getRegClass(LI->second);
- EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
- RC, CopyRegMap, *this, TRI, TII);
+ // Emit the copies into the top of the block.
+ for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+ if (LiveIns[i].second) {
+ if (use_empty(LiveIns[i].second)) {
+ // The livein has no uses. Drop it.
+ //
+ // It would be preferable to have isel avoid creating live-in
+ // records for unused arguments in the first place, but it's
+ // complicated by the debug info code for arguments.
+ LiveIns.erase(LiveIns.begin() + i);
+ --i; --e;
+ } else {
+ // Emit a copy.
+ BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+ TII.get(TargetOpcode::COPY), LiveIns[i].second)
+ .addReg(LiveIns[i].first);
+
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
}
- } else {
- // Emit the copies into the top of the block.
- for (MachineRegisterInfo::livein_iterator LI = livein_begin(),
- E = livein_end(); LI != E; ++LI)
- if (LI->second) {
- const TargetRegisterClass *RC = getRegClass(LI->second);
- bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
- LI->second, LI->first, RC, RC,
- DebugLoc());
- assert(Emitted && "Unable to issue a live-in copy instruction!\n");
- (void) Emitted;
- }
- }
-
- // Add function live-ins to entry block live-in set.
- for (MachineRegisterInfo::livein_iterator I = livein_begin(),
- E = livein_end(); I != E; ++I)
- EntryMBB->addLiveIn(I->first);
+ } else {
+ // Add the register to the entry block live-in set.
+ EntryMBB->addLiveIn(LiveIns[i].first);
+ }
}
void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 1610e6c..61334fc 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass moves instructions into successor blocks, when possible, so that
+// This pass moves instructions into successor blocks when possible, so that
// they aren't executed on paths where their results aren't needed.
//
// This pass is not intended to be a replacement or a complete alternative
@@ -45,9 +45,9 @@ namespace {
public:
static char ID; // Pass identification
MachineSinking() : MachineFunctionPass(&ID) {}
-
+
virtual bool runOnMachineFunction(MachineFunction &MF);
-
+
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -63,7 +63,7 @@ namespace {
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const;
};
} // end anonymous namespace
-
+
char MachineSinking::ID = 0;
static RegisterPass<MachineSinking>
X("machine-sink", "Machine code sinking");
@@ -72,7 +72,7 @@ FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block.
-bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
MachineBasicBlock *MBB) const {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
@@ -80,27 +80,30 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
// This may leave a referencing dbg_value in the original block, before
// the definition of the vreg. Dwarf generator handles this although the
// user might not get the right info at runtime.
- for (MachineRegisterInfo::use_nodbg_iterator I =
- RegInfo->use_nodbg_begin(Reg),
- E = RegInfo->use_nodbg_end(); I != E; ++I) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end();
+ I != E; ++I) {
// Determine the block of the use.
MachineInstr *UseInst = &*I;
MachineBasicBlock *UseBlock = UseInst->getParent();
+
if (UseInst->isPHI()) {
// PHI nodes use the operand in the predecessor block, not the block with
// the PHI.
UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB();
}
+
// Check that it dominates.
if (!DT->dominates(MBB, UseBlock))
return false;
}
+
return true;
}
bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "******** Machine Sinking ********\n");
-
+
const TargetMachine &TM = MF.getTarget();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
@@ -111,19 +114,19 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
AllocatableSet = TRI->getAllocatableSet(MF);
bool EverMadeChange = false;
-
+
while (1) {
bool MadeChange = false;
// Process all basic blocks.
- for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
I != E; ++I)
MadeChange |= ProcessBlock(*I);
-
+
// If this iteration over the code changed anything, keep iterating.
if (!MadeChange) break;
EverMadeChange = true;
- }
+ }
return EverMadeChange;
}
@@ -132,8 +135,8 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (MBB.succ_size() <= 1 || MBB.empty()) return false;
// Don't bother sinking code out of unreachable blocks. In addition to being
- // unprofitable, it can also lead to infinite looping, because in an unreachable
- // loop there may be nowhere to stop.
+ // unprofitable, it can also lead to infinite looping, because in an
+ // unreachable loop there may be nowhere to stop.
if (!DT->isReachableFromEntry(&MBB)) return false;
bool MadeChange = false;
@@ -144,7 +147,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
bool ProcessedBegin, SawStore = false;
do {
MachineInstr *MI = I; // The instruction to sink.
-
+
// Predecrement I (if it's not begin) so that it isn't invalidated by
// sinking.
ProcessedBegin = I == MBB.begin();
@@ -156,10 +159,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (SinkInstruction(MI, SawStore))
++NumSunk, MadeChange = true;
-
+
// If we just processed the first instruction in the block, we're done.
} while (!ProcessedBegin);
-
+
return MadeChange;
}
@@ -169,7 +172,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Check if it's safe to move the instruction.
if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
-
+
// FIXME: This should include support for sinking instructions within the
// block they are currently in to shorten the live ranges. We often get
// instructions sunk into the top of a large block, but it would be better to
@@ -177,22 +180,22 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// be careful not to *increase* register pressure though, e.g. sinking
// "x = y + z" down if it kills y and z would increase the live ranges of y
// and z and only shrink the live range of x.
-
+
// Loop over all the operands of the specified instruction. If there is
// anything we can't handle, bail out.
MachineBasicBlock *ParentBlock = MI->getParent();
-
+
// SuccToSinkTo - This is the successor to sink this instruction to, once we
// decide.
MachineBasicBlock *SuccToSinkTo = 0;
-
+
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
-
+
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
-
+
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
@@ -200,13 +203,16 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// it could get allocated to something with a def during allocation.
if (!RegInfo->def_empty(Reg))
return false;
+
if (AllocatableSet.test(Reg))
return false;
+
// Check for a def among the register's aliases too.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
if (!RegInfo->def_empty(AliasReg))
return false;
+
if (AllocatableSet.test(AliasReg))
return false;
}
@@ -221,28 +227,31 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If it's not safe to move defs of the register class, then abort.
if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
return false;
-
+
// FIXME: This picks a successor to sink into based on having one
// successor that dominates all the uses. However, there are cases where
// sinking can happen but where the sink point isn't a successor. For
// example:
+ //
// x = computation
// if () {} else {}
// use x
- // the instruction could be sunk over the whole diamond for the
+ //
+ // the instruction could be sunk over the whole diamond for the
// if/then/else (or loop, etc), allowing it to be sunk into other blocks
// after that.
-
+
// Virtual register defs can only be sunk if all their uses are in blocks
// dominated by one of the successors.
if (SuccToSinkTo) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo))
return false;
+
continue;
}
-
+
// Otherwise, we should look at all the successors and decide which one
// we should sink to.
for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
@@ -252,13 +261,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
break;
}
}
-
+
// If we couldn't find a block to sink to, ignore this instruction.
if (SuccToSinkTo == 0)
return false;
}
}
-
+
// If there are no outputs, it must have side-effects.
if (SuccToSinkTo == 0)
return false;
@@ -267,15 +276,26 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// landing pad is implicitly defined.
if (SuccToSinkTo->isLandingPad())
return false;
-
+
// It is not possible to sink an instruction into its own block. This can
// happen with loops.
if (MI->getParent() == SuccToSinkTo)
return false;
-
- DEBUG(dbgs() << "Sink instr " << *MI);
- DEBUG(dbgs() << "to block " << *SuccToSinkTo);
-
+
+ // If the instruction to move defines a dead physical register which is live
+ // when leaving the basic block, don't move it because it could turn into a
+ // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>)
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (SuccToSinkTo->isLiveIn(Reg))
+ return false;
+ }
+
+ DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+
// If the block has multiple predecessors, this would introduce computation on
// a path where it doesn't already exist. We could split the critical edge,
// but for now we just punt.
@@ -305,18 +325,18 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Otherwise we are OK with sinking along a critical edge.
DEBUG(dbgs() << "Sinking along critical edge.\n");
}
-
- // Determine where to insert into. Skip phi nodes.
+
+ // Determine where to insert into. Skip phi nodes.
MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
++InsertPos;
-
+
// Move the instruction.
SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
++MachineBasicBlock::iterator(MI));
- // Conservatively, clear any kill flags, since it's possible that
- // they are no longer correct.
+ // Conservatively, clear any kill flags, since it's possible that they are no
+ // longer correct.
MI->clearKillInfo();
return true;
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 8baf01c..2297c90 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -390,7 +390,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
- if (!MBB->empty() && MBB->back().getDesc().isBarrier()) {
+ if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+ !TII->isPredicated(&MBB->back())) {
report("MBB exits via unconditional fall-through but ends with a "
"barrier instruction!", MBB);
}
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
index 41fc204..dcdc243 100644
--- a/lib/CodeGen/OptimizeExts.cpp
+++ b/lib/CodeGen/OptimizeExts.cpp
@@ -118,6 +118,26 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
continue;
}
+ // It's an error to translate this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+ //
+ // into this:
+ //
+ // %reg1025 = <sext> %reg1024
+ // ...
+ // %reg1027 = COPY %reg1025:4
+ // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+ //
+ // The problem here is that SUBREG_TO_REG is there to assert that an
+ // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+ // the COPY here, it will give us the value after the <sext>,
+ // not the original value of %reg1024 before <sext>.
+ if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+ continue;
+
MachineBasicBlock *UseMBB = UseMI->getParent();
if (UseMBB == MBB) {
// Local uses that come after the extension.
@@ -165,8 +185,8 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
continue;
unsigned NewVR = MRI->createVirtualRegister(RC);
BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
- TII->get(TargetOpcode::EXTRACT_SUBREG), NewVR)
- .addReg(DstReg).addImm(SubIdx);
+ TII->get(TargetOpcode::COPY), NewVR)
+ .addReg(DstReg, 0, SubIdx);
UseMO->setReg(NewVR);
++NumReuse;
Changed = true;
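
An aside on the EXTRACT_SUBREG-to-COPY migration visible in this hunk: the
subregister index now rides on the source operand of a COPY rather than being
a separate immediate operand. A hypothetical helper, not part of the patch,
sketching the new idiom (all names illustrative only):

    // Insert NewVR = COPY SrcReg:SubIdx before InsertPt and return NewVR.
    static unsigned copyFromSubReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator InsertPt,
                                   DebugLoc DL, unsigned SrcReg,
                                   unsigned SubIdx,
                                   const TargetRegisterClass *RC,
                                   MachineRegisterInfo *MRI,
                                   const TargetInstrInfo *TII) {
      unsigned NewVR = MRI->createVirtualRegister(RC);
      BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), NewVR)
          .addReg(SrcReg, 0, SubIdx); // flags = 0, subreg index on the use
      return NewVR;
    }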
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 2717d4d..1613fe2 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -107,6 +107,11 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
SrcSubIdx == 0 && DstSubIdx == 0 &&
TargetRegisterInfo::isVirtualRegister(MvSrcReg))
SrcMI = MRI->getVRegDef(MvSrcReg);
+ else if (SrcMI && SrcMI->isCopy() &&
+ !SrcMI->getOperand(0).getSubReg() &&
+ !SrcMI->getOperand(1).getSubReg() &&
+ TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg()))
+ SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
if (!SrcMI)
return false;
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index bd18b52..02938df 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -406,7 +406,7 @@ namespace PBQP {
// Create node data objects.
for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
- nItr != nEnd; ++nItr) {
+ nItr != nEnd; ++nItr) {
nodeDataList.push_back(NodeData());
g.setNodeData(nItr, &nodeDataList.back());
}
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
index 30d34d9..4c1ce11 100644
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h
@@ -18,7 +18,6 @@
#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
-#include "llvm/Support/Compiler.h"
#include "../HeuristicSolver.h"
#include "../HeuristicBase.h"
@@ -267,8 +266,8 @@ namespace PBQP {
if (!nd.isHeuristic)
return;
- EdgeData &ed ATTRIBUTE_UNUSED = getHeuristicEdgeData(eItr);
-
+ EdgeData &ed = getHeuristicEdgeData(eItr);
+ (void)ed;
assert(ed.isUpToDate && "Edge data is not up to date.");
// Update node.
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index edbc13f..ea6b094 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -34,7 +34,6 @@
using namespace llvm;
STATISTIC(NumAtomic, "Number of atomic phis lowered");
-STATISTIC(NumSplits, "Number of critical edges split on demand");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
@@ -184,7 +183,6 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Create a new register for the incoming PHI arguments.
MachineFunction &MF = *MBB.getParent();
- const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
unsigned IncomingReg = 0;
bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI?
@@ -208,10 +206,12 @@ void llvm::PHIElimination::LowerAtomicPHINode(
++NumReused;
DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
} else {
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
}
- TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC,
- MPhi->getDebugLoc());
+ BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), DestReg)
+ .addReg(IncomingReg);
}
// Update live variable information if there is any.
@@ -293,8 +293,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Insert the copy.
if (!reusedIncoming && IncomingReg)
- TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC,
- MPhi->getDebugLoc());
+ BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg);
// Now update live variable information if we have it. Otherwise we're done
if (!LV) continue;
@@ -391,57 +391,8 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
// (not considering PHI nodes). If the register is live in to this block
// anyway, we would gain nothing from splitting.
if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB))
- SplitCriticalEdge(PreMBB, &MBB);
+ PreMBB->SplitCriticalEdge(&MBB, this);
}
}
return true;
}
-
-MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
- MachineBasicBlock *B) {
- assert(A && B && "Missing MBB end point");
-
- MachineFunction *MF = A->getParent();
-
- // We may need to update A's terminator, but we can't do that if AnalyzeBranch
- // fails. If A uses a jump table, we won't touch it.
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- if (TII->AnalyzeBranch(*A, TBB, FBB, Cond))
- return NULL;
-
- ++NumSplits;
-
- MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
- MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB);
- DEBUG(dbgs() << "PHIElimination splitting critical edge:"
- " BB#" << A->getNumber()
- << " -- BB#" << NMBB->getNumber()
- << " -- BB#" << B->getNumber() << '\n');
-
- A->ReplaceUsesOfBlockWith(B, NMBB);
- A->updateTerminator();
-
- // Insert unconditional "jump B" instruction in NMBB if necessary.
- NMBB->addSuccessor(B);
- if (!NMBB->isLayoutSuccessor(B)) {
- Cond.clear();
- MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond);
- }
-
- // Fix PHI nodes in B so they refer to NMBB instead of A
- for (MachineBasicBlock::iterator i = B->begin(), e = B->end();
- i != e && i->isPHI(); ++i)
- for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
- if (i->getOperand(ni+1).getMBB() == A)
- i->getOperand(ni+1).setMBB(NMBB);
-
- if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>())
- LV->addNewBlock(NMBB, A, B);
-
- if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>())
- MDT->addNewBlock(NMBB, A);
-
- return NMBB;
-}
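
The on-demand edge splitting deleted here has moved into MachineBasicBlock,
so PHI elimination and other passes can share one implementation. A minimal
sketch of the two new idioms, with the return type of SplitCriticalEdge
assumed from its old local counterpart:

    // Split the PreMBB -> MBB edge; returns the new block, or null if the
    // terminator could not be analyzed.
    if (MachineBasicBlock *NMBB = PreMBB->SplitCriticalEdge(&MBB, this))
      (void)NMBB; // NMBB now sits between PreMBB and MBB

    // PHI-lowering copies are now target-independent COPY instructions:
    BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
            TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg);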
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index 5ea2941..3489db2 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -24,6 +24,11 @@ using namespace llvm;
//===---------------------------------------------------------------------===//
MachinePassRegistry RegisterRegAlloc::Registry;
+static FunctionPass *createDefaultRegisterAllocator() { return 0; }
+static RegisterRegAlloc
+defaultRegAlloc("default",
+ "pick register allocator based on -O option",
+ createDefaultRegisterAllocator);
//===---------------------------------------------------------------------===//
///
@@ -33,8 +38,8 @@ MachinePassRegistry RegisterRegAlloc::Registry;
static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
RegisterPassParser<RegisterRegAlloc> >
RegAlloc("regalloc",
- cl::init(&createLinearScanRegisterAllocator),
- cl::desc("Register allocator to use (default=linearscan)"));
+ cl::init(&createDefaultRegisterAllocator),
+ cl::desc("Register allocator to use"));
//===---------------------------------------------------------------------===//
@@ -42,13 +47,22 @@ RegAlloc("regalloc",
/// createRegisterAllocator - choose the appropriate register allocator.
///
//===---------------------------------------------------------------------===//
-FunctionPass *llvm::createRegisterAllocator() {
+FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) {
RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
-
+
if (!Ctor) {
Ctor = RegAlloc;
RegisterRegAlloc::setDefault(RegAlloc);
}
-
- return Ctor();
+
+ if (Ctor != createDefaultRegisterAllocator)
+ return Ctor();
+
+ // When the 'default' allocator is requested, pick one based on OptLevel.
+ switch (OptLevel) {
+ case CodeGenOpt::None:
+ return createFastRegisterAllocator();
+ default:
+ return createLinearScanRegisterAllocator();
+ }
}
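
With the new "default" pseudo-allocator, -regalloc no longer hard-wires
linear scan; the choice now follows the optimization level. A minimal
call-site sketch, with CodeGenOpt::Default standing in for whatever level the
target machine was configured with:

    // Picks the fast allocator at CodeGenOpt::None, linear scan otherwise.
    FunctionPass *RA = llvm::createRegisterAllocator(CodeGenOpt::Default);

An explicit -regalloc=linearscan (for example) still overrides this, since a
non-default ctor is returned before the OptLevel switch is reached.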
diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp
index af5f289..cbde2b0 100644
--- a/lib/CodeGen/ExactHazardRecognizer.cpp
+++ b/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -1,4 +1,4 @@
-//===----- ExactHazardRecognizer.cpp - hazard recognizer -------- ---------===//
+//===----- PostRAHazardRecognizer.cpp - hazard recognizer ----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "post-RA-sched"
-#include "ExactHazardRecognizer.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -22,10 +22,9 @@
using namespace llvm;
-ExactHazardRecognizer::
-ExactHazardRecognizer(const InstrItineraryData &LItinData) :
- ScheduleHazardRecognizer(), ItinData(LItinData)
-{
+PostRAHazardRecognizer::
+PostRAHazardRecognizer(const InstrItineraryData &LItinData) :
+ ScheduleHazardRecognizer(), ItinData(LItinData) {
// Determine the maximum depth of any itinerary. This determines the
// depth of the scoreboard. We always make the scoreboard at least 1
// cycle deep to avoid dealing with the boundary condition.
@@ -48,16 +47,16 @@ ExactHazardRecognizer(const InstrItineraryData &LItinData) :
ReservedScoreboard.reset(ScoreboardDepth);
RequiredScoreboard.reset(ScoreboardDepth);
- DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = "
+ DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = "
<< ScoreboardDepth << '\n');
}
-void ExactHazardRecognizer::Reset() {
+void PostRAHazardRecognizer::Reset() {
RequiredScoreboard.reset();
ReservedScoreboard.reset();
}
-void ExactHazardRecognizer::ScoreBoard::dump() const {
+void PostRAHazardRecognizer::ScoreBoard::dump() const {
dbgs() << "Scoreboard:\n";
unsigned last = Depth - 1;
@@ -73,7 +72,8 @@ void ExactHazardRecognizer::ScoreBoard::dump() const {
}
}
-ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU) {
+ScheduleHazardRecognizer::HazardType
+PostRAHazardRecognizer::getHazardType(SUnit *SU) {
if (ItinData.isEmpty())
return NoHazard;
@@ -120,7 +120,7 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
return NoHazard;
}
-void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
+void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) {
if (ItinData.isEmpty())
return;
@@ -174,7 +174,7 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
DEBUG(RequiredScoreboard.dump());
}
-void ExactHazardRecognizer::AdvanceCycle() {
+void PostRAHazardRecognizer::AdvanceCycle() {
ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
}
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 9714ea6..4af8e07 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -22,8 +22,6 @@
#include "AntiDepBreaker.h"
#include "AggressiveAntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
-#include "ExactHazardRecognizer.h"
-#include "SimpleHazardRecognizer.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
@@ -65,10 +63,6 @@ EnableAntiDepBreaking("break-anti-dependencies",
cl::desc("Break post-RA scheduling anti-dependencies: "
"\"critical\", \"all\", or \"none\""),
cl::init("none"), cl::Hidden);
-static cl::opt<bool>
-EnablePostRAHazardAvoidance("avoid-hazards",
- cl::desc("Enable exact hazard avoidance"),
- cl::init(true), cl::Hidden);
// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod
static cl::opt<int>
@@ -85,6 +79,7 @@ AntiDepBreaker::~AntiDepBreaker() { }
namespace {
class PostRAScheduler : public MachineFunctionPass {
AliasAnalysis *AA;
+ const TargetInstrInfo *TII;
CodeGenOpt::Level OptLevel;
public:
@@ -187,30 +182,9 @@ namespace {
};
}
-/// isSchedulingBoundary - Test if the given instruction should be
-/// considered a scheduling boundary. This primarily includes labels
-/// and terminators.
-///
-static bool isSchedulingBoundary(const MachineInstr *MI,
- const MachineFunction &MF) {
- // Terminators and labels can't be scheduled around.
- if (MI->getDesc().isTerminator() || MI->isLabel())
- return true;
-
- // Don't attempt to schedule around any instruction that defines
- // a stack-oriented pointer, as it's unlikely to be profitable. This
- // saves compile time, because it doesn't require every single
- // stack slot reference to depend on the instruction that does the
- // modification.
- const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
- if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
- return true;
-
- return false;
-}
-
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
AA = &getAnalysis<AliasAnalysis>();
+ TII = Fn.getTarget().getInstrInfo();
// Check for explicit enable/disable of post-ra scheduling.
TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
@@ -237,10 +211,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
- const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData();
- ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
- (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
- (ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
+ const TargetMachine &TM = Fn.getTarget();
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ ScheduleHazardRecognizer *HR =
+ TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins);
AntiDepBreaker *ADB =
((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
(AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
@@ -271,8 +245,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
MachineBasicBlock::iterator Current = MBB->end();
unsigned Count = MBB->size(), CurrentCount = Count;
for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) {
- MachineInstr *MI = prior(I);
- if (isSchedulingBoundary(MI, Fn)) {
+ MachineInstr *MI = llvm::prior(I);
+ if (TII->isSchedulingBoundary(MI, MBB, Fn)) {
Scheduler.Run(MBB, I, Current, CurrentCount);
Scheduler.EmitSchedule();
Current = MI;
@@ -680,15 +654,6 @@ void SchedulePostRATDList::ListScheduleTopDown() {
ScheduleNodeTopDown(FoundSUnit, CurCycle);
HazardRec->EmitInstruction(FoundSUnit);
CycleHasInsts = true;
-
- // If we are using the target-specific hazards, then don't
- // advance the cycle time just because we schedule a node. If
- // the target allows it we can schedule multiple nodes in the
- // same cycle.
- if (!EnablePostRAHazardAvoidance) {
- if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
- ++CurCycle;
- }
} else {
if (CycleHasInsts) {
DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n');
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 96e7327..fb2f909 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -512,9 +512,6 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI));
// FIXME: Need to set kills properly for inter-block stuff.
- if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex);
- if (IsIntraBlock)
- RetVNI->addKill(EndIndex);
} else if (ContainsDefs && ContainsUses) {
SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
@@ -556,12 +553,6 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
NewVNs, LiveOut, Phis, false, true);
LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
-
- if (foundUse && RetVNI->isKill(StartIndex))
- RetVNI->removeKill(StartIndex);
- if (IsIntraBlock) {
- RetVNI->addKill(EndIndex);
- }
}
// Memoize results so we don't have to recompute them.
@@ -636,9 +627,6 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
I->second->setHasPHIKill(true);
- SlotIndex KillIndex(LIs->getMBBEndIdx(I->first), true);
- if (!I->second->isKill(KillIndex))
- I->second->addKill(KillIndex);
}
}
@@ -648,8 +636,6 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
} else
EndIndex = LIs->getMBBEndIdx(MBB);
LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
- if (IsIntraBlock)
- RetVNI->addKill(EndIndex);
// Memoize results so we don't have to recompute them.
if (!IsIntraBlock)
@@ -691,10 +677,12 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
// If the def is a move, set the copy field.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
if (DstReg == LI->reg)
NewVN->setCopy(&*DI);
-
+ } else if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
+ NewVN->setCopy(&*DI);
+
NewVNs[&*DI] = NewVN;
}
@@ -725,25 +713,6 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
VNInfo* DeadVN = NewVNs[&*DI];
LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN));
- DeadVN->addKill(DefIdx);
- }
-
- // Update kill markers.
- for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end();
- VI != VE; ++VI) {
- VNInfo* VNI = *VI;
- for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) {
- SlotIndex KillIdx = VNI->kills[i];
- if (KillIdx.isPHI())
- continue;
- MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx);
- if (KillMI) {
- MachineOperand *KillMO = KillMI->findRegisterUseOperand(CurrLI->reg);
- if (KillMO)
- // It could be a dead def.
- KillMO->setIsKill();
- }
- }
}
}
@@ -773,19 +742,14 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
VNsToCopy.push_back(OldVN);
// Locate two-address redefinitions
- for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(),
- KE = OldVN->kills.end(); KI != KE; ++KI) {
- assert(!KI->isPHI() &&
- "VN previously reported having no PHI kills.");
- MachineInstr* MI = LIs->getInstructionFromIndex(*KI);
- unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg);
- if (DefIdx == ~0U) continue;
- if (MI->isRegTiedToUseOperand(DefIdx)) {
- VNInfo* NextVN =
- CurrLI->findDefinedVNInfoForRegInt(KI->getDefIndex());
- if (NextVN == OldVN) continue;
+ for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg),
+ DE = MRI->def_end(); DI != DE; ++DI) {
+ if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue;
+ SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex();
+ VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx);
+ if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) !=
+ VNsToCopy.end())
Stack.push_back(NextVN);
- }
}
}
@@ -836,7 +800,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
if (IntervalSSMap.count(CurrLI->reg))
IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg];
- NumRenumbers++;
+ ++NumRenumbers;
}
bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
@@ -854,7 +818,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
if (KillPt == DefMI->getParent()->end())
return false;
- TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI);
+ TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI);
SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt));
ReconstructLiveInterval(CurrLI);
@@ -899,12 +863,11 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
}
- MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
- FoldPt, Ops, SS);
+ MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
if (FMI) {
LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
- FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+ FoldPt->eraseFromParent();
++NumFolds;
IntervalSSMap[vreg] = SS;
@@ -980,12 +943,11 @@ MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
if (!TII->canFoldMemoryOperand(FoldPt, Ops))
return 0;
- MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
- FoldPt, Ops, SS);
+ MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
if (FMI) {
LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
- FMI = MBB->insert(MBB->erase(FoldPt), FMI);
+ FoldPt->eraseFromParent();
++NumRestoreFolds;
}
@@ -1192,7 +1154,7 @@ unsigned PreAllocSplitting::getNumberOfNonSpills(
int StoreFrameIndex;
unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
if (StoreVReg != Reg || StoreFrameIndex != FrameIndex)
- NonSpills++;
+ ++NonSpills;
int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg);
if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx))
@@ -1255,7 +1217,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
(*LI)->removeValNo(CurrVN);
DefMI->eraseFromParent();
VNUseCount.erase(CurrVN);
- NumDeadSpills++;
+ ++NumDeadSpills;
changed = true;
continue;
}
@@ -1291,9 +1253,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
Ops.push_back(OpIdx);
if (!TII->canFoldMemoryOperand(use, Ops)) continue;
- MachineInstr* NewMI =
- TII->foldMemoryOperand(*use->getParent()->getParent(),
- use, Ops, FrameIndex);
+ MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex);
if (!NewMI) continue;
@@ -1303,10 +1263,9 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
(*LI)->removeValNo(CurrVN);
DefMI->eraseFromParent();
- MachineBasicBlock* MBB = use->getParent();
- NewMI = MBB->insert(MBB->erase(use), NewMI);
+ use->eraseFromParent();
VNUseCount[CurrVN].erase(use);
-
+
// Remove deleted instructions. Note that we need to remove them from
// the VNInfo->use map as well, just to be safe.
for (SmallPtrSet<MachineInstr*, 4>::iterator II =
@@ -1328,7 +1287,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
if (VI->second.erase(use))
VI->second.insert(NewMI);
- NumDeadSpills++;
+ ++NumDeadSpills;
changed = true;
continue;
}
@@ -1350,7 +1309,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
LIs->RemoveMachineInstrFromMaps(DefMI);
(*LI)->removeValNo(CurrVN);
DefMI->eraseFromParent();
- NumDeadSpills++;
+ ++NumDeadSpills;
changed = true;
}
}
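
A recurring pattern in this file's hunks: foldMemoryOperand now splices the
folded instruction into the block itself, so callers erase the original
instead of re-inserting the result. Condensed sketch (FoldPt, Ops, and SS as
in the code above):

    if (MachineInstr *FMI = TII->foldMemoryOperand(FoldPt, Ops, SS)) {
      LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
      FoldPt->eraseFromParent(); // FMI is already in place of FoldPt
      ++NumFolds;
    }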
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 62f525f..ca4c477 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -46,14 +46,14 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
const TargetInstrInfo *tii_) {
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0)
+ Reg == SrcReg && DstSubReg == 0)
return true;
- if (OpIdx == 2 && MI->isSubregToReg())
- return true;
- if (OpIdx == 1 && MI->isExtractSubreg())
- return true;
- return false;
+ switch (OpIdx) {
+ case 1: return MI->isCopy() && MI->getOperand(0).getSubReg() == 0;
+ case 2: return MI->isSubregToReg() && MI->getOperand(0).getSubReg() == 0;
+ default: return false;
+ }
}
/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
@@ -101,11 +101,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
continue;
}
- if (MI->isInsertSubreg()) {
- MachineOperand &MO = MI->getOperand(2);
+ // Eliminate %reg1032:sub<def> = COPY undef.
+ if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
+ MachineOperand &MO = MI->getOperand(1);
if (ImpDefRegs.count(MO.getReg())) {
- // %reg1032<def> = INSERT_SUBREG %reg1032, undef, 2
- // This is an identity copy, eliminate it now.
if (MO.isKill()) {
LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
vi.removeKill(MI);
@@ -119,7 +118,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
bool ChangedToImpDef = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand& MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || MO.isUndef())
+ if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
@@ -144,6 +143,12 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
Changed = true;
MO.setIsUndef();
+ // This is a partial register redef of an implicit def.
+ // Make sure the whole register is defined by the instruction.
+ if (MO.isDef()) {
+ MI->addRegisterDefined(Reg);
+ continue;
+ }
if (MO.isKill() || MI->isRegTiedToDefOperand(i)) {
// Make sure other uses of the register are also marked undef.
for (unsigned j = i+1; j != e; ++j) {
@@ -219,8 +224,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
// Turn a copy use into an implicit_def.
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) {
+ if ((RMI->isCopy() && RMI->getOperand(1).getReg() == Reg &&
+ RMI->getOperand(0).getSubReg() == 0) ||
+ (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ Reg == SrcReg && DstSubReg == 0)) {
RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
bool isKill = false;
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index e778024..3843b25 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -158,9 +158,9 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
AdjustsStack = true;
FrameSDOps.push_back(I);
} else if (I->isInlineAsm()) {
- // An InlineAsm might be a call; assume it is to get the stack frame
- // aligned correctly for calls.
- AdjustsStack = true;
+ // Some inline asms need a stack frame, as indicated by operand 1.
+ if (I->getOperand(1).getImm())
+ AdjustsStack = true;
}
MFI->setAdjustsStack(AdjustsStack);
@@ -202,22 +202,17 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
if (Fn.getFunction()->hasFnAttr(Attribute::Naked))
return;
- // Figure out which *callee saved* registers are modified by the current
- // function, thus needing to be saved and restored in the prolog/epilog.
- const TargetRegisterClass * const *CSRegClasses =
- RegInfo->getCalleeSavedRegClasses(&Fn);
-
std::vector<CalleeSavedInfo> CSI;
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
if (Fn.getRegInfo().isPhysRegUsed(Reg)) {
// If the reg is modified, save it!
- CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ CSI.push_back(CalleeSavedInfo(Reg));
} else {
for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg);
*AliasSet; ++AliasSet) { // Check alias registers too.
if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) {
- CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i]));
+ CSI.push_back(CalleeSavedInfo(Reg));
break;
}
}
@@ -236,7 +231,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
for (std::vector<CalleeSavedInfo>::iterator
I = CSI.begin(), E = CSI.end(); I != E; ++I) {
unsigned Reg = I->getReg();
- const TargetRegisterClass *RC = I->getRegClass();
+ const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
int FrameIdx;
if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) {
@@ -265,8 +260,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
- true, false);
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true);
}
I->setFrameIdx(FrameIdx);
@@ -303,8 +297,10 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
EntryBlock->addLiveIn(CSI[i].getReg());
// Insert the spill to the stack frame.
- TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
- CSI[i].getFrameIdx(), CSI[i].getRegClass(),TRI);
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*EntryBlock, I, Reg, true,
+ CSI[i].getFrameIdx(), RC, TRI);
}
}
@@ -328,9 +324,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// terminators that precede it.
if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
+ unsigned Reg = CSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
CSI[i].getFrameIdx(),
- CSI[i].getRegClass(), TRI);
+ RC, TRI);
assert(I != MBB->begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert
@@ -374,10 +372,12 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
MBB->addLiveIn(blockCSI[i].getReg());
// Insert the spill to the stack frame.
- TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(*MBB, I, Reg,
true,
blockCSI[i].getFrameIdx(),
- blockCSI[i].getRegClass(), TRI);
+ RC, TRI);
}
}
@@ -423,9 +423,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Restore all registers immediately before the return and any
// terminators that precede it.
for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
- TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
+ unsigned Reg = blockCSI[i].getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(*MBB, I, Reg,
blockCSI[i].getFrameIdx(),
- blockCSI[i].getRegClass(), TRI);
+ RC, TRI);
assert(I != MBB->begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert
@@ -639,6 +641,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
for (MachineFunction::iterator BB = Fn.begin(),
E = Fn.end(); BB != E; ++BB) {
+#ifndef NDEBUG
+ int SPAdjCount = 0; // frame setup / destroy count.
+#endif
int SPAdj = 0; // SP offset due to call frame setup / destroy.
if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
@@ -646,6 +651,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
if (I->getOpcode() == FrameSetupOpcode ||
I->getOpcode() == FrameDestroyOpcode) {
+#ifndef NDEBUG
+ // Track whether we see even pairs of them
+ SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1;
+#endif
// Remember how much SP has been adjusted to create the call
// frame.
int Size = I->getOperand(0).getImm();
@@ -712,7 +721,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
}
- assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
+ // If we have evenly matched pairs of frame setup / destroy instructions,
+ // make sure the adjustments come out to zero. If we don't have matched
+ // pairs, we can't be sure the missing bit isn't in another basic block
+ // due to a custom inserter playing tricks, so just asserting SPAdj==0
+ // isn't sufficient. See tMOVCC on Thumb1, for example.
+ assert((SPAdjCount || SPAdj == 0) &&
+ "Unbalanced call frame setup / destroy pairs?");
}
}
@@ -870,11 +885,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
// Scavenge a new scratch register
CurrentVirtReg = Reg;
const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg);
- CurrentScratchReg = RS->FindUnusedReg(RC);
- if (CurrentScratchReg == 0)
- // No register is "free". Scavenge a register.
- CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj);
-
+ CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj);
PrevValue = Value;
}
// replace this reference to the virtual register with the
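
CalleeSavedInfo no longer records a register class, so every spill and
restore site now derives one from the register itself. The replacement
pattern, as used throughout the hunks above:

    unsigned Reg = CSI[i].getReg();
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.storeRegToStackSlot(*EntryBlock, I, Reg, /*isKill=*/true,
                            CSI[i].getFrameIdx(), RC, TRI);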
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index b3b5760..f44478e 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -110,6 +110,11 @@ namespace {
// Allocatable - vector of allocatable physical registers.
BitVector Allocatable;
+ // SkippedInstrs - Descriptors of instructions whose clobber list was ignored
+ // because all registers were spilled. It is still necessary to mark all the
+ // clobbered registers as used by the function.
+ SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs;
+
// isBulkSpilling - This flag is set when LiveRegMap will be cleared
// completely after spilling all live registers. LiveRegMap entries should
// not be erased.
@@ -135,6 +140,8 @@ namespace {
private:
bool runOnMachineFunction(MachineFunction &Fn);
void AllocateBasicBlock();
+ void handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead);
int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
bool isLastUseOfLocalReg(MachineOperand&);
@@ -508,27 +515,20 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
bool New;
tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
LiveReg &LR = LRI->second;
- bool PartialRedef = MI->getOperand(OpNum).getSubReg();
if (New) {
// If there is no hint, peek at the only use of this register.
if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
MRI->hasOneNonDBGUse(VirtReg)) {
+ const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg);
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
// It's a copy, use the destination register as a hint.
- if (TII->isMoveInstr(*MRI->use_nodbg_begin(VirtReg),
- SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (UseMI.isCopyLike())
+ Hint = UseMI.getOperand(0).getReg();
+ else if (TII->isMoveInstr(UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
Hint = DstReg;
}
allocVirtReg(MI, *LRI, Hint);
- // If this is only a partial redefinition, we must reload the other parts.
- if (PartialRedef && MI->readsVirtualRegister(VirtReg)) {
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
- int FI = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << "Reloading for partial redef: %reg" << VirtReg << "\n");
- TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FI, RC, TRI);
- ++NumLoads;
- }
- } else if (LR.LastUse && !PartialRedef) {
+ } else if (LR.LastUse) {
// Redefining a live register - kill at the last use, unless it is this
// instruction defining VirtReg multiple times.
if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse())
@@ -564,10 +564,16 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
} else if (LR.Dirty) {
if (isLastUseOfLocalReg(MO)) {
DEBUG(dbgs() << "Killing last use: " << MO << "\n");
- MO.setIsKill();
+ if (MO.isUse())
+ MO.setIsKill();
+ else
+ MO.setIsDead();
} else if (MO.isKill()) {
DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n");
+ MO.setIsDead(false);
}
} else if (MO.isKill()) {
// We must remove kill flags from uses of reloaded registers because the
@@ -576,6 +582,9 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
// This would cause a second reload of %x into a different register.
DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
MO.setIsKill(false);
+ } else if (MO.isDead()) {
+ DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n");
+ MO.setIsDead(false);
}
assert(LR.PhysReg && "Register not assigned");
LR.LastUse = MI;
@@ -607,6 +616,91 @@ bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
return MO.isDead();
}
+// Handle special instruction operands like early clobbers and tied ops when
+// there are additional physreg defines.
+void RAFast::handleThroughOperands(MachineInstr *MI,
+ SmallVectorImpl<unsigned> &VirtDead) {
+ DEBUG(dbgs() << "Scanning for through registers:");
+ SmallSet<unsigned, 8> ThroughRegs;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
+ (MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
+ if (ThroughRegs.insert(Reg))
+ DEBUG(dbgs() << " %reg" << Reg);
+ }
+ }
+
+ // If any physreg defines collide with preallocated through registers,
+ // we must spill and reallocate.
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ UsedInInstr.set(Reg);
+ if (ThroughRegs.count(PhysRegState[Reg]))
+ definePhysReg(MI, Reg, regFree);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) {
+ UsedInInstr.set(*AS);
+ if (ThroughRegs.count(PhysRegState[*AS]))
+ definePhysReg(MI, *AS, regFree);
+ }
+ }
+
+ SmallVector<unsigned, 8> PartialDefs;
+ DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (MO.isUse()) {
+ unsigned DefIdx = 0;
+ if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
+ DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand "
+ << DefIdx << ".\n");
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ setPhysReg(MI, i, PhysReg);
+ // Note: we don't update the def operand yet. That would cause the normal
+ // def-scan to attempt spilling.
+ } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) {
+ DEBUG(dbgs() << "Partial redefine: " << MO << "\n");
+ // Reload the register, but don't assign to the operand just yet.
+ // That would confuse the later phys-def processing pass.
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0);
+ PartialDefs.push_back(LRI->second.PhysReg);
+ } else if (MO.isEarlyClobber()) {
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ if (setPhysReg(MI, i, PhysReg))
+ VirtDead.push_back(Reg);
+ }
+ }
+
+ // Restore UsedInInstr to a state usable for allocating normal virtual uses.
+ UsedInInstr.reset();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ UsedInInstr.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ UsedInInstr.set(*AS);
+ }
+
+ // Also mark PartialDefs as used to avoid reallocation.
+ for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i)
+ UsedInInstr.set(PartialDefs[i]);
+}
+
void RAFast::AllocateBasicBlock() {
DEBUG(dbgs() << "\nAllocating " << *MBB);
@@ -620,7 +714,7 @@ void RAFast::AllocateBasicBlock() {
E = MBB->livein_end(); I != E; ++I)
definePhysReg(MII, *I, regReserved);
- SmallVector<unsigned, 8> PhysECs, VirtDead;
+ SmallVector<unsigned, 8> VirtDead;
SmallVector<MachineInstr*, 32> Coalesced;
// Otherwise, sequentially allocate each instruction in the MBB.
@@ -670,8 +764,25 @@ void RAFast::AllocateBasicBlock() {
LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
if (LRI != LiveVirtRegs.end())
setPhysReg(MI, i, LRI->second.PhysReg);
- else
- MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ else {
+ int SS = StackSlotForVirtReg[Reg];
+ if (SS == -1)
+ MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ else {
+ // Modify DBG_VALUE now that the value is in a spill slot.
+ uint64_t Offset = MI->getOperand(1).getImm();
+ const MDNode *MDPtr =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ DebugLoc DL = MI->getDebugLoc();
+ if (MachineInstr *NewDV =
+ TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) {
+ DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI);
+ MachineBasicBlock *MBB = MI->getParent();
+ MBB->insert(MBB->erase(MI), NewDV);
+ } else
+ MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
+ }
+ }
}
// Next instruction.
continue;
@@ -679,17 +790,25 @@ void RAFast::AllocateBasicBlock() {
// If this is a copy, we may be able to coalesce.
unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub;
- if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub))
+ if (MI->isCopy()) {
+ CopyDst = MI->getOperand(0).getReg();
+ CopySrc = MI->getOperand(1).getReg();
+ CopyDstSub = MI->getOperand(0).getSubReg();
+ CopySrcSub = MI->getOperand(1).getSubReg();
+ } else if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub))
CopySrc = CopyDst = 0;
// Track registers used by instruction.
UsedInInstr.reset();
- PhysECs.clear();
// First scan.
// Mark physreg uses and early clobbers as used.
// Find the end of the virtreg operands
unsigned VirtOpEnd = 0;
+ bool hasTiedOps = false;
+ bool hasEarlyClobbers = false;
+ bool hasPartialRedefs = false;
+ bool hasPhysDefs = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
@@ -697,20 +816,44 @@ void RAFast::AllocateBasicBlock() {
if (!Reg) continue;
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
VirtOpEnd = i+1;
+ if (MO.isUse()) {
+ hasTiedOps = hasTiedOps ||
+ TID.getOperandConstraint(i, TOI::TIED_TO) != -1;
+ } else {
+ if (MO.isEarlyClobber())
+ hasEarlyClobbers = true;
+ if (MO.getSubReg() && MI->readsVirtualRegister(Reg))
+ hasPartialRedefs = true;
+ }
continue;
}
if (!Allocatable.test(Reg)) continue;
if (MO.isUse()) {
usePhysReg(MO);
} else if (MO.isEarlyClobber()) {
- definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
- PhysECs.push_back(Reg);
- }
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
+ hasEarlyClobbers = true;
+ } else
+ hasPhysDefs = true;
+ }
+
+ // The instruction may have virtual register operands that must be allocated
+ // the same register at use-time and def-time: early clobbers and tied
+ // operands. If there are also physical defs, these registers must avoid
+ // both physical defs and uses, making them more constrained than normal
+ // operands.
+ // We didn't detect inline asm tied operands above, so just make this extra
+ // pass for all inline asm.
+ if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
+ (hasTiedOps && hasPhysDefs)) {
+ handleThroughOperands(MI, VirtDead);
+ // Don't attempt coalescing when we have funny stuff going on.
+ CopyDst = 0;
}
// Second scan.
- // Allocate virtreg uses and early clobbers.
- // Collect VirtKills
+ // Allocate virtreg uses.
for (unsigned i = 0; i != VirtOpEnd; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
@@ -722,12 +865,6 @@ void RAFast::AllocateBasicBlock() {
CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
if (setPhysReg(MI, i, PhysReg))
killVirtReg(LRI);
- } else if (MO.isEarlyClobber()) {
- // Note: defineVirtReg may invalidate MO.
- LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
- unsigned PhysReg = LRI->second.PhysReg;
- setPhysReg(MI, i, PhysReg);
- PhysECs.push_back(PhysReg);
}
}
@@ -735,12 +872,16 @@ void RAFast::AllocateBasicBlock() {
// Track registers defined by instruction - early clobbers at this point.
UsedInInstr.reset();
- for (unsigned i = 0, e = PhysECs.size(); i != e; ++i) {
- unsigned PhysReg = PhysECs[i];
- UsedInInstr.set(PhysReg);
- for (const unsigned *AS = TRI->getAliasSet(PhysReg);
- unsigned Alias = *AS; ++AS)
- UsedInInstr.set(Alias);
+ if (hasEarlyClobbers) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ UsedInInstr.set(Reg);
+ for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
+ UsedInInstr.set(*AS);
+ }
}
unsigned DefOpEnd = MI->getNumOperands();
@@ -752,13 +893,18 @@ void RAFast::AllocateBasicBlock() {
DefOpEnd = VirtOpEnd;
DEBUG(dbgs() << " Spilling remaining registers before call.\n");
spillAll(MI);
+
+ // The imp-defs are skipped below, but we still need to mark those
+ // registers as used by the function.
+ SkippedInstrs.insert(&TID);
}
// Third scan.
// Allocate defs and collect dead defs.
for (unsigned i = 0; i != DefOpEnd; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue;
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
unsigned Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -837,6 +983,14 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
// Make sure the set of used physregs is closed under subreg operations.
MRI->closePhysRegsUsed(*TRI);
+ // Add the clobber lists for all the instructions we skipped earlier.
+ for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator
+ I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
+ if (const unsigned *Defs = (*I)->getImplicitDefs())
+ while (*Defs)
+ MRI->setPhysRegUsed(*Defs++);
+
+ SkippedInstrs.clear();
StackSlotForVirtReg.clear();
return true;
}
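
One detail worth calling out from the DBG_VALUE handling above: when a
variable's value only exists in a spill slot, the allocator now rewrites the
DBG_VALUE to a frame-index form instead of dropping it. Condensed sketch (SS,
Offset, MDPtr, and DL as computed in the hunk):

    if (MachineInstr *NewDV =
            TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL))
      MBB->insert(MBB->erase(MI), NewDV); // swap in the frame-index form
    else
      MO.setReg(0); // target can't encode it; location info is lost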
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index bc331f0..044672d 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -83,7 +83,8 @@ namespace {
// pressure, it can cause fewer GPRs to be held in the queue.
static cl::opt<unsigned>
NumRecentlyUsedRegs("linearscan-skip-count",
- cl::desc("Number of registers for linearscan to remember to skip."),
+ cl::desc("Number of registers for linearscan to remember"
+ "to skip."),
cl::init(0),
cl::Hidden);
@@ -421,9 +422,10 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (vni->def != SlotIndex() && vni->isDefAccurate() &&
(CopyMI = li_->getInstructionFromIndex(vni->def)) &&
- tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ (CopyMI->isCopy() ||
+ tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)))
// Defined by a copy, try to extend SrcReg forward
- CandReg = SrcReg;
+ CandReg = CopyMI->isCopy() ? CopyMI->getOperand(1).getReg() : SrcReg;
else if (TrivCoalesceEnds &&
(CopyMI =
li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
@@ -992,6 +994,24 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
mri_->setRegAllocationHint(cur->reg, 0, Reg);
}
+ } else if (CopyMI && CopyMI->isCopy()) {
+ DstReg = CopyMI->getOperand(0).getReg();
+ DstSubReg = CopyMI->getOperand(0).getSubReg();
+ SrcReg = CopyMI->getOperand(1).getReg();
+ SrcSubReg = CopyMI->getOperand(1).getSubReg();
+ unsigned Reg = 0;
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ Reg = SrcReg;
+ else if (vrm_->isAssignedReg(SrcReg))
+ Reg = vrm_->getPhys(SrcReg);
+ if (Reg) {
+ if (SrcSubReg)
+ Reg = tri_->getSubReg(Reg, SrcSubReg);
+ if (DstSubReg)
+ Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
+ if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
+ mri_->setRegAllocationHint(cur->reg, 0, Reg);
+ }
}
}
}
@@ -1206,8 +1226,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n');
SmallVector<LiveInterval*, 8> spillIs;
std::vector<LiveInterval*> added;
-
- added = spiller_->spill(cur, spillIs);
+ spiller_->spill(cur, added, spillIs);
std::sort(added.begin(), added.end(), LISorter());
addStackInterval(cur, ls_, li_, mri_, *vrm_);
@@ -1285,10 +1304,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
if (sli->beginIndex() < earliestStart)
earliestStart = sli->beginIndex();
- std::vector<LiveInterval*> newIs;
- newIs = spiller_->spill(sli, spillIs, &earliestStart);
+ spiller_->spill(sli, added, spillIs, &earliestStart);
addStackInterval(sli, ls_, li_, mri_, *vrm_);
- std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
spilled.insert(sli->reg);
}
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
deleted file mode 100644
index 321ae12..0000000
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ /dev/null
@@ -1,1254 +0,0 @@
-//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This register allocator allocates registers to a basic block at a time,
-// attempting to keep values in registers and reusing registers as appropriate.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "regalloc"
-#include "llvm/BasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include <algorithm>
-using namespace llvm;
-
-STATISTIC(NumStores, "Number of stores added");
-STATISTIC(NumLoads , "Number of loads added");
-STATISTIC(NumCopies, "Number of copies coalesced");
-
-static RegisterRegAlloc
- localRegAlloc("local", "local register allocator",
- createLocalRegisterAllocator);
-
-namespace {
- class RALocal : public MachineFunctionPass {
- public:
- static char ID;
- RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
- private:
- const TargetMachine *TM;
- MachineFunction *MF;
- MachineRegisterInfo *MRI;
- const TargetRegisterInfo *TRI;
- const TargetInstrInfo *TII;
-
- // StackSlotForVirtReg - Maps virtual regs to the frame index where these
- // values are spilled.
- IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
-
- // Virt2PhysRegMap - This map contains entries for each virtual register
- // that is currently available in a physical register.
- IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
-
- unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
- return Virt2PhysRegMap[VirtReg];
- }
-
- // PhysRegsUsed - This array is effectively a map, containing entries for
- // each physical register that currently has a value (ie, it is in
- // Virt2PhysRegMap). The value mapped to is the virtual register
- // corresponding to the physical register (the inverse of the
- // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
- // because it is used by a future instruction, and to -2 if it is not
- // allocatable. If the entry for a physical register is -1, then the
- // physical register is "not in the map".
- //
- std::vector<int> PhysRegsUsed;
-
- // PhysRegsUseOrder - This contains a list of the physical registers that
- // currently have a virtual register value in them. This list provides an
- // ordering of registers, imposing a reallocation order. This list is only
- // used if all registers are allocated and we have to spill one, in which
- // case we spill the least recently used register. Entries at the front of
- // the list are the least recently used registers, entries at the back are
- // the most recently used.
- //
- std::vector<unsigned> PhysRegsUseOrder;
-
- // Virt2LastUseMap - This maps each virtual register to its last use
- // (MachineInstr*, operand index pair).
- IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor>
- Virt2LastUseMap;
-
- std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
- return Virt2LastUseMap[Reg];
- }
-
- // VirtRegModified - This bitset contains information about which virtual
- // registers need to be spilled back to memory when their registers are
- // scavenged. If a virtual register has simply been rematerialized, there
- // is no reason to spill it to memory when we need the register back.
- //
- BitVector VirtRegModified;
-
- // UsedInMultipleBlocks - Tracks whether a particular register is used in
- // more than one block.
- BitVector UsedInMultipleBlocks;
-
- void markVirtRegModified(unsigned Reg, bool Val = true) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
- Reg -= TargetRegisterInfo::FirstVirtualRegister;
- if (Val)
- VirtRegModified.set(Reg);
- else
- VirtRegModified.reset(Reg);
- }
-
- bool isVirtRegModified(unsigned Reg) const {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
- assert(Reg - TargetRegisterInfo::FirstVirtualRegister <
- VirtRegModified.size() && "Illegal virtual register!");
- return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
- }
-
- void AddToPhysRegsUseOrder(unsigned Reg) {
- std::vector<unsigned>::iterator It =
- std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg);
- if (It != PhysRegsUseOrder.end())
- PhysRegsUseOrder.erase(It);
- PhysRegsUseOrder.push_back(Reg);
- }
-
- void MarkPhysRegRecentlyUsed(unsigned Reg) {
- if (PhysRegsUseOrder.empty() ||
- PhysRegsUseOrder.back() == Reg) return; // Already most recently used
-
- for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) {
- unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle
- if (!areRegsEqual(Reg, RegMatch)) continue;
-
- PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
- // Add it to the end of the list
- PhysRegsUseOrder.push_back(RegMatch);
- if (RegMatch == Reg)
- return; // Found an exact match, exit early
- }
- }
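
The two helpers above maintain PhysRegsUseOrder as an LRU list in a plain vector, ordered least to most recently used; touching a register erases it from wherever it sits and re-appends it at the back. A minimal standalone sketch of that discipline:

    #include <algorithm>
    #include <cassert>
    #include <vector>

    void touch(std::vector<unsigned> &Order, unsigned Reg) {
      auto It = std::find(Order.begin(), Order.end(), Reg);
      if (It != Order.end())
        Order.erase(It);     // drop the stale position
      Order.push_back(Reg);  // now the most recently used
    }

    int main() {
      std::vector<unsigned> Order = {1, 2, 3};
      touch(Order, 1);             // 1 becomes most recently used
      assert(Order.front() == 2);  // 2 is now the eviction candidate
      assert(Order.back() == 1);
    }

Linear search keeps each touch O(n), which is acceptable here because n is bounded by the number of physical registers.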
-
- public:
- virtual const char *getPassName() const {
- return "Local Register Allocator";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequiredID(PHIEliminationID);
- AU.addRequiredID(TwoAddressInstructionPassID);
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- private:
- /// runOnMachineFunction - Register allocate the whole function
- bool runOnMachineFunction(MachineFunction &Fn);
-
- /// AllocateBasicBlock - Register allocate the specified basic block.
- void AllocateBasicBlock(MachineBasicBlock &MBB);
-
-
- /// areRegsEqual - This method returns true if the specified registers are
- /// related to each other. To do this, it checks to see if they are equal
- /// or if the first register is in the alias set of the second register.
- ///
- bool areRegsEqual(unsigned R1, unsigned R2) const {
- if (R1 == R2) return true;
- for (const unsigned *AliasSet = TRI->getAliasSet(R2);
- *AliasSet; ++AliasSet) {
- if (*AliasSet == R1) return true;
- }
- return false;
- }
-
- /// getStackSpaceFor - This returns the frame index of the specified virtual
- /// register on the stack, allocating space if necessary.
- int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
-
- /// removePhysReg - This method marks the specified physical register as no
- /// longer being in use.
- ///
- void removePhysReg(unsigned PhysReg);
-
- void storeVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned VirtReg, unsigned PhysReg, bool isKill);
-
- /// spillVirtReg - This method spills the value specified by PhysReg into
- /// the virtual register slot specified by VirtReg. It then updates the RA
- /// data structures to indicate the fact that PhysReg is now available.
- ///
- void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned VirtReg, unsigned PhysReg);
-
- /// spillPhysReg - This method spills the specified physical register into
- /// the virtual register slot associated with it. If OnlyVirtRegs is set to
- /// true, then the request is ignored if the physical register does not
- /// contain a virtual register.
- ///
- void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
- unsigned PhysReg, bool OnlyVirtRegs = false);
-
- /// assignVirtToPhysReg - This method updates local state so that we know
- /// that PhysReg is the proper container for VirtReg now. The physical
- /// register must not be used for anything else when this is called.
- ///
- void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
-
- /// isPhysRegAvailable - Return true if the specified physical register is
- /// free and available for use. This also includes checking to see if
- /// aliased registers are all free...
- ///
- bool isPhysRegAvailable(unsigned PhysReg) const;
-
- /// getFreeReg - Look to see if there is a free register available in the
- /// specified register class. If not, return 0.
- ///
- unsigned getFreeReg(const TargetRegisterClass *RC);
-
- /// getReg - Find a physical register to hold the specified virtual
- /// register. If all compatible physical registers are used, this method
- /// spills the last used virtual register to the stack, and uses that
- /// register. If NoFree is true, the caller knows there isn't a free
- /// register, so getFreeReg() should not be called.
- unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
- unsigned VirtReg, bool NoFree = false);
-
- /// reloadVirtReg - This method transforms the specified virtual
- /// register use to refer to a physical register. This method may do this
- /// in one of several ways: if the register is available in a physical
- /// register already, it uses that physical register. If the value is not
- /// in a physical register, and if there are physical registers available,
- /// it loads it into a register: PhysReg if that is an available physical
- /// register, otherwise any physical register of the right class.
- /// If register pressure is high, and it is possible, it tries to fold the
- /// load of the virtual register into the instruction itself. It avoids
- /// doing this if register pressure is low to improve the chance that
- /// subsequent instructions can use the reloaded value. This method
- /// returns the modified instruction.
- ///
- MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
- unsigned OpNum, SmallSet<unsigned, 4> &RRegs,
- unsigned PhysReg);
-
- /// ComputeLocalLiveness - Computes liveness of registers within a basic
- /// block, setting the killed/dead flags as appropriate.
- void ComputeLocalLiveness(MachineBasicBlock& MBB);
-
- void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
- unsigned PhysReg);
- };
- char RALocal::ID = 0;
-}
-
-/// getStackSpaceFor - This allocates space for the specified virtual register
-/// to be held on the stack.
-int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
- // Find the location Reg would belong...
- int SS = StackSlotForVirtReg[VirtReg];
- if (SS != -1)
- return SS; // Already has space allocated?
-
- // Allocate a new stack object for this spill location...
- int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
-
- // Assign the slot.
- StackSlotForVirtReg[VirtReg] = FrameIdx;
- return FrameIdx;
-}
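
getStackSpaceFor above allocates spill slots lazily: the map is primed with the sentinel -1 (the StackSlotForVirtReg constructor argument earlier in the file), and a slot is created only on the first spill of a virtual register. A sketch of the same scheme with a toy slot allocator handing out consecutive indices:

    #include <vector>

    struct SlotMap {
      std::vector<int> Slot;  // -1 means "no slot assigned yet"
      int NextIdx = 0;        // stands in for the frame-index allocator

      int getOrCreate(unsigned VirtReg) {
        if (VirtReg >= Slot.size())
          Slot.resize(VirtReg + 1, -1);
        if (Slot[VirtReg] != -1)
          return Slot[VirtReg];            // reuse the existing slot
        return Slot[VirtReg] = NextIdx++;  // first spill creates it
      }
    };

    int main() {
      SlotMap M;
      int A = M.getOrCreate(5);              // first request creates a slot
      return M.getOrCreate(5) == A ? 0 : 1;  // second request reuses it
    }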
-
-
-/// removePhysReg - This method marks the specified physical register as no
-/// longer being in use.
-///
-void RALocal::removePhysReg(unsigned PhysReg) {
- PhysRegsUsed[PhysReg] = -1; // PhysReg no longer used
-
- std::vector<unsigned>::iterator It =
- std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg);
- if (It != PhysRegsUseOrder.end())
- PhysRegsUseOrder.erase(It);
-}
-
-/// storeVirtReg - Store a virtual register to its assigned stack slot.
-void RALocal::storeVirtReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned VirtReg, unsigned PhysReg,
- bool isKill) {
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
- int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << " to stack slot #" << FrameIndex);
- TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC, TRI);
- ++NumStores; // Update statistics
-
- // Mark the spill instruction as last use if we're not killing the register.
- if (!isKill) {
- MachineInstr *Spill = llvm::prior(I);
- int OpNum = Spill->findRegisterUseOperandIdx(PhysReg);
- if (OpNum < 0)
- getVirtRegLastUse(VirtReg) = std::make_pair((MachineInstr*)0, 0);
- else
- getVirtRegLastUse(VirtReg) = std::make_pair(Spill, OpNum);
- }
-}
-
-/// spillVirtReg - This method spills the value specified by PhysReg into the
-/// virtual register slot specified by VirtReg. It then updates the RA data
-/// structures to indicate the fact that PhysReg is now available.
-///
-void RALocal::spillVirtReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned VirtReg, unsigned PhysReg) {
- assert(VirtReg && "Spilling a physical register is illegal!"
- " Must not have appropriate kill for the register or use exists beyond"
- " the intended one.");
- DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg)
- << " containing %reg" << VirtReg);
-
- if (!isVirtRegModified(VirtReg)) {
- DEBUG(dbgs() << " which has not been modified, so no store necessary!");
- std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
- if (LastUse.first)
- LastUse.first->getOperand(LastUse.second).setIsKill();
- } else {
- // Otherwise, there is a virtual register corresponding to this physical
- // register. We only need to spill it into its stack slot if it has been
- // modified.
- // If the instruction reads the register that's spilled (e.g. this can
- // happen if it is a move to a physical register), then the spill
- // instruction is not a kill.
- bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
- storeVirtReg(MBB, I, VirtReg, PhysReg, isKill);
- }
-
- getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
-
- DEBUG(dbgs() << '\n');
- removePhysReg(PhysReg);
-}
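
spillVirtReg above only stores when the dirty bit from markVirtRegModified is set: a value that was reloaded and never redefined still matches its stack slot, so freeing the register needs no write-back, just a kill marker on the last use. A sketch of that policy in isolation:

    #include <cstdio>

    struct RegState {
      bool Dirty = false;  // set on any def, cleared when (re)loaded
    };

    void release(RegState &R, const char *Name) {
      if (R.Dirty)
        printf("store %s back to its stack slot\n", Name);  // write-back
      else
        printf("drop %s; memory is already current\n", Name);
    }

    int main() {
      RegState Clean, Written;
      Written.Dirty = true;
      release(Clean, "r1");
      release(Written, "r2");
    }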
-
-
-/// spillPhysReg - This method spills the specified physical register into the
-/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
-/// then the request is ignored if the physical register does not contain a
-/// virtual register.
-///
-void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
- unsigned PhysReg, bool OnlyVirtRegs) {
- if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
- assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
- if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
- spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
- return;
- }
-
- // If the selected register aliases any other registers, we must make
- // sure that one of the aliases isn't alive.
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet) {
- if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register.
- PhysRegsUsed[*AliasSet] == -2) // If allocatable.
- continue;
-
- if (PhysRegsUsed[*AliasSet])
- spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
- }
-}
-
-
-/// assignVirtToPhysReg - This method updates local state so that we know
-/// that PhysReg is the proper container for VirtReg now. The physical
-/// register must not be used for anything else when this is called.
-///
-void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
- assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
- // Update information to note the fact that this register was just used, and
- // it holds VirtReg.
- PhysRegsUsed[PhysReg] = VirtReg;
- getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
- AddToPhysRegsUseOrder(PhysReg); // New use of PhysReg
-}
-
-
-/// isPhysRegAvailable - Return true if the specified physical register is free
-/// and available for use. This also includes checking to see if aliased
-/// registers are all free...
-///
-bool RALocal::isPhysRegAvailable(unsigned PhysReg) const {
- if (PhysRegsUsed[PhysReg] != -1) return false;
-
- // If the selected register aliases any other allocated registers, it is
- // not free!
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet)
- if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
- return false; // Can't use this reg then.
- return true;
-}
-
-
-/// getFreeReg - Look to see if there is a free register available in the
-/// specified register class. If not, return 0.
-///
-unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) {
- // Get iterators defining the range of registers that are valid to allocate in
- // this class, which also specifies the preferred allocation order.
- TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
- TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
-
- for (; RI != RE; ++RI)
- if (isPhysRegAvailable(*RI)) { // Is reg unused?
- assert(*RI != 0 && "Cannot use register!");
- return *RI; // Found an unused register!
- }
- return 0;
-}
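
getFreeReg above is a first-fit scan: the class's allocation order doubles as a priority order, and the first register whose aliases are all free wins. A standalone sketch with a simplified availability test (the real one also walks alias sets):

    #include <vector>

    unsigned firstFree(const std::vector<unsigned> &AllocOrder,
                       const std::vector<bool> &InUse) {
      for (unsigned Reg : AllocOrder)
        if (!InUse[Reg])
          return Reg;  // preferred order doubles as priority
      return 0;        // 0 means "no free register"
    }

    int main() {
      std::vector<bool> InUse = {false, true, false, false};
      std::vector<unsigned> Order = {1, 3, 2};  // class allocation order
      return firstFree(Order, InUse) == 3 ? 0 : 1;
    }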
-
-
-/// getReg - Find a physical register to hold the specified virtual
-/// register. If all compatible physical registers are used, this method spills
-/// the last used virtual register to the stack, and uses that register.
-///
-unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
- unsigned VirtReg, bool NoFree) {
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
-
- // First check to see if we have a free register of the requested type...
- unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
-
- if (PhysReg != 0) {
- // Assign the register.
- assignVirtToPhysReg(VirtReg, PhysReg);
- return PhysReg;
- }
-
- // If we didn't find an unused register, scavenge one now!
- assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
-
- // Loop over all of the preallocated registers from the least recently used
- // to the most recently used. When we find one that is capable of holding
- // our register, use it.
- for (unsigned i = 0; PhysReg == 0; ++i) {
- assert(i != PhysRegsUseOrder.size() &&
- "Couldn't find a register of the appropriate class!");
-
- unsigned R = PhysRegsUseOrder[i];
-
- // We can only use this register if it holds a virtual register (ie, it
- // can be spilled). Do not use it if it is an explicitly allocated
- // physical register!
- assert(PhysRegsUsed[R] != -1 &&
- "PhysReg in PhysRegsUseOrder, but is not allocated?");
- if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
- // If the current register is compatible, use it.
- if (RC->contains(R)) {
- PhysReg = R;
- break;
- }
-
- // If one of the registers aliased to the current register is
- // compatible, use it.
- for (const unsigned *AliasIt = TRI->getAliasSet(R);
- *AliasIt; ++AliasIt) {
- if (!RC->contains(*AliasIt)) continue;
-
- // If this is pinned down for some reason, don't use it. For
- // example, if CL is pinned, and we run across CH, don't use
- // CH as justification for using scavenging ECX (which will
- // fail).
- if (PhysRegsUsed[*AliasIt] == 0) continue;
-
- // Make sure the register is allocatable. Don't allocate SIL on
- // x86-32.
- if (PhysRegsUsed[*AliasIt] == -2) continue;
-
- PhysReg = *AliasIt; // Take an aliased register
- break;
- }
- }
- }
-
- assert(PhysReg && "Physical register not assigned!?!?");
-
- // At this point PhysRegsUseOrder[i] is the least recently used register of
- // compatible register class. Spill it to memory and reap its remains.
- spillPhysReg(MBB, I, PhysReg);
-
- // Now that we know which register we need to assign this to, do it now!
- assignVirtToPhysReg(VirtReg, PhysReg);
- return PhysReg;
-}
-
-
-/// reloadVirtReg - This method transforms the specified virtual
-/// register use to refer to a physical register. This method may do this in
-/// one of several ways: if the register is available in a physical register
-/// already, it uses that physical register. If the value is not in a physical
-/// register, and if there are physical registers available, it loads it into a
-/// register: PhysReg if that is an available physical register, otherwise any
-/// register. If register pressure is high, and it is possible, it tries to
-/// fold the load of the virtual register into the instruction itself. It
-/// avoids doing this if register pressure is low to improve the chance that
-/// subsequent instructions can use the reloaded value. This method returns
-/// the modified instruction.
-///
-MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
- unsigned OpNum,
- SmallSet<unsigned, 4> &ReloadedRegs,
- unsigned PhysReg) {
- unsigned VirtReg = MI->getOperand(OpNum).getReg();
- unsigned SubIdx = MI->getOperand(OpNum).getSubReg();
-
- // If the virtual register is already available, just update the instruction
- // and return.
- if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
- if (SubIdx) {
- PR = TRI->getSubReg(PR, SubIdx);
- MI->getOperand(OpNum).setSubReg(0);
- }
- MI->getOperand(OpNum).setReg(PR); // Assign the input register
- if (!MI->isDebugValue()) {
- // Do not do these for DBG_VALUE as they can affect codegen.
- MarkPhysRegRecentlyUsed(PR); // Already have this value available!
- getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
- }
- return MI;
- }
-
- // Otherwise, we need to fold it into the current instruction, or reload it.
- // If we have registers available to hold the value, use them.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
- // If we already have a PhysReg (this happens when the instruction is a
- // reg-to-reg copy with a PhysReg destination) use that.
- if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) ||
- !isPhysRegAvailable(PhysReg))
- PhysReg = getFreeReg(RC);
- int FrameIndex = getStackSpaceFor(VirtReg, RC);
-
- if (PhysReg) { // Register is available, allocate it!
- assignVirtToPhysReg(VirtReg, PhysReg);
- } else { // No registers available.
- // Force some poor hapless value out of the register file to
- // make room for the new register, and reload it.
- PhysReg = getReg(MBB, MI, VirtReg, true);
- }
-
- markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded
-
- DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into "
- << TRI->getName(PhysReg) << "\n");
-
- // Add move instruction(s)
- TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI);
- ++NumLoads; // Update statistics
-
- MF->getRegInfo().setPhysRegUsed(PhysReg);
- // Assign the input register.
- if (SubIdx) {
- MI->getOperand(OpNum).setSubReg(0);
- MI->getOperand(OpNum).setReg(TRI->getSubReg(PhysReg, SubIdx));
- } else
- MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
- getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
-
- if (!ReloadedRegs.insert(PhysReg)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for invalid "
- << "constraints:\n";
- MI->print(Msg, TM);
- }
- report_fatal_error(Msg.str());
- }
- for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
- *SubRegs; ++SubRegs) {
- if (ReloadedRegs.insert(*SubRegs)) continue;
-
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for invalid "
- << "constraints:\n";
- MI->print(Msg, TM);
- }
- report_fatal_error(Msg.str());
- }
-
- return MI;
-}
-
-/// isReadModWriteImplicitKill - True if this is an implicit kill for a
-/// read/mod/write register, i.e. update partial register.
-static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
- MO.isDef() && !MO.isDead())
- return true;
- }
- return false;
-}
-
-/// isReadModWriteImplicitDef - True if this is an implicit def for a
-/// read/mod/write register, i.e. update partial register.
-static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
- !MO.isDef() && MO.isKill())
- return true;
- }
- return false;
-}
-
-// precedes - Helper function to determine whether MachineInstr A
-// precedes MachineInstr B within the same MBB.
-static bool precedes(MachineBasicBlock::iterator A,
- MachineBasicBlock::iterator B) {
- if (A == B)
- return false;
-
- MachineBasicBlock::iterator I = A->getParent()->begin();
- while (I != A->getParent()->end()) {
- if (I == A)
- return true;
- else if (I == B)
- return false;
-
- ++I;
- }
-
- return false;
-}
-
-/// ComputeLocalLiveness - Computes liveness of registers within a basic
-/// block, setting the killed/dead flags as appropriate.
-void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
- // Keep track of the most recently seen previous use or def of each reg,
- // so that we can update them with dead/kill markers.
- DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- if (I->isDebugValue())
- continue;
-
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = I->getOperand(i);
- // Uses don't trigger any flags, but we need to save
- // them for later. Also, we have to process these
- // _before_ processing the defs, since an instr
- // uses regs before it defs them.
- if (!MO.isReg() || !MO.getReg() || !MO.isUse())
- continue;
-
- // Ignore helpful kill flags from earlier passes.
- MO.setIsKill(false);
-
- LastUseDef[MO.getReg()] = std::make_pair(I, i);
-
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
-
- const unsigned *Aliases = TRI->getAliasSet(MO.getReg());
- if (Aliases == 0)
- continue;
-
- while (*Aliases) {
- DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
- alias = LastUseDef.find(*Aliases);
-
- if (alias != LastUseDef.end() && alias->second.first != I)
- LastUseDef[*Aliases] = std::make_pair(I, i);
-
- ++Aliases;
- }
- }
-
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = I->getOperand(i);
- // Defs other than 2-addr redefs _do_ trigger flag changes:
- // - A def followed by a def is dead
- // - A use followed by a def is a kill
- if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue;
-
- unsigned SubIdx = MO.getSubReg();
- DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
- last = LastUseDef.find(MO.getReg());
- if (last != LastUseDef.end()) {
- // Check if this is a two address instruction. If so, then
- // the def does not kill the use.
- if (last->second.first == I && I->isRegTiedToUseOperand(i))
- continue;
-
- MachineOperand &lastUD =
- last->second.first->getOperand(last->second.second);
- if (SubIdx && lastUD.getSubReg() != SubIdx)
- // Partial re-def, the last def is not dead.
- // %reg1024:5<def> =
- // %reg1024:6<def> =
- // or
- // %reg1024:5<def> = op %reg1024, 5
- continue;
-
- if (lastUD.isDef())
- lastUD.setIsDead(true);
- else
- lastUD.setIsKill(true);
- }
-
- LastUseDef[MO.getReg()] = std::make_pair(I, i);
- }
- }
-
- // Live-out (of the function) registers contain return values of the function,
- // so we need to make sure they are alive at return time.
- MachineBasicBlock::iterator Ret = MBB.getFirstTerminator();
- bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn());
-
- if (BBEndsInReturn)
- for (MachineRegisterInfo::liveout_iterator
- I = MF->getRegInfo().liveout_begin(),
- E = MF->getRegInfo().liveout_end(); I != E; ++I)
- if (!Ret->readsRegister(*I)) {
- Ret->addOperand(MachineOperand::CreateReg(*I, false, true));
- LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1);
- }
-
- // Finally, loop over the final use/def of each reg
- // in the block and determine if it is dead.
- for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
- I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
- MachineInstr *MI = I->second.first;
- unsigned idx = I->second.second;
- MachineOperand &MO = MI->getOperand(idx);
-
- bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
-
- // A crude approximation of "live-out" calculation
- bool usedOutsideBlock = isPhysReg ? false :
- UsedInMultipleBlocks.test(MO.getReg() -
- TargetRegisterInfo::FirstVirtualRegister);
-
- // If the machine BB ends in a return instruction, then the value isn't used
- // outside of the BB.
- if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) {
- // DBG_VALUE complicates this: if the only refs of a register outside
- // this block are DBG_VALUE, we can't keep the reg live just for that,
- // as it will cause the reg to be spilled at the end of this block when
- // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that
- // happens.
- bool UsedByDebugValueOnly = false;
- for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()),
- UE = MRI->reg_end(); UI != UE; ++UI) {
- // Two cases:
- // - used in another block
- // - used in the same block before it is defined (loop)
- if (UI->getParent() == &MBB &&
- !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI)))
- continue;
-
- if (UI->isDebugValue()) {
- UsedByDebugValueOnly = true;
- continue;
- }
-
- // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
- UsedInMultipleBlocks.set(MO.getReg() -
- TargetRegisterInfo::FirstVirtualRegister);
- usedOutsideBlock = true;
- UsedByDebugValueOnly = false;
- break;
- }
-
- if (UsedByDebugValueOnly)
- for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()),
- UE = MRI->reg_end(); UI != UE; ++UI)
- if (UI->isDebugValue() &&
- (UI->getParent() != &MBB ||
- (MO.isDef() && precedes(&*UI, MI))))
- UI.getOperand().setReg(0U);
- }
-
- // Physical registers and those that are not live-out of the block are
- // killed/dead at their last use/def within this block.
- if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) {
- if (MO.isUse()) {
- // Don't mark uses that are tied to defs as kills.
- if (!MI->isRegTiedToDefOperand(idx))
- MO.setIsKill(true);
- } else {
- MO.setIsDead(true);
- }
- }
- }
-}
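
ComputeLocalLiveness above hinges on one idea: remember the last use or def of each register, and when a new def arrives, retroactively flag the previous occurrence — kill if it was a use, dead if it was a def. A compact standalone model of that rule (two-address and sub-register cases omitted):

    #include <cstdio>
    #include <map>

    enum Kind { Use, Def };
    struct Occurrence { int Instr; Kind K; };
    struct Op { int Instr; unsigned Reg; Kind K; };

    int main() {
      // Toy instruction stream: (instruction index, register, use-or-def).
      Op Stream[] = {{0, 1, Def}, {1, 1, Use}, {2, 1, Def},
                     {3, 2, Def}, {4, 2, Def}};

      std::map<unsigned, Occurrence> LastUseDef;
      for (const Op &O : Stream) {
        auto It = LastUseDef.find(O.Reg);
        if (O.K == Def && It != LastUseDef.end()) {
          if (It->second.K == Use)
            printf("instr %d: kill of reg %u\n", It->second.Instr, O.Reg);
          else
            printf("instr %d: dead def of reg %u\n", It->second.Instr, O.Reg);
        }
        LastUseDef[O.Reg] = {O.Instr, O.K};
      }
      // A final pass marks the surviving entries kill/dead too, unless the
      // register is live out of the block.
    }

Running it reports a kill at instruction 1 (last use of reg 1 before its redefinition) and a dead def at instruction 3 (def of reg 2 immediately overwritten).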
-
-void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
- // loop over each instruction
- MachineBasicBlock::iterator MII = MBB.begin();
-
- DEBUG({
- const BasicBlock *LBB = MBB.getBasicBlock();
- if (LBB)
- dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName();
- });
-
- // Add live-in registers as active.
- for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
- E = MBB.livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- MF->getRegInfo().setPhysRegUsed(Reg);
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
-
- AddToPhysRegsUseOrder(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- }
- }
-
- ComputeLocalLiveness(MBB);
-
- // Now sequentially allocate each instruction in the MBB.
- while (MII != MBB.end()) {
- MachineInstr *MI = MII++;
- const TargetInstrDesc &TID = MI->getDesc();
- DEBUG({
- dbgs() << "\nStarting RegAlloc of: " << *MI;
- dbgs() << " Regs have values: ";
- for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
- if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
- if (PhysRegsUsed[i] && isVirtRegModified(PhysRegsUsed[i]))
- dbgs() << "*";
- dbgs() << "[" << TRI->getName(i)
- << ",%reg" << PhysRegsUsed[i] << "] ";
- }
- dbgs() << '\n';
- });
-
- // Determine whether this is a copy instruction. The cases where the
- // source or destination are phys regs are handled specially.
- unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
- unsigned SrcCopyPhysReg = 0U;
- bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
- SrcCopySubReg, DstCopySubReg) &&
- SrcCopySubReg == DstCopySubReg;
- if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg))
- SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg);
-
- // Loop over the implicit uses, making sure that they are at the head of the
- // use order list, so they don't get reallocated.
- if (TID.ImplicitUses) {
- for (const unsigned *ImplicitUses = TID.ImplicitUses;
- *ImplicitUses; ++ImplicitUses)
- MarkPhysRegRecentlyUsed(*ImplicitUses);
- }
-
- SmallVector<unsigned, 8> Kills;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isKill()) continue;
-
- if (!MO.isImplicit())
- Kills.push_back(MO.getReg());
- else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
- // These are extra physical register kills when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- Kills.push_back(MO.getReg());
- }
-
- // If any physical regs are earlyclobber, spill any value they might
- // have in them, then mark them unallocatable.
- // If any virtual regs are earlyclobber, allocate them now (before
- // freeing inputs that are killed).
- if (MI->isInlineAsm()) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() ||
- !MO.getReg())
- continue;
-
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- unsigned DestVirtReg = MO.getReg();
- unsigned DestPhysReg;
-
- // If DestVirtReg already has a value, use it.
- if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
- DestPhysReg = getReg(MBB, MI, DestVirtReg);
- MF->getRegInfo().setPhysRegUsed(DestPhysReg);
- markVirtRegModified(DestVirtReg);
- getVirtRegLastUse(DestVirtReg) =
- std::make_pair((MachineInstr*)0, 0);
- DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
- << " to %reg" << DestVirtReg << "\n");
- if (unsigned DestSubIdx = MO.getSubReg()) {
- MO.setSubReg(0);
- DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx);
- }
- MO.setReg(DestPhysReg); // Assign the earlyclobber register
- } else {
- unsigned Reg = MO.getReg();
- if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
- // These are extra physical register defs when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
-
- MF->getRegInfo().setPhysRegUsed(Reg);
- spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(Reg);
-
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(*SubRegs);
- }
- }
- }
- }
-
- // If a DBG_VALUE says something is located in a spilled register,
- // change the DBG_VALUE to be undef, which prevents the register
- // from being reloaded here. Doing that would change the generated
- // code, unless another use immediately follows this instruction.
- if (MI->isDebugValue() &&
- MI->getNumOperands()==3 && MI->getOperand(0).isReg()) {
- unsigned VirtReg = MI->getOperand(0).getReg();
- if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- !getVirt2PhysRegMapSlot(VirtReg))
- MI->getOperand(0).setReg(0U);
- }
-
- // Get the used operands into registers. This has the potential to spill
- // incoming values if we are out of registers. Note that we completely
- // ignore physical register uses here. We assume that if an explicit
- // physical register is referenced by the instruction, that it is guaranteed
- // to be live-in, or the input is badly hosed.
- //
- SmallSet<unsigned, 4> ReloadedRegs;
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand &MO = MI->getOperand(i);
- // here we are looking for only used operands (never def&use)
- if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- MI = reloadVirtReg(MBB, MI, i, ReloadedRegs,
- isCopy ? DstCopyReg : 0);
- }
-
- // If this instruction is the last user of this register, kill the
- // value, freeing the register being used, so it doesn't need to be
- // spilled to memory.
- //
- for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
- unsigned VirtReg = Kills[i];
- unsigned PhysReg = VirtReg;
- if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
- // If the virtual register was never materialized into a register, it
- // might not be in the map, but it won't hurt to zero it out anyway.
- unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
- PhysReg = PhysRegSlot;
- PhysRegSlot = 0;
- } else if (PhysRegsUsed[PhysReg] == -2) {
- // Unallocatable register dead, ignore.
- continue;
- } else {
- assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
- "Silently clearing a virtual register?");
- }
-
- if (!PhysReg) continue;
-
- DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg)
- << "[%reg" << VirtReg <<"], removing it from live set\n");
- removePhysReg(PhysReg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] != -2) {
- DEBUG(dbgs() << " Last use of "
- << TRI->getName(*SubRegs) << "[%reg" << VirtReg
- <<"], removing it from live set\n");
- removePhysReg(*SubRegs);
- }
- }
- }
-
- // Loop over all of the operands of the instruction, spilling registers that
- // are defined, and marking explicit destinations in the PhysRegsUsed map.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() ||
- MO.isEarlyClobber() ||
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
- continue;
-
- unsigned Reg = MO.getReg();
- if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
- // These are extra physical register defs when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
-
- MF->getRegInfo().setPhysRegUsed(Reg);
- spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(Reg);
-
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
-
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(*SubRegs);
- }
- }
-
- // Loop over the implicit defs, spilling them as well.
- if (TID.ImplicitDefs) {
- for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
- *ImplicitDefs; ++ImplicitDefs) {
- unsigned Reg = *ImplicitDefs;
- if (PhysRegsUsed[Reg] != -2) {
- spillPhysReg(MBB, MI, Reg, true);
- AddToPhysRegsUseOrder(Reg);
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- }
- MF->getRegInfo().setPhysRegUsed(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
-
- AddToPhysRegsUseOrder(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- }
- }
- }
-
- SmallVector<unsigned, 8> DeadDefs;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDead())
- DeadDefs.push_back(MO.getReg());
- }
-
- // Okay, we have allocated all of the source operands and spilled any values
- // that would be destroyed by defs of this instruction. Loop over the
- // explicit defs and assign them to a register, spilling incoming values if
- // we need to scavenge a register.
- //
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg() ||
- MO.isEarlyClobber() ||
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
-
- unsigned DestVirtReg = MO.getReg();
- unsigned DestPhysReg;
-
- // If DestVirtReg already has a value, use it.
- if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
- // If this is a copy try to reuse the input as the output;
- // that will make the copy go away.
- // If this is a copy, the source reg is a phys reg, and
- // that reg is available, use that phys reg for DestPhysReg.
- // If this is a copy, the source reg is a virtual reg, and
- // the phys reg that was assigned to that virtual reg is now
- // available, use that phys reg for DestPhysReg. (If it's now
- // available that means this was the last use of the source.)
- if (isCopy &&
- TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
- isPhysRegAvailable(SrcCopyReg)) {
- DestPhysReg = SrcCopyReg;
- assignVirtToPhysReg(DestVirtReg, DestPhysReg);
- } else if (isCopy &&
- TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
- SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
- MF->getRegInfo().getRegClass(DestVirtReg)->
- contains(SrcCopyPhysReg)) {
- DestPhysReg = SrcCopyPhysReg;
- assignVirtToPhysReg(DestVirtReg, DestPhysReg);
- } else
- DestPhysReg = getReg(MBB, MI, DestVirtReg);
- }
- MF->getRegInfo().setPhysRegUsed(DestPhysReg);
- markVirtRegModified(DestVirtReg);
- getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
- DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
- << " to %reg" << DestVirtReg << "\n");
-
- if (unsigned DestSubIdx = MO.getSubReg()) {
- MO.setSubReg(0);
- DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx);
- }
- MO.setReg(DestPhysReg); // Assign the output register
- }
-
- // If this instruction defines any registers that are immediately dead,
- // kill them now.
- //
- for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
- unsigned VirtReg = DeadDefs[i];
- unsigned PhysReg = VirtReg;
- if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
- unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
- PhysReg = PhysRegSlot;
- assert(PhysReg != 0);
- PhysRegSlot = 0;
- } else if (PhysRegsUsed[PhysReg] == -2) {
- // Unallocatable register dead, ignore.
- continue;
- } else if (!PhysReg)
- continue;
-
- DEBUG(dbgs() << " Register " << TRI->getName(PhysReg)
- << " [%reg" << VirtReg
- << "] is never used, removing it from live set\n");
- removePhysReg(PhysReg);
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet) {
- if (PhysRegsUsed[*AliasSet] != -2) {
- DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet)
- << " [%reg" << *AliasSet
- << "] is never used, removing it from live set\n");
- removePhysReg(*AliasSet);
- }
- }
- }
-
- // If this instruction is a call, make sure there are no dirty registers. The
- // call might throw an exception, and the landing pad expects to find all
- // registers in stack slots.
- if (TID.isCall())
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) {
- if (PhysRegsUsed[i] <= 0) continue;
- unsigned VirtReg = PhysRegsUsed[i];
- if (!isVirtRegModified(VirtReg)) continue;
- DEBUG(dbgs() << " Storing dirty %reg" << VirtReg);
- storeVirtReg(MBB, MI, VirtReg, i, false);
- markVirtRegModified(VirtReg, false);
- DEBUG(dbgs() << " because the call might throw\n");
- }
-
- // Finally, if this is a noop copy instruction, zap it. (Except that if
- // the copy is dead, it must be kept to avoid messing up liveness info for
- // the register scavenger. See pr4100.)
- if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
- SrcCopySubReg, DstCopySubReg) &&
- SrcCopyReg == DstCopyReg && SrcCopySubReg == DstCopySubReg &&
- DeadDefs.empty()) {
- ++NumCopies;
- MBB.erase(MI);
- }
- }
-
- MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
-
- // Spill all physical registers holding virtual registers now.
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
- if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
- if (unsigned VirtReg = PhysRegsUsed[i])
- spillVirtReg(MBB, MI, VirtReg, i);
- else
- removePhysReg(i);
- }
-
-#if 0
- // This checking code is very expensive.
- bool AllOk = true;
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
- if (unsigned PR = Virt2PhysRegMap[i]) {
- cerr << "Register still mapped: " << i << " -> " << PR << "\n";
- AllOk = false;
- }
- assert(AllOk && "Virtual registers still in phys regs?");
-#endif
-
- // Clear any physical registers which appear live at the end of the basic
- // block, but which do not hold any virtual registers. e.g., the stack
- // pointer.
- PhysRegsUseOrder.clear();
-}
-
-/// runOnMachineFunction - Register allocate the whole function
-///
-bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
- DEBUG(dbgs() << "Machine Function\n");
- MF = &Fn;
- MRI = &Fn.getRegInfo();
- TM = &Fn.getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
-
- PhysRegsUsed.assign(TRI->getNumRegs(), -1);
-
- // At various places we want to efficiently check to see whether a register
- // is allocatable. To handle this, we mark all unallocatable registers as
- // being pinned down, permanently.
- {
- BitVector Allocable = TRI->getAllocatableSet(Fn);
- for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
- if (!Allocable[i])
- PhysRegsUsed[i] = -2; // Mark the reg unallocable.
- }
-
- // initialize the virtual->physical register map to have a 'null'
- // mapping for all virtual registers
- unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
- StackSlotForVirtReg.grow(LastVirtReg);
- Virt2PhysRegMap.grow(LastVirtReg);
- Virt2LastUseMap.grow(LastVirtReg);
- VirtRegModified.resize(LastVirtReg+1 -
- TargetRegisterInfo::FirstVirtualRegister);
- UsedInMultipleBlocks.resize(LastVirtReg+1 -
- TargetRegisterInfo::FirstVirtualRegister);
-
- // Loop over all of the basic blocks, eliminating virtual register references
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB)
- AllocateBasicBlock(*MBB);
-
- StackSlotForVirtReg.clear();
- PhysRegsUsed.clear();
- VirtRegModified.clear();
- UsedInMultipleBlocks.clear();
- Virt2PhysRegMap.clear();
- Virt2LastUseMap.clear();
- return true;
-}
-
-FunctionPass *llvm::createLocalRegisterAllocator() {
- return new RALocal();
-}
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 4fafd28..7e61a12 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -396,28 +396,23 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
if (srcRegIsPhysical && dstRegIsPhysical)
continue;
- // If it's a copy that includes a virtual register but the source and
- // destination classes differ then we can't coalesce, so continue with
- // the next instruction.
- const TargetRegisterClass *srcRegClass = srcRegIsPhysical ?
- tri->getPhysicalRegisterRegClass(srcReg) : mri->getRegClass(srcReg);
-
- const TargetRegisterClass *dstRegClass = dstRegIsPhysical ?
- tri->getPhysicalRegisterRegClass(dstReg) : mri->getRegClass(dstReg);
-
- if (srcRegClass != dstRegClass)
+ // If it's a copy that includes two virtual registers but the source and
+ // destination classes differ then we can't coalesce.
+ if (!srcRegIsPhysical && !dstRegIsPhysical &&
+ mri->getRegClass(srcReg) != mri->getRegClass(dstReg))
continue;
- // We also need any physical regs to be allocable, coalescing with
- // a non-allocable register is invalid.
- if (srcRegIsPhysical) {
+ // If one is physical and one is virtual, check that the physical is
+ // allocatable in the class of the virtual.
+ if (srcRegIsPhysical && !dstRegIsPhysical) {
+ const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg);
if (std::find(dstRegClass->allocation_order_begin(*mf),
dstRegClass->allocation_order_end(*mf), srcReg) ==
dstRegClass->allocation_order_end(*mf))
continue;
}
-
- if (dstRegIsPhysical) {
+ if (!srcRegIsPhysical && dstRegIsPhysical) {
+ const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg);
if (std::find(srcRegClass->allocation_order_begin(*mf),
srcRegClass->allocation_order_end(*mf), dstReg) ==
srcRegClass->allocation_order_end(*mf))
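
The rewritten PBQP test above splits coalescing legality into three cases: phys-phys pairs are skipped, virt-virt pairs must share a register class, and a mixed pair requires the physical register to appear in the virtual side's allocation order. A sketch of that decision table with toy types:

    #include <algorithm>
    #include <vector>

    struct RegClass { std::vector<unsigned> AllocOrder; };

    bool canCoalesce(bool SrcPhys, bool DstPhys,
                     unsigned SrcReg, unsigned DstReg,
                     const RegClass *SrcRC, const RegClass *DstRC) {
      if (SrcPhys && DstPhys)
        return false;            // phys-phys copies are left alone
      if (!SrcPhys && !DstPhys)
        return SrcRC == DstRC;   // virt-virt: classes must match
      const RegClass *RC = SrcPhys ? DstRC : SrcRC;  // the virtual side
      unsigned Phys = SrcPhys ? SrcReg : DstReg;
      return std::find(RC->AllocOrder.begin(), RC->AllocOrder.end(), Phys) !=
             RC->AllocOrder.end();  // phys reg must be allocatable there
    }

    int main() {
      RegClass GPR{{1, 2, 3}};
      // Physical reg 2 copied into a virtual register of class GPR: legal.
      return canCoalesce(true, false, 2, 0, nullptr, &GPR) ? 0 : 1;
    }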
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 1131e3d..ab0bc2d 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -16,6 +16,8 @@
#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Pass.h"
@@ -33,6 +35,160 @@ char RegisterCoalescer::ID = 0;
//
RegisterCoalescer::~RegisterCoalescer() {}
+unsigned CoalescerPair::compose(unsigned a, unsigned b) const {
+ if (!a) return b;
+ if (!b) return a;
+ return tri_.composeSubRegIndices(a, b);
+}
+
+bool CoalescerPair::isMoveInstr(const MachineInstr *MI,
+ unsigned &Src, unsigned &Dst,
+ unsigned &SrcSub, unsigned &DstSub) const {
+ if (MI->isCopy()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = MI->getOperand(0).getSubReg();
+ Src = MI->getOperand(1).getReg();
+ SrcSub = MI->getOperand(1).getSubReg();
+ } else if (MI->isSubregToReg()) {
+ Dst = MI->getOperand(0).getReg();
+ DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm());
+ Src = MI->getOperand(2).getReg();
+ SrcSub = MI->getOperand(2).getSubReg();
+ } else if (!tii_.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub)) {
+ return false;
+ }
+ return true;
+}
+
+bool CoalescerPair::setRegisters(const MachineInstr *MI) {
+ srcReg_ = dstReg_ = subIdx_ = 0;
+ newRC_ = 0;
+ flipped_ = crossClass_ = false;
+
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+ return false;
+ partial_ = SrcSub || DstSub;
+
+ // If one register is a physreg, it must be Dst.
+ if (TargetRegisterInfo::isPhysicalRegister(Src)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ flipped_ = true;
+ }
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+
+ if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
+ // Eliminate DstSub on a physreg.
+ if (DstSub) {
+ Dst = tri_.getSubReg(Dst, DstSub);
+ if (!Dst) return false;
+ DstSub = 0;
+ }
+
+ // Eliminate SrcSub by picking a corresponding Dst superregister.
+ if (SrcSub) {
+ Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
+ if (!Dst) return false;
+ SrcSub = 0;
+ } else if (!MRI.getRegClass(Src)->contains(Dst)) {
+ return false;
+ }
+ } else {
+ // Both registers are virtual.
+
+ // Both registers have subreg indices.
+ if (SrcSub && DstSub) {
+ // For now we only handle the case of identical indices in commensurate
+ // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg
+ // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg.
+ if (SrcSub != DstSub)
+ return false;
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ if (!getCommonSubClass(DstRC, SrcRC))
+ return false;
+ SrcSub = DstSub = 0;
+ }
+
+ // There can be no SrcSub.
+ if (SrcSub) {
+ std::swap(Src, Dst);
+ DstSub = SrcSub;
+ SrcSub = 0;
+ assert(!flipped_ && "Unexpected flip");
+ flipped_ = true;
+ }
+
+ // Find the new register class.
+ const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
+ const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
+ if (DstSub)
+ newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
+ else
+ newRC_ = getCommonSubClass(DstRC, SrcRC);
+ if (!newRC_)
+ return false;
+ crossClass_ = newRC_ != DstRC || newRC_ != SrcRC;
+ }
+ // Check our invariants
+ assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
+ assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
+ "Cannot have a physical SubIdx");
+ srcReg_ = Src;
+ dstReg_ = Dst;
+ subIdx_ = DstSub;
+ return true;
+}
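
setRegisters above canonicalizes every copy before coalescing: if either side is physical it becomes Dst, sub-register indices are folded away or pushed onto Dst, and flipped_ records the orientation change, so the rest of the coalescer can assume Src is virtual and a physical Dst carries no sub-index. A toy model of just the swap-and-flag step (the register-numbering convention below is invented for the example):

    #include <cassert>
    #include <utility>

    struct Pair {
      unsigned Src, Dst, SrcSub, DstSub;
      bool Flipped = false;
    };

    bool isPhys(unsigned Reg) { return Reg < 100; }  // toy convention

    bool normalize(Pair &P) {
      if (isPhys(P.Src)) {
        if (isPhys(P.Dst))
          return false;  // phys-phys pairs are not coalescable
        std::swap(P.Src, P.Dst);
        std::swap(P.SrcSub, P.DstSub);
        P.Flipped = true;  // remember the orientation change
      }
      return true;
    }

    int main() {
      Pair P{7, 200, 0, 0};  // physical source, virtual destination
      assert(normalize(P) && P.Flipped && !isPhys(P.Src));
    }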
+
+bool CoalescerPair::flip() {
+ if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_))
+ return false;
+ std::swap(srcReg_, dstReg_);
+ flipped_ = !flipped_;
+ return true;
+}
+
+bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
+ if (!MI)
+ return false;
+ unsigned Src, Dst, SrcSub, DstSub;
+ if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+ return false;
+
+ // Find the virtual register that is srcReg_.
+ if (Dst == srcReg_) {
+ std::swap(Src, Dst);
+ std::swap(SrcSub, DstSub);
+ } else if (Src != srcReg_) {
+ return false;
+ }
+
+ // Now check that Dst matches dstReg_.
+ if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) {
+ if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+ return false;
+ assert(!subIdx_ && "Inconsistent CoalescerPair state.");
+ // DstSub could be set for a physreg from INSERT_SUBREG.
+ if (DstSub)
+ Dst = tri_.getSubReg(Dst, DstSub);
+ // Full copy of Src.
+ if (!SrcSub)
+ return dstReg_ == Dst;
+ // This is a partial register copy. Check that the parts match.
+ return tri_.getSubReg(dstReg_, SrcSub) == Dst;
+ } else {
+ // dstReg_ is virtual.
+ if (dstReg_ != Dst)
+ return false;
+ // Registers match, do the subregisters line up?
+ return compose(subIdx_, SrcSub) == DstSub;
+ }
+}
+
// Because of the way .a files work, we must force the SimpleRC
// implementation to be pulled in if the RegisterCoalescer classes are
// pulled in. Otherwise we run the risk of RegisterCoalescer being
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 690e59f..43b3fb6 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -141,6 +141,10 @@ void RegScavenger::forward() {
// Find out which registers are early clobbered, killed, defined, and marked
// def-dead in this instruction.
+ // FIXME: The scavenger is not predication aware. If the instruction is
+ // predicated, conservatively assume "kill" markers do not actually kill the
+ // register. Similarly, ignore "dead" markers.
+ bool isPred = TII->isPredicated(MI);
BitVector EarlyClobberRegs(NumPhysRegs);
BitVector KillRegs(NumPhysRegs);
BitVector DefRegs(NumPhysRegs);
@@ -155,11 +159,11 @@ void RegScavenger::forward() {
if (MO.isUse()) {
// Two-address operands implicitly kill.
- if (MO.isKill() || MI->isRegTiedToDefOperand(i))
+ if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i)))
addRegWithSubRegs(KillRegs, Reg);
} else {
assert(MO.isDef());
- if (MO.isDead())
+ if (!isPred && MO.isDead())
addRegWithSubRegs(DeadRegs, Reg);
else
addRegWithSubRegs(DefRegs, Reg);
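
The scavenger change above adopts a conservative rule: on a predicated instruction, kill and dead flags may not take effect at run time, so they are ignored and the registers stay live. A sketch of the flag handling in isolation:

    #include <cstdio>

    struct Operand { bool IsUse, IsKill, IsDead; };

    void classify(const Operand &MO, bool IsPredicated) {
      if (MO.IsUse) {
        if (!IsPredicated && MO.IsKill)
          printf("use kills the register\n");
        else
          printf("register stays live\n");  // predicated: distrust the flag
      } else {
        if (!IsPredicated && MO.IsDead)
          printf("def is dead on arrival\n");
        else
          printf("def makes the register live\n");
      }
    }

    int main() {
      Operand Kill{true, true, false};
      classify(Kill, false);  // normal instruction: kill honored
      classify(Kill, true);   // predicated: kill conservatively ignored
    }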
@@ -238,8 +242,18 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
return 0;
}
+/// getRegsAvailable - Return all available registers in the register class
+/// in Mask.
+void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC,
+ BitVector &Mask) {
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+ I != E; ++I)
+ if (!isAliasUsed(*I))
+ Mask.set(*I);
+}
+
/// findSurvivorReg - Return the candidate register that is unused for the
-/// longest after MBBI. UseMI is set to the instruction where the search
+/// longest after StartMI. UseMI is set to the instruction where the search
/// stopped.
///
/// No more than InstrLimit instructions are inspected.
@@ -258,6 +272,10 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
bool inVirtLiveRange = false;
for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+ if (MI->isDebugValue()) {
+ ++InstrLimit; // Don't count debug instructions
+ continue;
+ }
bool isVirtKillInsn = false;
bool isVirtDefInsn = false;
// Remove any candidates touched by instruction.
@@ -321,13 +339,16 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
Candidates.reset(MO.getReg());
}
+ // Try to find a register that's unused if there is one, as then we won't
+ // have to spill.
+ if ((Candidates & RegsAvailable).any())
+ Candidates &= RegsAvailable;
+
// Find the register whose use is furthest away.
MachineBasicBlock::iterator UseMI;
unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
- // If we found an unused register there is no reason to spill it. We have
- // probably found a callee-saved register that has been saved in the
- // prologue, but happens to be unused at this point.
+ // If we found an unused register, there is no reason to spill it.
if (!isAliasUsed(SReg))
return SReg;
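
The candidate-narrowing step above is a bitmask intersection: if any legal candidate is currently unused, the search is restricted to those, because scavenging an unused register requires no spill at all. The same trick with std::bitset:

    #include <bitset>
    #include <iostream>

    int main() {
      std::bitset<8> Candidates("01110110");  // registers legal for the class
      std::bitset<8> Available ("00010010");  // registers holding no value

      if ((Candidates & Available).any())
        Candidates &= Available;  // prefer the ones that cost nothing

      std::cout << Candidates << '\n';  // 00010010: only the free candidates
    }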
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index da20c12..7d39dc4 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -380,26 +380,26 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
}
#endif
-/// InitDAGTopologicalSorting - create the initial topological
+/// InitDAGTopologicalSorting - create the initial topological
/// ordering from the DAG to be scheduled.
///
-/// The idea of the algorithm is taken from
+/// The idea of the algorithm is taken from
/// "Online algorithms for managing the topological order of
/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
-/// This is the MNR algorithm, which was first introduced by
-/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
/// "Maintaining a topological order under edge insertions".
///
-/// Short description of the algorithm:
+/// Short description of the algorithm:
///
/// Topological ordering, ord, of a DAG maps each node to a topological
/// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
///
-/// This means that if there is a path from the node X to the node Z,
+/// This means that if there is a path from the node X to the node Z,
/// then ord(X) < ord(Z).
///
/// This property can be used to check for reachability of nodes:
-/// if Z is reachable from X, then an insertion of the edge Z->X would
+/// if Z is reachable from X, then an insertion of the edge Z->X would
/// create a cycle.
///
/// The algorithm first computes a topological ordering for the DAG by
@@ -431,7 +431,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
// Collect leaf nodes.
WorkList.push_back(SU);
}
- }
+ }
int Id = DAGSize;
while (!WorkList.empty()) {
@@ -456,7 +456,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
SUnit *SU = &SUnits[i];
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
+ assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] &&
"Wrong topological sorting");
}
}
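
The InitDAGTopologicalSorting comment reflowed above describes the MNR scheme: each node carries a topological index, edges always run from lower to higher index, and a reachability query only needs a DFS bounded by the two indices. A compact standalone model of that query (this illustrates the idea, not LLVM's implementation):

    #include <vector>

    struct DAG {
      std::vector<std::vector<int>> Succs;  // adjacency list
      std::vector<int> Ord;                 // topological index per node

      bool dfs(int N, int ToOrd, std::vector<char> &Seen) const {
        if (Ord[N] == ToOrd) return true;   // indices are unique: hit target
        Seen[N] = 1;
        for (int S : Succs[N])
          if (!Seen[S] && Ord[S] <= ToOrd && dfs(S, ToOrd, Seen))
            return true;                    // only search the index window
        return false;
      }

      bool reachable(int From, int To) const {
        if (Ord[From] >= Ord[To])
          return false;  // the ordering rules it out with no search at all
        std::vector<char> Seen(Succs.size(), 0);
        return dfs(From, Ord[To], Seen);
      }
    };

    int main() {
      DAG G;
      G.Succs = {{1}, {2}, {}};  // 0 -> 1 -> 2
      G.Ord   = {0, 1, 2};
      return G.reachable(0, 2) && !G.reachable(2, 0) ? 0 : 1;
    }

Inserting an edge Z->X creates a cycle exactly when Z is reachable from X, which is the check the comment describes.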
@@ -494,7 +494,7 @@ void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
bool& HasLoop) {
std::vector<const SUnit*> WorkList;
- WorkList.reserve(SUnits.size());
+ WorkList.reserve(SUnits.size());
WorkList.push_back(SU);
do {
@@ -504,20 +504,20 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
for (int I = SU->Succs.size()-1; I >= 0; --I) {
int s = SU->Succs[I].getSUnit()->NodeNum;
if (Node2Index[s] == UpperBound) {
- HasLoop = true;
+ HasLoop = true;
return;
}
// Visit successors if not already and in affected region.
if (!Visited.test(s) && Node2Index[s] < UpperBound) {
WorkList.push_back(SU->Succs[I].getSUnit());
- }
- }
+ }
+ }
} while (!WorkList.empty());
}
-/// Shift - Renumber the nodes so that the topological ordering is
+/// Shift - Renumber the nodes so that the topological ordering is
/// preserved.
-void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
+void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
int UpperBound) {
std::vector<int> L;
int shift = 0;
@@ -568,7 +568,7 @@ bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
// Is Ord(TargetSU) < Ord(SU) ?
if (LowerBound < UpperBound) {
Visited.reset();
- // There may be a path from TargetSU to SU. Check for it.
+ // There may be a path from TargetSU to SU. Check for it.
DFS(TargetSU, UpperBound, HasLoop);
}
return HasLoop;
@@ -580,8 +580,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) {
Index2Node[index] = n;
}
-ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort(
- std::vector<SUnit> &sunits)
- : SUnits(sunits) {}
+ScheduleDAGTopologicalSort::
+ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits) : SUnits(sunits) {}
ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {}
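
The comment block restored above hinges on one invariant: a topological index assigns ord(X) < ord(Y) to every edge X->Y, so inserting an edge Z->X can only create a cycle when X already reaches Z, which requires ord(X) < ord(Z). A self-contained sketch of the bounded DFS that IsReachable/DFS perform, on plain adjacency lists rather than SUnits (all names here are illustrative):

#include <vector>

// Succs is an adjacency list; Ord[n] is node n's topological index, with
// Ord[a] < Ord[b] for every existing edge a->b.
bool wouldCreateCycle(const std::vector<std::vector<int> > &Succs,
                      const std::vector<int> &Ord, int Z, int X) {
  if (Ord[X] >= Ord[Z])
    return false; // X cannot reach Z, so adding Z->X is safe.
  // DFS from X, visiting only nodes whose index does not exceed Ord[Z];
  // nodes with a larger index cannot lie on a path ending at Z.
  std::vector<int> WorkList(1, X);
  std::vector<bool> Visited(Ord.size(), false);
  while (!WorkList.empty()) {
    int N = WorkList.back();
    WorkList.pop_back();
    if (N == Z)
      return true; // X reaches Z, so Z->X would close a cycle.
    if (Visited[N])
      continue;
    Visited[N] = true;
    for (unsigned I = 0; I != Succs[N].size(); ++I) {
      int S = Succs[N][I];
      if (!Visited[S] && Ord[S] <= Ord[Z])
        WorkList.push_back(S);
    }
  }
  return false;
}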
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index ee08e1d..0a2fb37 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -50,11 +50,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
break;
}
}
- bool Success = TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second,
- SU->CopyDstRC, SU->CopySrcRC,
- DebugLoc());
- (void)Success;
- assert(Success && "copyRegToReg failed!");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(VRI->second);
} else {
// Copy from physical register.
assert(I->getReg() && "Unknown physical register!");
@@ -62,11 +59,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
isNew = isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
- bool Success = TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(),
- SU->CopyDstRC, SU->CopySrcRC,
- DebugLoc());
- (void)Success;
- assert(Success && "copyRegToReg failed!");
+ BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
+ .addReg(I->getReg());
}
break;
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index ad82db2..d90659b 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -69,8 +69,10 @@ namespace llvm {
const SmallSet<unsigned, 8> &LoopLiveIns) {
unsigned Count = 0;
for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I, ++Count) {
+ I != E; ++I) {
const MachineInstr *MI = I;
+ if (MI->isDebugValue())
+ continue;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isUse())
@@ -79,6 +81,7 @@ namespace llvm {
if (LoopLiveIns.count(MOReg))
Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
}
+ ++Count; // Not incremented every iteration; dbg_values are skipped above.
}
const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
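
The loop change above exists so Count stays in step with what the scheduler later sees: dbg_value instructions are not real instructions, so the increment moves out of the for-clause and is skipped for them. A trimmed-down model of the pattern (MI here is a stand-in struct, not the real MachineInstr):

#include <vector>

struct MI { bool IsDebugValue; };

unsigned countRealInstrs(const std::vector<MI> &Block) {
  unsigned Count = 0;
  for (unsigned i = 0, e = Block.size(); i != e; ++i) {
    if (Block[i].IsDebugValue)
      continue; // dbg_value is invisible to the scheduler: don't count it.
    ++Count;    // Incremented in the body, not the for-clause, as above.
  }
  return Count;
}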
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 0cfd5e1..799988a 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -1,5 +1,4 @@
add_llvm_library(LLVMSelectionDAG
- CallingConvLower.cpp
DAGCombiner.cpp
FastISel.cpp
FunctionLoweringInfo.cpp
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6bddd78..e671752 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -211,6 +211,7 @@ namespace {
SDValue visitBUILD_VECTOR(SDNode *N);
SDValue visitCONCAT_VECTORS(SDNode *N);
SDValue visitVECTOR_SHUFFLE(SDNode *N);
+ SDValue visitMEMBARRIER(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
@@ -668,7 +669,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
- return DAG.getExtLoad(ExtType, dl, PVT,
+ return DAG.getExtLoad(ExtType, PVT, dl,
LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
MemVT, LD->isVolatile(),
@@ -890,7 +891,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
: LD->getExtensionType();
- SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl,
LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
MemVT, LD->isVolatile(),
@@ -1079,6 +1080,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
+ case ISD::MEMBARRIER: return visitMEMBARRIER(N);
}
return SDValue();
}
@@ -1313,7 +1315,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
GA->getOpcode() == ISD::GlobalAddress)
- return DAG.getGlobalAddress(GA->getGlobal(), VT,
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
GA->getOffset() +
(uint64_t)N1C->getSExtValue());
// fold ((c1-A)+c2) -> (c1+c2)-A
@@ -1550,7 +1552,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
// fold (sub Sym, c) -> Sym-c
if (N1C && GA->getOpcode() == ISD::GlobalAddress)
- return DAG.getGlobalAddress(GA->getGlobal(), VT,
+ return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
GA->getOffset() -
(uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
@@ -2028,7 +2030,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
- // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
+ // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
//
// do not sink logical op inside of a vector extend, since it may combine
// into a vsetcc.
@@ -2038,7 +2040,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// Avoid infinite looping with PromoteIntBinOp.
(N0.getOpcode() == ISD::ANY_EXTEND &&
(!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
- (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
+ (N0.getOpcode() == ISD::TRUNCATE &&
+ (!TLI.isZExtFree(VT, Op0VT) ||
+ !TLI.isTruncateFree(Op0VT, VT)) &&
+ TLI.isTypeLegal(Op0VT))) &&
!VT.isVector() &&
Op0VT == N1.getOperand(0).getValueType() &&
(!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
@@ -2193,7 +2198,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
@@ -2216,7 +2221,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
@@ -2250,7 +2255,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue NewLoad =
- DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(), LN0->getSrcValueOffset(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
@@ -2286,7 +2291,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
- DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
+ DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
@@ -2317,7 +2322,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
// fold (or x, undef) -> -1
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
+ if (!LegalOperations &&
+ (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
}
@@ -2425,6 +2431,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
return SDValue(Rot, 0);
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -3158,6 +3169,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return NewSRL;
}
+ // Attempt to convert a srl of a load into a narrower zero-extending load.
+ SDValue NarrowLoad = ReduceLoadWidth(N);
+ if (NarrowLoad.getNode())
+ return NarrowLoad;
+
// Here is a common situation. We want to optimize:
//
// %a = ...
@@ -3487,7 +3503,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
@@ -3531,7 +3547,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
@@ -3557,24 +3573,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N0VT.getSizeInBits())
- return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
else {
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
}
}
@@ -3635,10 +3651,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
- (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
- (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
- N0.getValueType()) ||
- !TLI.isZExtFree(N0.getValueType(), VT))) {
+ (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3679,7 +3692,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
@@ -3723,7 +3736,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
@@ -3881,7 +3894,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
@@ -3925,8 +3938,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
- VT, LN0->getChain(), LN0->getBasePtr(),
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT,
+ N->getDebugLoc(),
+ LN0->getChain(), LN0->getBasePtr(),
LN0->getSrcValue(),
LN0->getSrcValueOffset(), MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
@@ -3950,24 +3964,24 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// we know that the element size of the sext'd result matches the
// element size of the compare operands.
if (VT.getSizeInBits() == N0VT.getSizeInBits())
- return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend
else {
- EVT MatchingElementType =
- EVT::getIntegerVT(*DAG.getContext(),
- N0VT.getScalarType().getSizeInBits());
- EVT MatchingVectorType =
- EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
- N0VT.getVectorNumElements());
- SDValue VsetCC =
- DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
- N0.getOperand(1),
- cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
}
}
@@ -4024,6 +4038,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
/// extended, also fold the extension to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
+
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4040,6 +4055,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
return SDValue();
+ } else if (Opc == ISD::SRL) {
+ // Another special case: SRL is basically zero-extending a narrower
+ // value.
+ ExtType = ISD::ZEXTLOAD;
+ N0 = SDValue(N, 0);
+ ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01) return SDValue();
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - N01->getZExtValue());
}
unsigned EVTBits = ExtVT.getSizeInBits();
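
The new SRL case above derives the narrow type from the shift amount: for (srl (load i32 x), 16) it requests ExtVT = i16, since 32 - 16 = 16, and the fold is sound because the shifted-out low bits are discarded while the remaining high bits are zero-filled. A quick standalone check of that arithmetic, assuming 32-bit unsigned and ignoring the pointer-offset and endianness handling the real code performs later in the function:

#include <cassert>

// Model of the shifted load: take NewBits bits starting ShAmt bits up.
unsigned narrowedLoad(unsigned Loaded, unsigned ShAmt, unsigned NewBits) {
  unsigned Mask = (NewBits >= 32) ? ~0u : ((1u << NewBits) - 1);
  return (Loaded >> ShAmt) & Mask; // == zext of the narrow load's value
}

int main() {
  // (srl (load i32 0xDEADBEEF), 16) keeps the high half; an i16 zextload
  // of that half, zero-extended back to i32, yields the same 0xDEAD.
  assert(narrowedLoad(0xDEADBEEFu, 16, 32 - 16) == 0xDEADu);
  return 0;
}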
@@ -4085,7 +4109,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
- : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
+ : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
NewAlign);
@@ -4172,7 +4196,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), EVT,
@@ -4189,7 +4213,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(), EVT,
@@ -4243,8 +4267,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
- if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
- return ReduceLoadWidth(N);
+ if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
+ SDValue Reduced = ReduceLoadWidth(N);
+ if (Reduced.getNode())
+ return Reduced;
+ }
+
+ // Simplify the operands using demanded-bits information.
+ if (!VT.isVector() &&
+ SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -4943,7 +4976,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
@@ -5527,8 +5560,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment())
- return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
- LD->getValueType(0),
+ return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
+ N->getDebugLoc(),
Chain, Ptr, LD->getSrcValue(),
LD->getSrcValueOffset(), LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), Align);
@@ -5551,8 +5584,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
} else {
- ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
- LD->getValueType(0),
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
+ LD->getDebugLoc(),
BetterChain, Ptr, LD->getSrcValue(),
LD->getSrcValueOffset(),
LD->getMemoryVT(),
@@ -6077,7 +6110,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Check if the result type doesn't match the inserted element type. A
// SCALAR_TO_VECTOR may truncate the inserted element and the
// EXTRACT_VECTOR_ELT may widen the extracted vector.
- EVT EltVT = InVec.getValueType().getVectorElementType();
SDValue InOp = InVec.getOperand(0);
EVT NVT = N->getValueType(0);
if (InOp.getValueType() != NVT) {
@@ -6277,8 +6309,6 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
}
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
- return SDValue();
-
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -6334,6 +6364,59 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
+ if (!TLI.getShouldFoldAtomicFences())
+ return SDValue();
+
+ SDValue atomic = N->getOperand(0);
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue fence = atomic.getOperand(0);
+ if (fence.getOpcode() != ISD::MEMBARRIER)
+ return SDValue();
+
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2),
+ atomic.getOperand(3)), atomic.getResNo());
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
+ fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2)),
+ atomic.getResNo());
+ default:
+ return SDValue();
+ }
+}
+
/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
/// an AND to a vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -6565,8 +6648,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
LLD->getAlignment());
} else {
Load = DAG.getExtLoad(LLD->getExtensionType(),
- TheSelect->getDebugLoc(),
TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
LLD->getChain(), Addr, 0, 0,
LLD->getMemoryVT(),
LLD->isVolatile(),
@@ -6807,38 +6890,34 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
}
}
- // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
+ // Check to see if this is an integer abs.
+ // select_cc setg[te] X, 0, X, -X ->
+ // select_cc setgt X, -1, X, -X ->
+ // select_cc setl[te] X, 0, -X, X ->
+ // select_cc setlt X, 1, -X, X ->
// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
- if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
- N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
- N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
+ if (N1C) {
+ ConstantSDNode *SubC = NULL;
+ if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
+ (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
+ N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
+ else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
+ (N1C->isOne() && CC == ISD::SETLT)) &&
+ N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
+ SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
+
EVT XType = N0.getValueType();
- SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
- DAG.getConstant(XType.getSizeInBits()-1,
- getShiftAmountTy()));
- SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType,
- N0, Shift);
- AddToWorkList(Shift.getNode());
- AddToWorkList(Add.getNode());
- return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
- }
- // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
- // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
- if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
- N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
- if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
- EVT XType = N0.getValueType();
- if (SubC->isNullValue() && XType.isInteger()) {
- SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
- N0,
- DAG.getConstant(XType.getSizeInBits()-1,
- getShiftAmountTy()));
- SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
- XType, N0, Shift);
- AddToWorkList(Shift.getNode());
- AddToWorkList(Add.getNode());
- return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
- }
+ if (SubC && SubC->isNullValue() && XType.isInteger()) {
+ SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+ N0,
+ DAG.getConstant(XType.getSizeInBits()-1,
+ getShiftAmountTy()));
+ SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+ XType, N0, Shift);
+ AddToWorkList(Shift.getNode());
+ AddToWorkList(Add.getNode());
+ return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
}
}
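
All four select_cc forms recognized above funnel into the same branch-free identity: with Y = X sra (bits-1), abs(X) = (X + Y) xor Y, since Y is all zeros for non-negative X and all ones otherwise. A minimal check of that identity in plain C++ (this assumes 32-bit int and an arithmetic right shift on signed values, which ISD::SRA guarantees in the DAG but C++ leaves implementation-defined; INT_MIN wraps, exactly as the DAG form does):

#include <cassert>

int absViaSraAddXor(int X) {
  int Y = X >> 31;    // All zeros for X >= 0, all ones for X < 0 (SRA).
  return (X + Y) ^ Y; // Identity for X >= 0, two's-complement negate else.
}

int main() {
  assert(absViaSraAddXor(5) == 5);
  assert(absViaSraAddXor(-7) == 7);
  assert(absViaSraAddXor(0) == 0);
  return 0;
}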
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 95f4d07..3f7e4a5 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -44,18 +44,38 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
-#include "FunctionLoweringInfo.h"
using namespace llvm;
+/// startNewBlock - Set the current block to which generated machine
+/// instructions will be appended, and clear the local CSE map.
+///
+void FastISel::startNewBlock() {
+ LocalValueMap.clear();
+
+ // Start out as null, meaning no local-value instructions have
+ // been emitted.
+ LastLocalValue = 0;
+
+ // Advance the last local value past any EH_LABEL instructions.
+ MachineBasicBlock::iterator
+ I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end();
+ while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) {
+ LastLocalValue = I;
+ ++I;
+ }
+}
+
bool FastISel::hasTrivialKill(const Value *V) const {
// Don't consider constants or arguments to have trivial kills.
const Instruction *I = dyn_cast<Instruction>(V);
@@ -99,25 +119,31 @@ unsigned FastISel::getRegForValue(const Value *V) {
// cache values defined by Instructions across blocks, and other values
// only locally. This is because Instructions already have the SSA
// def-dominates-use requirement enforced.
- DenseMap<const Value *, unsigned>::iterator I = ValueMap.find(V);
- if (I != ValueMap.end())
- return I->second;
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end()) {
+ unsigned Reg = I->second;
+ return Reg;
+ }
unsigned Reg = LocalValueMap[V];
if (Reg != 0)
return Reg;
// In bottom-up mode, just create the virtual register which will be used
// to hold the value. It will be materialized later.
- if (IsBottomUp) {
- Reg = createResultReg(TLI.getRegClassFor(VT));
- if (isa<Instruction>(V))
- ValueMap[V] = Reg;
- else
- LocalValueMap[V] = Reg;
- return Reg;
- }
+ if (isa<Instruction>(V) &&
+ (!isa<AllocaInst>(V) ||
+ !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
+ return FuncInfo.InitializeRegForValue(V);
+
+ MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+
+ // Materialize the value in a register. Emit any instructions in the
+ // local value area.
+ Reg = materializeRegForValue(V, VT);
- return materializeRegForValue(V, VT);
+ leaveLocalValueArea(SaveInsertPt);
+
+ return Reg;
}
/// materializeRegForValue - Helper for getRegForValue. This function is
@@ -161,11 +187,15 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
}
}
} else if (const Operator *Op = dyn_cast<Operator>(V)) {
- if (!SelectOperator(Op, Op->getOpcode())) return 0;
- Reg = LocalValueMap[Op];
+ if (!SelectOperator(Op, Op->getOpcode()))
+ if (!isa<Instruction>(Op) ||
+ !TargetSelectInstruction(cast<Instruction>(Op)))
+ return 0;
+ Reg = lookUpRegForValue(Op);
} else if (isa<UndefValue>(V)) {
Reg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
}
// If target-independent code couldn't handle the value, give target-specific
@@ -175,8 +205,10 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
// Don't cache constant materializations in the general ValueMap.
// To do so would require tracking what uses they dominate.
- if (Reg != 0)
+ if (Reg != 0) {
LocalValueMap[V] = Reg;
+ LastLocalValue = MRI.getVRegDef(Reg);
+ }
return Reg;
}
@@ -185,8 +217,9 @@ unsigned FastISel::lookUpRegForValue(const Value *V) {
// cache values defined by Instructions across blocks, and other values
// only locally. This is because Instructions already have the SSA
// def-dominates-use requirement enforced.
- if (ValueMap.count(V))
- return ValueMap[V];
+ DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V);
+ if (I != FuncInfo.ValueMap.end())
+ return I->second;
return LocalValueMap[V];
}
@@ -202,14 +235,17 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
return Reg;
}
- unsigned &AssignedReg = ValueMap[I];
+ unsigned &AssignedReg = FuncInfo.ValueMap[I];
if (AssignedReg == 0)
+ // Use the new register.
AssignedReg = Reg;
else if (Reg != AssignedReg) {
- const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
- TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
- Reg, RegClass, RegClass, DL);
+ // Arrange for uses of AssignedReg to be replaced by uses of Reg.
+ FuncInfo.RegFixups[AssignedReg] = Reg;
+
+ AssignedReg = Reg;
}
+
return AssignedReg;
}
@@ -237,6 +273,33 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
}
+void FastISel::recomputeInsertPt() {
+ if (getLastLocalValue()) {
+ FuncInfo.InsertPt = getLastLocalValue();
+ ++FuncInfo.InsertPt;
+ } else
+ FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI();
+
+ // Now skip past any EH_LABELs, which must remain at the beginning.
+ while (FuncInfo.InsertPt != FuncInfo.MBB->end() &&
+ FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL)
+ ++FuncInfo.InsertPt;
+}
+
+MachineBasicBlock::iterator FastISel::enterLocalValueArea() {
+ MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+ recomputeInsertPt();
+ return OldInsertPt;
+}
+
+void FastISel::leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt) {
+ if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
+ LastLocalValue = llvm::prior(FuncInfo.InsertPt);
+
+ // Restore the previous insert position.
+ FuncInfo.InsertPt = OldInsertPt;
+}
+
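
enterLocalValueArea and leaveLocalValueArea above bracket a region in which instructions are emitted into the local-value area at the top of the block; getRegForValue uses exactly this save/emit/restore protocol around materializeRegForValue. A hypothetical RAII wrapper sketching the same idea (InsertPtTy is an assumed nested typedef, not part of the real class, and the real code calls the two functions directly):

// Hypothetical scope guard over the enter/leave pair added above.
template <typename ISel>
class LocalValueAreaScope {
  ISel &IS;
  typename ISel::InsertPtTy Saved; // assumed typedef for the iterator type
public:
  explicit LocalValueAreaScope(ISel &S)
      : IS(S), Saved(S.enterLocalValueArea()) {}
  ~LocalValueAreaScope() { IS.leaveLocalValueArea(Saved); }
};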
/// SelectBinaryOp - Select and emit code for a binary operator instruction,
/// which has an opcode which directly corresponds to the given ISD opcode.
///
@@ -345,7 +408,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
- if (CI->getZExtValue() == 0) continue;
+ if (CI->isZero()) continue;
uint64_t Offs =
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
@@ -395,7 +458,7 @@ bool FastISel::SelectCall(const User *I) {
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
if (!DIVariable(DI->getVariable()).Verify() ||
- !MF.getMMI().hasDebugInfo())
+ !FuncInfo.MF->getMMI().hasDebugInfo())
return true;
const Value *Address = DI->getAddress();
@@ -409,11 +472,12 @@ bool FastISel::SelectCall(const User *I) {
// those are handled in SelectionDAGBuilder.
if (AI) {
DenseMap<const AllocaInst*, int>::iterator SI =
- StaticAllocaMap.find(AI);
- if (SI == StaticAllocaMap.end()) break; // VLAs.
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI == FuncInfo.StaticAllocaMap.end()) break; // VLAs.
int FI = SI->second;
if (!DI->getDebugLoc().isUnknown())
- MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc());
+ FuncInfo.MF->getMMI().setVariableDbgInfo(DI->getVariable(),
+ FI, DI->getDebugLoc());
} else
// Building the map above is target independent. Generating DBG_VALUE
// inline is target dependent; do this now.
@@ -428,23 +492,28 @@ bool FastISel::SelectCall(const User *I) {
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
- BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
- addMetadata(DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(0U).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
} else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()).
- addMetadata(DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(CI->getZExtValue()).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()).
- addMetadata(DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addFPImm(CF).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
} else if (unsigned Reg = lookUpRegForValue(V)) {
- BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()).
- addMetadata(DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Reg, RegState::Debug).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
// Insert an undef so we can see what we dropped.
- BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
- addMetadata(DI->getVariable());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(0U).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
}
return true;
}
@@ -453,14 +522,13 @@ bool FastISel::SelectCall(const User *I) {
switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
default: break;
case TargetLowering::Expand: {
- assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!");
+ assert(FuncInfo.MBB->isLandingPad() &&
+ "Call to eh.exception not in landing pad!");
unsigned Reg = TLI.getExceptionAddressRegister();
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- Reg, RC, RC, DL);
- assert(InsertedCopy && "Can't copy address registers!");
- InsertedCopy = InsertedCopy;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -472,25 +540,23 @@ bool FastISel::SelectCall(const User *I) {
switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) {
default: break;
case TargetLowering::Expand: {
- if (MBB->isLandingPad())
- AddCatchInfo(*cast<CallInst>(I), &MF.getMMI(), MBB);
+ if (FuncInfo.MBB->isLandingPad())
+ AddCatchInfo(*cast<CallInst>(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB);
else {
#ifndef NDEBUG
- CatchInfoLost.insert(cast<CallInst>(I));
+ FuncInfo.CatchInfoLost.insert(cast<CallInst>(I));
#endif
// FIXME: Mark exception selector register as live in. Hack for PR1508.
unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) MBB->addLiveIn(Reg);
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
}
unsigned Reg = TLI.getExceptionSelectorRegister();
EVT SrcVT = TLI.getPointerTy();
const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
unsigned ResultReg = createResultReg(RC);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg,
- RC, RC, DL);
- assert(InsertedCopy && "Can't copy address registers!");
- InsertedCopy = InsertedCopy;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Reg);
bool ResultRegIsKill = hasTrivialKill(I);
@@ -605,12 +671,12 @@ bool FastISel::SelectBitCast(const User *I) {
if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT);
TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT);
- ResultReg = createResultReg(DstClass);
-
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- Op0, DstClass, SrcClass, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ // Don't attempt a cross-class copy. It will likely fail.
+ if (SrcClass == DstClass) {
+ ResultReg = createResultReg(DstClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(Op0);
+ }
}
// If the reg-reg copy failed, select a BIT_CONVERT opcode.
@@ -655,14 +721,15 @@ FastISel::SelectInstruction(const Instruction *I) {
/// unless it is the immediate (fall-through) successor, and update
/// the CFG.
void
-FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
- if (MBB->isLayoutSuccessor(MSucc)) {
+FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) {
+ if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
// The unconditional fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
- TII.InsertBranch(*MBB, MSucc, NULL, SmallVector<MachineOperand, 0>());
+ TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL,
+ SmallVector<MachineOperand, 0>(), DL);
}
- MBB->addSuccessor(MSucc);
+ FuncInfo.MBB->addSuccessor(MSucc);
}
/// SelectFNeg - Emit an FNeg operation.
@@ -712,8 +779,39 @@ FastISel::SelectFNeg(const User *I) {
}
bool
+FastISel::SelectLoad(const User *I) {
+ LoadInst *LI = const_cast<LoadInst *>(cast<LoadInst>(I));
+
+ // For a load from an alloca, make a limited effort to find the value
+ // already available in a register, avoiding redundant loads.
+ if (!LI->isVolatile() && isa<AllocaInst>(LI->getPointerOperand())) {
+ BasicBlock::iterator ScanFrom = LI;
+ if (const Value *V = FindAvailableLoadedValue(LI->getPointerOperand(),
+ LI->getParent(), ScanFrom)) {
+ if (!V->use_empty() &&
+ (!isa<Instruction>(V) ||
+ cast<Instruction>(V)->getParent() == LI->getParent() ||
+ (isa<AllocaInst>(V) &&
+ FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) &&
+ (!isa<Argument>(V) ||
+ LI->getParent() == &LI->getParent()->getParent()->getEntryBlock())) {
+ unsigned ResultReg = getRegForValue(V);
+ if (ResultReg != 0) {
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
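
SelectLoad above leans on FindAvailableLoadedValue to reuse a value already computed into a register rather than reloading it from a fixed alloca slot. A much-simplified standalone model of that backward scan, where distinct integer slots stand in for non-aliasing static allocas and any other instruction is conservatively treated as a clobber (all types and names here are illustrative):

#include <vector>

struct Inst {
  enum Kind { Store, Load, Other } K;
  int Slot;   // alloca slot touched by Store/Load, -1 for Other
  int Value;  // for Store: id of the value being stored
};

// Return the value id available for a load of Slot at position Pos, or -1.
int findAvailableValue(const std::vector<Inst> &Block, int Pos, int Slot) {
  for (int I = Pos - 1; I >= 0; --I) {
    const Inst &In = Block[I];
    if (In.K == Inst::Store) {
      if (In.Slot == Slot)
        return In.Value; // prior store to the same slot: forward the value
      continue;          // distinct slots never alias in this model
    }
    if (In.K == Inst::Other)
      return -1;         // unknown instruction may clobber memory; give up
    // A prior load of the same slot could be forwarded too; omitted here.
  }
  return -1;             // reached the block start without finding a store
}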
+bool
FastISel::SelectOperator(const User *I, unsigned Opcode) {
switch (Opcode) {
+ case Instruction::Load:
+ return SelectLoad(I);
case Instruction::Add:
return SelectBinaryOp(I, ISD::ADD);
case Instruction::FAdd:
@@ -762,8 +860,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
if (BI->isUnconditional()) {
const BasicBlock *LLVMSucc = BI->getSuccessor(0);
- MachineBasicBlock *MSucc = MBBMap[LLVMSucc];
- FastEmitBranch(MSucc);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ FastEmitBranch(MSucc, BI->getDebugLoc());
return true;
}
@@ -778,7 +876,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
case Instruction::Alloca:
// FunctionLowering has the static-sized case covered.
- if (StaticAllocaMap.count(cast<AllocaInst>(I)))
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
return true;
// Dynamic-sized alloca is not handled yet.
@@ -824,32 +922,16 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
}
}
-FastISel::FastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &vm,
- DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am,
- std::vector<std::pair<MachineInstr*, unsigned> > &pn
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &cil
-#endif
- )
- : MBB(0),
- ValueMap(vm),
- MBBMap(bm),
- StaticAllocaMap(am),
- PHINodesToUpdate(pn),
-#ifndef NDEBUG
- CatchInfoLost(cil),
-#endif
- MF(mf),
- MRI(MF.getRegInfo()),
- MFI(*MF.getFrameInfo()),
- MCP(*MF.getConstantPool()),
- TM(MF.getTarget()),
+FastISel::FastISel(FunctionLoweringInfo &funcInfo)
+ : FuncInfo(funcInfo),
+ MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()),
+ MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()),
TD(*TM.getTargetData()),
TII(*TM.getInstrInfo()),
TLI(*TM.getTargetLowering()),
- IsBottomUp(false) {
+ TRI(*TM.getRegisterInfo()) {
}
FastISel::~FastISel() {}
@@ -978,7 +1060,7 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
- BuildMI(MBB, DL, II, ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
return ResultReg;
}
@@ -989,13 +1071,13 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
else {
- BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
@@ -1009,17 +1091,15 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill);
else {
- BuildMI(MBB, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1032,17 +1112,15 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm);
else {
- BuildMI(MBB, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addImm(Imm);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1055,17 +1133,15 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm);
else {
- BuildMI(MBB, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addFPImm(FPImm);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1079,19 +1155,17 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm);
else {
- BuildMI(MBB, DL, II)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addReg(Op0, Op0IsKill * RegState::Kill)
.addReg(Op1, Op1IsKill * RegState::Kill)
.addImm(Imm);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1103,13 +1177,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
else {
- BuildMI(MBB, DL, II).addImm(Imm);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(II.ImplicitDefs[0]);
}
return ResultReg;
}
@@ -1117,24 +1189,12 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
unsigned Op0, bool Op0IsKill,
uint32_t Idx) {
- const TargetRegisterClass* RC = MRI.getRegClass(Op0);
-
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
- const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG);
-
- if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Idx);
- else {
- BuildMI(MBB, DL, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addImm(Idx);
- bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC, DL);
- if (!InsertedCopy)
- ResultReg = 0;
- }
+ assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ "Cannot yet extract from physregs");
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill), Idx);
return ResultReg;
}
@@ -1154,14 +1214,14 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const TerminatorInst *TI = LLVMBB->getTerminator();
SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
- unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size();
+ unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size();
// Check successor nodes' PHI nodes that expect a constant to be available
// from this block.
for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
const BasicBlock *SuccBB = TI->getSuccessor(succ);
if (!isa<PHINode>(SuccBB->begin())) continue;
- MachineBasicBlock *SuccMBB = MBBMap[SuccBB];
+ MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
// If this terminator has multiple identical successors (common for
// switches), only handle each succ once.
@@ -1182,7 +1242,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// by bailing out early, we may leave behind some dead instructions,
// since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
// own moves. Second, this check is necessary because FastISel doesn't
- // use CreateRegForValue to create registers, so it always creates
+ // use CreateRegs to create registers, so it always creates
// exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
@@ -1190,7 +1250,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
if (VT == MVT::i1)
VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT);
else {
- PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
return false;
}
}
@@ -1205,10 +1265,10 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
unsigned Reg = getRegForValue(PHIOp);
if (Reg == 0) {
- PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
+ FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
return false;
}
- PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
DL = DebugLoc();
}
}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 65c36c1..928e1ec 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "function-lowering-info"
-#include "FunctionLoweringInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
@@ -30,7 +30,6 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
@@ -47,9 +46,11 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
if (isa<PHINode>(I)) return true;
const BasicBlock *BB = I->getParent();
for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end();
- UI != E; ++UI)
- if (cast<Instruction>(*UI)->getParent() != BB || isa<PHINode>(*UI))
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U))
return true;
+ }
return false;
}
@@ -64,9 +65,11 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) {
const BasicBlock *Entry = A->getParent()->begin();
for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
- UI != E; ++UI)
- if (cast<Instruction>(*UI)->getParent() != Entry || isa<SwitchInst>(*UI))
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
return false; // Use not in entry block.
+ }
return true;
}
@@ -74,12 +77,18 @@ FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
: TLI(tli) {
}
-void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
- bool EnableFastISel) {
+void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
Fn = &fn;
MF = &mf;
RegInfo = &MF->getRegInfo();
+ // Check whether the function can return without sret-demotion.
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(Fn->getReturnType(),
+ Fn->getAttributes().getRetAttributes(), Outs, TLI);
+ CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(),
+ Outs, Fn->getContext());
+
// Create a vreg for each argument register that is not dead and is used
// outside of the entry block for the function.
for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end();
@@ -172,31 +181,33 @@ void FunctionLoweringInfo::clear() {
#endif
LiveOutRegInfo.clear();
ArgDbgValues.clear();
+ RegFixups.clear();
}
-unsigned FunctionLoweringInfo::MakeReg(EVT VT) {
+/// CreateReg - Allocate a single virtual register for the given type.
+unsigned FunctionLoweringInfo::CreateReg(EVT VT) {
return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
}
-/// CreateRegForValue - Allocate the appropriate number of virtual registers of
+/// CreateRegs - Allocate the appropriate number of virtual registers of
/// the correctly promoted or expanded types. Assign these registers
/// consecutive vreg numbers and return the first assigned number.
///
/// In the case that the given value has struct or array type, this function
/// will assign registers for each member or element.
///
-unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
+unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) {
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, V->getType(), ValueVTs);
+ ComputeValueVTs(TLI, Ty, ValueVTs);
unsigned FirstReg = 0;
for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
EVT ValueVT = ValueVTs[Value];
- EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT);
+ EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
- unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT);
+ unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT);
for (unsigned i = 0; i != NumRegs; ++i) {
- unsigned R = MakeReg(RegisterVT);
+ unsigned R = CreateReg(RegisterVT);
if (!FirstReg) FirstReg = R;
}
}
@@ -208,7 +219,7 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
MachineBasicBlock *MBB) {
// Inform the MachineModuleInfo of the personality for this landing pad.
- const ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2));
+ const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1));
assert(CE->getOpcode() == Instruction::BitCast &&
isa<Function>(CE->getOperand(0)) &&
"Personality should be a function");
@@ -217,18 +228,18 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
// Gather all the type infos for this landing pad and pass them along to
// MachineModuleInfo.
std::vector<const GlobalVariable *> TyInfo;
- unsigned N = I.getNumOperands();
+ unsigned N = I.getNumArgOperands();
- for (unsigned i = N - 1; i > 2; --i) {
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) {
+ for (unsigned i = N - 1; i > 1; --i) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) {
unsigned FilterLength = CI->getZExtValue();
unsigned FirstCatch = i + FilterLength + !FilterLength;
- assert (FirstCatch <= N && "Invalid filter length");
+ assert(FirstCatch <= N && "Invalid filter length");
if (FirstCatch < N) {
TyInfo.reserve(N - FirstCatch);
for (unsigned j = FirstCatch; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
MMI->addCatchTypeInfo(MBB, TyInfo);
TyInfo.clear();
}
@@ -240,7 +251,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
// Filter.
TyInfo.reserve(FilterLength - 1);
for (unsigned j = i + 1; j < FirstCatch; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
MMI->addFilterTypeInfo(MBB, TyInfo);
TyInfo.clear();
}
@@ -249,10 +260,10 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI,
}
}
- if (N > 3) {
- TyInfo.reserve(N - 3);
- for (unsigned j = 3; j < N; ++j)
- TyInfo.push_back(ExtractTypeInfo(I.getOperand(j)));
+ if (N > 2) {
+ TyInfo.reserve(N - 2);
+ for (unsigned j = 2; j < N; ++j)
+ TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j)));
MMI->addCatchTypeInfo(MBB, TyInfo);
}
}
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
deleted file mode 100644
index 4067a5b..0000000
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
+++ /dev/null
@@ -1,144 +0,0 @@
-//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This implements routines for translating functions from LLVM IR into
-// Machine IR.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef FUNCTIONLOWERINGINFO_H
-#define FUNCTIONLOWERINGINFO_H
-
-#include "llvm/InlineAsm.h"
-#include "llvm/Instructions.h"
-#include "llvm/ADT/APInt.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#ifndef NDEBUG
-#include "llvm/ADT/SmallSet.h"
-#endif
-#include "llvm/CodeGen/ValueTypes.h"
-#include "llvm/CodeGen/ISDOpcodes.h"
-#include "llvm/Support/CallSite.h"
-#include <vector>
-
-namespace llvm {
-
-class AllocaInst;
-class BasicBlock;
-class CallInst;
-class Function;
-class GlobalVariable;
-class Instruction;
-class MachineInstr;
-class MachineBasicBlock;
-class MachineFunction;
-class MachineModuleInfo;
-class MachineRegisterInfo;
-class TargetLowering;
-class Value;
-
-//===--------------------------------------------------------------------===//
-/// FunctionLoweringInfo - This contains information that is global to a
-/// function that is used when lowering a region of the function.
-///
-class FunctionLoweringInfo {
-public:
- const TargetLowering &TLI;
- const Function *Fn;
- MachineFunction *MF;
- MachineRegisterInfo *RegInfo;
-
- /// CanLowerReturn - true iff the function's return value can be lowered to
- /// registers.
- bool CanLowerReturn;
-
- /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
- /// allocated to hold a pointer to the hidden sret parameter.
- unsigned DemoteRegister;
-
- /// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
- DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
-
- /// ValueMap - Since we emit code for the function a basic block at a time,
- /// we must remember which virtual registers hold the values for
- /// cross-basic-block values.
- DenseMap<const Value*, unsigned> ValueMap;
-
- /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in
- /// the entry block. This allows the allocas to be efficiently referenced
- /// anywhere in the function.
- DenseMap<const AllocaInst*, int> StaticAllocaMap;
-
- /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for
- /// function arguments that are inserted after scheduling is completed.
- SmallVector<MachineInstr*, 8> ArgDbgValues;
-
-#ifndef NDEBUG
- SmallSet<const Instruction *, 8> CatchInfoLost;
- SmallSet<const Instruction *, 8> CatchInfoFound;
-#endif
-
- struct LiveOutInfo {
- unsigned NumSignBits;
- APInt KnownOne, KnownZero;
- LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {}
- };
-
- /// LiveOutRegInfo - Information about live out vregs, indexed by their
- /// register number offset by 'FirstVirtualRegister'.
- std::vector<LiveOutInfo> LiveOutRegInfo;
-
- /// PHINodesToUpdate - A list of phi instructions whose operand list will
- /// be updated after processing the current basic block.
- /// TODO: This isn't per-function state, it's per-basic-block state. But
- /// there's no other convenient place for it to live right now.
- std::vector<std::pair<MachineInstr*, unsigned> > PHINodesToUpdate;
-
- explicit FunctionLoweringInfo(const TargetLowering &TLI);
-
- /// set - Initialize this FunctionLoweringInfo with the given Function
- /// and its associated MachineFunction.
- ///
- void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel);
-
- /// clear - Clear out all the function-specific state. This returns this
- /// FunctionLoweringInfo to an empty state, ready to be used for a
- /// different function.
- void clear();
-
- unsigned MakeReg(EVT VT);
-
- /// isExportedInst - Return true if the specified value is an instruction
- /// exported from its block.
- bool isExportedInst(const Value *V) {
- return ValueMap.count(V);
- }
-
- unsigned CreateRegForValue(const Value *V);
-
- unsigned InitializeRegForValue(const Value *V) {
- unsigned &R = ValueMap[V];
- assert(R == 0 && "Already initialized this value register!");
- return R = CreateRegForValue(V);
- }
-};
-
-/// AddCatchInfo - Extract the personality and type infos from an eh.selector
-/// call, and add them to the specified machine basic block.
-void AddCatchInfo(const CallInst &I,
- MachineModuleInfo *MMI, MachineBasicBlock *MBB);
-
-/// CopyCatchInfo - Copy catch information from DestBB to SrcBB.
-void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB,
- MachineModuleInfo *MMI, FunctionLoweringInfo &FLI);
-
-} // end namespace llvm
-
-#endif
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 16eb8a7..61c2a90 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -123,7 +123,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
EVT VT = Node->getValueType(ResNo);
const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
- SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT);
+ SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
// Figure out the register class to create for the destreg.
if (VRBase) {
@@ -142,11 +142,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
} else {
// Create the reg, emit the copy.
VRBase = MRI->createVirtualRegister(DstRC);
- bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg,
- DstRC, SrcRC, Node->getDebugLoc());
-
- assert(Emitted && "Unable to issue a copy instruction!\n");
- (void) Emitted;
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ VRBase).addReg(SrcReg);
}
SDValue Op(Node, ResNo);
@@ -246,7 +243,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
VReg = MRI->createVirtualRegister(RC);
}
- BuildMI(MBB, Op.getDebugLoc(),
+ BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
return VReg;
}
@@ -288,10 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
"Don't have operand info for this instruction!");
if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
- bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
- DstRC, SrcRC, Op.getNode()->getDebugLoc());
- assert(Emitted && "Unable to issue a copy instruction!\n");
- (void) Emitted;
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
VReg = NewVReg;
}
}
@@ -428,12 +423,9 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
}
if (Opc == TargetOpcode::EXTRACT_SUBREG) {
+ // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
- // Create the extract_subreg machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::EXTRACT_SUBREG));
-
// Figure out the register class to create for the destreg.
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
@@ -450,11 +442,16 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
VRBase = MRI->createVirtualRegister(SRC);
}
- // Add def, source, and subreg index
- MI->addOperand(MachineOperand::CreateReg(VRBase, true));
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
+
+    // Add the source and subreg index.
AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
IsClone, IsCloned);
- MI->addOperand(MachineOperand::CreateImm(SubIdx));
+ assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) &&
+ "Cannot yet extract from physregs");
+ MI->getOperand(1).setSubReg(SubIdx);
MBB->insert(InsertPos, MI);
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
@@ -511,18 +508,13 @@ void
InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
DenseMap<SDValue, unsigned> &VRBaseMap) {
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
- const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
+ // Create the new VReg in the destination class and emit a copy.
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx);
-
- // Create the new VReg in the destination class and emit a copy.
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
- bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
- DstRC, SrcRC, Node->getDebugLoc());
- assert(Emitted &&
- "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
- (void) Emitted;
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg).addReg(VReg);
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
@@ -604,9 +596,10 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
} else if (SD->getKind() == SDDbgValue::CONST) {
const Value *V = SD->getConst();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // FIXME: SDDbgValues aren't updated with legalization, so it's possible
- // to have i128 values in them at this point. As a crude workaround, just
- // drop the debug info if this happens.
+ // FIXME: SDDbgValue constants aren't updated with legalization, so it's
+ // possible to have i128 constants in them at this point. Dwarf writer
+      // does not handle i128 constants at the moment, so as a crude workaround,
+ // just drop the debug info if this happens.
if (!CI->getValue().isSignedIntN(64))
MIB.addReg(0U);
else
@@ -676,6 +669,33 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Create the new machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // The MachineInstr constructor adds implicit-def operands. Scan through
+ // these to determine which are dead.
+ if (MI->getNumOperands() != 0 &&
+ Node->getValueType(Node->getNumValues()-1) == MVT::Flag) {
+ // First, collect all used registers.
+ SmallVector<unsigned, 8> UsedRegs;
+ for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser())
+ if (F->getOpcode() == ISD::CopyFromReg)
+ UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
+ else {
+ // Collect declared implicit uses.
+ const TargetInstrDesc &TID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(TID.getImplicitUses(),
+ TID.getImplicitUses() + TID.getNumImplicitUses());
+ // In addition to declared implicit uses, we must also check for
+ // direct RegisterSDNode operands.
+ for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
+ if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
+ unsigned Reg = R->getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg))
+ UsedRegs.push_back(Reg);
+ }
+ }
+ // Then mark unused registers as dead.
+ MI->setPhysRegsDeadExcept(UsedRegs, *TRI);
+ }
// Add result register values for things that are defined by this
// instruction.
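
A minimal sketch of the "dead unless used" computation that setPhysRegsDeadExcept performs above, using plain STL containers and made-up register numbers:

// Sketch: given an instruction's implicit defs and the registers its
// flag-users actually consume, every def not consumed is marked dead.
#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> ImplicitDefs = {1, 2, 3, 4}; // invented physregs
  std::vector<unsigned> UsedRegs     = {2, 4};       // gathered from users
  for (unsigned Reg : ImplicitDefs) {
    bool Used = std::find(UsedRegs.begin(), UsedRegs.end(), Reg)
                != UsedRegs.end();
    std::printf("physreg %u: %s\n", Reg, Used ? "live" : "mark dead");
  }
}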
@@ -696,16 +716,24 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
cast<MachineSDNode>(Node)->memoperands_end());
+ // Insert the instruction into position in the block. This needs to
+ // happen before any custom inserter hook is called so that the
+ // hook knows where in the block to insert the replacement code.
+ MBB->insert(InsertPos, MI);
+
if (II.usesCustomInsertionHook()) {
// Insert this instruction into the basic block using a target
      // specific inserter which may return a new basic block.
- MBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
- InsertPos = MBB->end();
+ bool AtEnd = InsertPos == MBB->end();
+ MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
+ if (NewMBB != MBB) {
+ if (AtEnd)
+ InsertPos = NewMBB->end();
+ MBB = NewMBB;
+ }
return;
}
- MBB->insert(InsertPos, MI);
-
  // Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
@@ -761,24 +789,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
break;
-
- const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0;
- // Get the register classes of the src/dst.
- if (TargetRegisterInfo::isVirtualRegister(SrcReg))
- SrcTRC = MRI->getRegClass(SrcReg);
- else
- SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType());
- if (TargetRegisterInfo::isVirtualRegister(DestReg))
- DstTRC = MRI->getRegClass(DestReg);
- else
- DstTRC = TRI->getPhysicalRegisterRegClass(DestReg,
- Node->getOperand(1).getValueType());
-
- bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg,
- DstTRC, SrcTRC, Node->getDebugLoc());
- assert(Emitted && "Unable to issue a copy instruction!\n");
- (void) Emitted;
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DestReg).addReg(SrcReg);
break;
}
case ISD::CopyFromReg: {
@@ -807,6 +820,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
MI->addOperand(MachineOperand::CreateES(AsmStr));
+ // Add the isAlignStack bit.
+ int64_t isAlignStack =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))->
+ getZExtValue();
+ MI->addOperand(MachineOperand::CreateImm(isAlignStack));
+
// Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
@@ -821,14 +840,22 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case InlineAsm::Kind_RegDef:
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MI->addOperand(MachineOperand::CreateReg(Reg, true));
+ // FIXME: Add dead flags for physical and virtual registers defined.
+ // For now, mark physical register defs as implicit to help fast
+ // regalloc. This makes inline asm look a lot like calls.
+ MI->addOperand(MachineOperand::CreateReg(Reg, true,
+ /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg)));
}
break;
case InlineAsm::Kind_RegDefEarlyClobber:
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false,
- false, false, true));
+ MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
+ /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg),
+ /*isKill=*/ false,
+ /*isDead=*/ false,
+ /*isUndef=*/false,
+ /*isEarlyClobber=*/ true));
}
break;
case InlineAsm::Kind_RegUse: // Use of register.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 62a37a5..7a47da4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -31,6 +31,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
@@ -133,7 +134,7 @@ private:
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
- SDValue N1, SDValue N2,
+ SDValue N1, SDValue N2,
SmallVectorImpl<int> &Mask) const;
bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
@@ -143,6 +144,8 @@ private:
DebugLoc dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_PPCF128);
@@ -172,6 +175,8 @@ private:
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results);
};
@@ -181,8 +186,8 @@ private:
/// performs the same shuffle in terms of order of result bytes, but on a type
/// whose vector element type is narrower than the original shuffle type.
/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
-SDValue
-SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+SDValue
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SDValue N1, SDValue N2,
SmallVectorImpl<int> &Mask) const {
unsigned NumMaskElts = VT.getVectorNumElements();
@@ -193,12 +198,12 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
if (NumEltsGrowth == 1)
return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]);
-
+
SmallVector<int, 8> NewMask;
for (unsigned i = 0; i != NumMaskElts; ++i) {
int Idx = Mask[i];
for (unsigned j = 0; j != NumEltsGrowth; ++j) {
- if (Idx < 0)
+ if (Idx < 0)
NewMask.push_back(-1);
else
NewMask.push_back(Idx * NumEltsGrowth + j);
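
A standalone sketch of the mask widening this function performs; the input mask and growth factor mirror the <v4i32> example in the comment above, and undef lanes (-1) stay undef:

// Sketch of the mask widening: <0,1,0,1> on 32-bit elements becomes
// <0,1,2,3,0,1,2,3> on 16-bit elements (growth factor 2).
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> Mask = {0, 1, 0, 1};      // v4i32 shuffle mask
  unsigned NumEltsGrowth = 2;                // i32 -> 2 x i16
  std::vector<int> NewMask;
  for (int Idx : Mask)
    for (unsigned j = 0; j != NumEltsGrowth; ++j)
      NewMask.push_back(Idx < 0 ? -1 : int(Idx * NumEltsGrowth + j));
  for (int Idx : NewMask) std::printf("%d ", Idx);
  std::printf("\n");                         // prints: 0 1 2 3 0 1 2 3
}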
@@ -320,7 +325,8 @@ bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest,
bool OperandsLeadToDest = false;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
OperandsLeadToDest |= // If an operand leads to Dest, so do we.
- LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo);
+ LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest,
+ NodesLeadingTo);
if (OperandsLeadToDest) {
NodesLeadingTo.insert(N);
@@ -357,7 +363,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
EVT SVT = VT;
while (SVT != MVT::f32) {
SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
- if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) &&
+ if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
// Only do this if the target has a native EXTLOAD instruction from
// smaller type.
TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) &&
@@ -372,8 +378,8 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, dl,
- OrigVT, DAG.getEntryNode(),
+ return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+ DAG.getEntryNode(),
CPIdx, PseudoSourceValue::getConstantPool(),
0, VT, false, false, Alignment);
return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
@@ -450,7 +456,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
8 * (StoredBytes - Offset));
// Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr,
NULL, 0, MemVT, false, false, 0);
Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
@@ -552,7 +558,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// The last copy may be partial. Do an extending load.
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
8 * (LoadedBytes - Offset));
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr,
LD->getSrcValue(), SVOffset + Offset,
MemVT, LD->isVolatile(),
LD->isNonTemporal(),
@@ -568,7 +574,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
Stores.size());
// Finally, perform the original load only redirected to the stack slot.
- Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase,
NULL, 0, LoadedVT, false, false, 0);
// Callers expect a MERGE_VALUES node.
@@ -597,23 +603,23 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Load the value in two parts
SDValue Lo, Hi;
if (TLI.isLittleEndian()) {
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset, NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment, IncrementSize));
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
} else {
- Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset, NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
- LD->isNonTemporal(), MinAlign(Alignment, IncrementSize));
+ LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
}
// aggregate the two parts
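
A standalone sketch of the split-load recombination above (little-endian case, matching the Lo/Hi order): memcpy stands in for the two aligned partial loads, and the buffer contents are invented.

// Sketch of the split: read a misaligned 32-bit value as two 16-bit
// halves and recombine them (assumes a little-endian host).
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t load32_split(const unsigned char *p) {
  uint16_t lo, hi;
  std::memcpy(&lo, p, 2);           // low half at offset 0
  std::memcpy(&hi, p + 2, 2);       // high half at offset IncrementSize (2)
  return uint32_t(lo) | (uint32_t(hi) << 16); // zext lo, shift hi into place
}

int main() {
  unsigned char buf[6] = {0, 0x78, 0x56, 0x34, 0x12, 0};
  std::printf("0x%08x\n", (unsigned)load32_split(buf + 1)); // 0x12345678
}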
@@ -773,7 +779,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
"Unexpected illegal type!");
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
+ assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
@@ -853,6 +859,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::MERGE_VALUES:
case ISD::EH_RETURN:
case ISD::FRAME_TO_ARGS_OFFSET:
+ case ISD::EH_SJLJ_SETJMP:
+ case ISD::EH_SJLJ_LONGJMP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -925,8 +933,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
}
- Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(),
- Ops.size());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(),
+ Ops.size()), 0);
switch (Action) {
case TargetLowering::Legal:
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
@@ -1000,11 +1008,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
NodesLeadingTo);
}
- // Now that we legalized all of the inputs (which may have inserted
- // libcalls) create the new CALLSEQ_START node.
+ // Now that we have legalized all of the inputs (which may have inserted
+ // libcalls), create the new CALLSEQ_START node.
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
- // Merge in the last call, to ensure that this call start after the last
+ // Merge in the last call to ensure that this call starts after the last
// call ended.
if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -1016,7 +1024,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Tmp1 != Node->getOperand(0)) {
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[0] = Tmp1;
- Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0],
+ Ops.size()), Result.getResNo());
}
// Remember that the CALLSEQ_START is legalized.
@@ -1058,7 +1067,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Tmp1 != Node->getOperand(0)) {
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[0] = Tmp1;
- Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ &Ops[0], Ops.size()),
+ Result.getResNo());
}
} else {
Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1));
@@ -1067,7 +1078,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[0] = Tmp1;
Ops.back() = Tmp2;
- Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ &Ops[0], Ops.size()),
+ Result.getResNo());
}
}
assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
@@ -1087,7 +1100,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
EVT VT = Node->getValueType(0);
- Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
Tmp3 = Result.getValue(0);
Tmp4 = Result.getValue(1);
@@ -1100,7 +1115,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
DAG, TLI);
Tmp3 = Result.getOperand(0);
Tmp4 = Result.getOperand(1);
@@ -1166,7 +1181,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
ISD::LoadExtType NewExtType =
ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
- Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl,
Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
NVT, isVolatile, isNonTemporal, Alignment);
@@ -1202,8 +1217,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (TLI.isLittleEndian()) {
// EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
// Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
- Node->getValueType(0), Tmp1, Tmp2,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl,
+ Tmp1, Tmp2,
LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
isNonTemporal, Alignment);
@@ -1211,13 +1226,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
IncrementSize = RoundWidth / 8;
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
LD->getSrcValue(), SVOffset + IncrementSize,
ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
- // Build a factor node to remember that this load is independent of the
- // other one.
+ // Build a factor node to remember that this load is independent of
+ // the other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
@@ -1231,7 +1246,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Big endian - avoid unaligned loads.
// EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
// Load the top RoundWidth bits.
- Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
isNonTemporal, Alignment);
@@ -1239,14 +1254,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
IncrementSize = RoundWidth / 8;
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl,
- Node->getValueType(0), Tmp1, Tmp2,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ Node->getValueType(0), dl, Tmp1, Tmp2,
LD->getSrcValue(), SVOffset + IncrementSize,
ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
- // Build a factor node to remember that this load is independent of the
- // other one.
+ // Build a factor node to remember that this load is independent of
+ // the other one.
Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
Hi.getValue(1));
@@ -1267,7 +1282,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
isCustom = true;
// FALLTHROUGH
case TargetLowering::Legal:
- Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
Tmp1 = Result.getValue(0);
Tmp2 = Result.getValue(1);
@@ -1281,10 +1298,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// If this is an unaligned load and the target doesn't support it,
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ const Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
DAG, TLI);
Tmp1 = Result.getOperand(0);
Tmp2 = Result.getOperand(1);
@@ -1310,10 +1329,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp2 = LegalizeOp(Load.getValue(1));
break;
}
- assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!");
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
// Turn the unsupported load into an EXTLOAD followed by an explicit
// zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl,
Tmp1, Tmp2, LD->getSrcValue(),
LD->getSrcValueOffset(), SrcVT,
LD->isVolatile(), LD->isNonTemporal(),
@@ -1355,8 +1375,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
{
Tmp3 = LegalizeOp(ST->getValue());
- Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
- ST->getOffset());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp3, Tmp2,
+ ST->getOffset()),
+ Result.getResNo());
EVT VT = Tmp3.getValueType();
switch (TLI.getOperationAction(ISD::STORE, VT)) {
@@ -1366,7 +1388,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
DAG, TLI);
@@ -1459,8 +1481,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
} else {
if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() ||
Tmp2 != ST->getBasePtr())
- Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2,
- ST->getOffset());
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp3, Tmp2,
+ ST->getOffset()),
+ Result.getResNo());
switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
default: assert(0 && "This action is not supported yet!");
@@ -1469,7 +1493,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// expand it.
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) {
const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty);
+ unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty);
if (ST->getAlignment() < ABIAlignment)
Result = ExpandUnalignedStore(cast<StoreSDNode>(Result.getNode()),
DAG, TLI);
@@ -1531,7 +1555,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0,
false, false, 0);
else
- return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr,
NULL, 0, Vec.getValueType().getVectorElementType(),
false, false, 0);
}
@@ -1568,7 +1592,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
Node->getOperand(i), Idx, SV, Offset,
EltVT, false, false, 0));
} else
- Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
+ Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
Node->getOperand(i), Idx, SV, Offset,
false, false, 0));
}
@@ -1763,7 +1787,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
- return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT,
+ return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT,
false, false, DestAlign);
}
@@ -1926,6 +1950,44 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return CallInfo.first;
}
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ // Legalize the call sequence, starting with the chain. This will advance
+ // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
+ // was added by LowerCallTo (guaranteeing proper serialization of calls).
+ LegalizeOp(CallInfo.second);
+ return CallInfo;
+}
+
SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64,
@@ -2048,7 +2110,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr);
SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr);
- SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52);
+ SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
+ TwoP84PlusTwoP52);
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
}
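
The magic constants above implement the classic two-double conversion: the low 32 bits are planted in a 2^52-biased double and the high 32 bits in a 2^84-biased one, then the combined bias is subtracted away. A standalone sketch, assuming IEEE-754 doubles:

// Sketch of the uint64 -> double trick: encode the halves exactly via the
// 2^52 and 2^84 exponent patterns, cancel the biases, and add.
#include <cstdint>
#include <cstdio>
#include <cstring>

static double u64_to_f64(uint64_t x) {
  uint64_t LoOr = (x & 0xffffffffu) | 0x4330000000000000ull; // 2^52 | lo
  uint64_t HiOr = (x >> 32)         | 0x4530000000000000ull; // 2^84 | hi
  double LoFlt, HiFlt;
  std::memcpy(&LoFlt, &LoOr, 8);                 // the BIT_CONVERTs
  std::memcpy(&HiFlt, &HiOr, 8);
  const double TwoP84 = 19342813113834066795298816.0;  // 2^84, exact
  const double TwoP52 = 4503599627370496.0;            // 2^52, exact
  return (HiFlt - (TwoP84 + TwoP52)) + LoFlt;
}

int main() {
  std::printf("%.1f\n", u64_to_f64(0x123456789abcdefull));
}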
@@ -2058,11 +2121,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
EVT SHVT = TLI.getShiftAmountTy();
- SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
DAG.getConstant(UINT64_C(0x800), MVT::i64));
- SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
@@ -2122,7 +2185,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
false, false, Alignment);
else {
FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl,
DAG.getEntryNode(), CPIdx,
PseudoSourceValue::getConstantPool(), 0,
MVT::f32, false, false, Alignment));
@@ -2332,6 +2395,92 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
}
}
+std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ break;
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
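
A standalone sketch of the two-level dispatch ExpandAtomic performs, mapping an operation and access width onto the libgcc/compiler-rt __sync_* entry point that the RTLIB enums name; the enum and helper here are hypothetical:

// Sketch of the ExpandAtomic dispatch: pick a __sync_* libcall name from
// the atomic operation and its access width in bytes.
#include <cstdio>
#include <string>

enum AtomicOp { Swap, CmpSwap, FetchAdd };

static std::string syncLibcallName(AtomicOp Op, unsigned Bytes) {
  const char *Base = Op == Swap    ? "__sync_lock_test_and_set_"
                   : Op == CmpSwap ? "__sync_val_compare_and_swap_"
                                   : "__sync_fetch_and_add_";
  return Base + std::to_string(Bytes); // _1, _2, _4 or _8
}

int main() {
  std::printf("%s\n", syncLibcallName(CmpSwap, 4).c_str());
  // prints: __sync_val_compare_and_swap_4
}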
void SelectionDAGLegalize::ExpandNode(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
DebugLoc dl = Node->getDebugLoc();
@@ -2357,10 +2506,48 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::EH_RETURN:
case ISD::EH_LABEL:
case ISD::PREFETCH:
- case ISD::MEMBARRIER:
case ISD::VAEND:
+ case ISD::EH_SJLJ_LONGJMP:
+ Results.push_back(Node->getOperand(0));
+ break;
+ case ISD::EH_SJLJ_SETJMP:
+ Results.push_back(DAG.getConstant(0, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
+ case ISD::MEMBARRIER: {
+    // If the target didn't lower this, lower it to a '__sync_synchronize()' call.
+ TargetLowering::ArgListTy Args;
+ std::pair<SDValue, SDValue> CallResult =
+ TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false,
+ /*isReturnValueUsed=*/true,
+ DAG.getExternalSymbol("__sync_synchronize",
+ TLI.getPointerTy()),
+ Args, DAG, dl);
+ Results.push_back(CallResult.second);
+ break;
+ }
+ // By default, atomic intrinsics are marked Legal and lowered. Targets
+ // which don't support them directly, however, may want libcalls, in which
+ // case they mark them Expand, and we get here.
+  // FIXME: The min/max flavors below have no libcall mapping in ExpandAtomic yet.
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_CMP_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(Node);
+ Results.push_back(Tmp.first);
+ Results.push_back(Tmp.second);
+ break;
+ }
case ISD::DYNAMIC_STACKALLOC:
ExpandDYNAMIC_STACKALLOC(Node, Results);
break;
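
For reference, the MEMBARRIER fallback above emits a call to __sync_synchronize(), which is also available as a GCC/Clang builtin, so its effect can be sketched directly in C++ (the variables are invented):

// Sketch: the fallback lowers a memory barrier to a __sync_synchronize()
// runtime call; here the same-named builtin emits the full barrier.
#include <cstdio>

volatile int ready = 0;
int payload = 0;

int main() {
  payload = 42;
  __sync_synchronize();  // full barrier: payload is visible before ready
  ready = 1;
  std::printf("%d %d\n", ready, payload);
}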
@@ -2465,15 +2652,31 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
EVT VT = Node->getValueType(0);
Tmp1 = Node->getOperand(0);
Tmp2 = Node->getOperand(1);
- SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
- false, false, 0);
+ unsigned Align = Node->getConstantOperandVal(3);
+
+ SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
+ false, false, 0);
+ SDValue VAList = VAListLoad;
+
+ if (Align > TLI.getMinStackArgumentAlignment()) {
+ assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
+ VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(Align - 1,
+ TLI.getPointerTy()));
+
+ VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
+ DAG.getConstant(-Align,
+ TLI.getPointerTy()));
+ }
+
// Increment the pointer, VAList, to the next vaarg
Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
DAG.getConstant(TLI.getTargetData()->
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0,
+ Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0,
false, false, 0);
// Load the actual argument out of the pointer VAList
Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0,
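
The add-and-mask rounding in the VAARG expansion above is the standard align-up idiom, (p + Align - 1) & -Align, guarded by the power-of-two assert. A standalone sketch:

// Sketch of the va_arg over-alignment fix: round the argument pointer up
// before loading. Align must be a power of 2.
#include <cassert>
#include <cstdint>
#include <cstdio>

static uintptr_t alignUp(uintptr_t P, uintptr_t Align) {
  assert((Align & (Align - 1)) == 0 && "Expected Align to be a power of 2");
  return (P + Align - 1) & ~(Align - 1);   // same as & -Align for powers of 2
}

int main() {
  std::printf("0x%zx\n", size_t(alignUp(0x1003, 16))); // prints 0x1010
}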
@@ -2496,7 +2699,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::EXTRACT_VECTOR_ELT:
if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
// This must be an access of the only element. Return it.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+ Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
Node->getOperand(0));
else
Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
@@ -2948,13 +3151,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
const TargetData &TD = *TLI.getTargetData();
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
-
+
Index = DAG.getNode(ISD::MUL, dl, PTy,
Index, DAG.getConstant(EntrySize, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr,
PseudoSourceValue::getJumpTable(), 0, MemVT,
false, false, 0);
Addr = LD;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index e3eb949..650ee5a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -453,8 +453,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
SDValue NewL;
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
- NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(),
- NVT, L->getChain(), L->getBasePtr(), L->getOffset(),
+ NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
+ NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
L->getSrcValue(), L->getSrcValueOffset(), NVT,
L->isVolatile(), L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
@@ -464,8 +464,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
}
// Do a non-extending load followed by FP_EXTEND.
- NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD,
- L->getMemoryVT(), L->getChain(),
+ NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
+ L->getMemoryVT(), dl, L->getChain(),
L->getBasePtr(), L->getOffset(),
L->getSrcValue(), L->getSrcValueOffset(),
L->getMemoryVT(), L->isVolatile(),
@@ -504,7 +504,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SDValue NewVAARG;
- NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+ NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -698,9 +699,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
DAG.getCondCode(CCCode), NewLHS, NewRHS,
- N->getOperand(4));
+ N->getOperand(4)),
+ 0);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) {
@@ -739,9 +741,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(CCCode));
+ DAG.getCondCode(CCCode)),
+ 0);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
@@ -757,8 +760,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
}
// Otherwise, update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
- DAG.getCondCode(CCCode));
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)),
+ 0);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
@@ -1106,7 +1110,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
- Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr,
LD->getSrcValue(), LD->getSrcValueOffset(),
LD->getMemoryVT(), LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
@@ -1294,9 +1298,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
DAG.getCondCode(CCCode), NewLHS, NewRHS,
- N->getOperand(4));
+ N->getOperand(4)), 0);
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) {
@@ -1375,9 +1379,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(CCCode));
+ DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
@@ -1393,8 +1397,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) {
}
// Otherwise, update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
- DAG.getCondCode(CCCode));
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 8b382bc..b94ea9a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -369,7 +369,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
ISD::LoadExtType ExtType =
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
DebugLoc dl = N->getDebugLoc();
- SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(),
N->getSrcValue(), N->getSrcValueOffset(),
N->getMemoryVT(), N->isVolatile(),
N->isNonTemporal(), N->getAlignment());
@@ -572,7 +572,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
SmallVector<SDValue, 8> Parts(NumRegs);
for (unsigned i = 0; i < NumRegs; ++i) {
- Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2));
+ Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+ N->getConstantOperandVal(3));
Chain = Parts[i].getValue(1);
}
@@ -725,8 +726,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
// The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
// legal types.
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
- N->getOperand(1), LHS, RHS, N->getOperand(4));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), LHS, RHS, N->getOperand(4)),
+ 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
@@ -737,8 +739,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT);
// The chain (Op#0) and basic block destination (Op#2) are always legal types.
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond,
- N->getOperand(2));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+ N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
@@ -773,7 +775,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
for (unsigned i = 0; i < NumElts; ++i)
NewOps.push_back(GetPromotedInteger(N->getOperand(i)));
- return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts);
+ return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) {
@@ -798,17 +800,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
assert(N->getOperand(1).getValueType().getSizeInBits() >=
N->getValueType(0).getVectorElementType().getSizeInBits() &&
"Type of inserted value narrower than vector element type!");
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
GetPromotedInteger(N->getOperand(1)),
- N->getOperand(2));
+ N->getOperand(2)),
+ 0);
}
assert(OpNo == 2 && "Different operand and result vector types?");
// Promote the index.
SDValue Idx = ZExtPromotedInteger(N->getOperand(2));
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
- N->getOperand(1), Idx);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ N->getOperand(1), Idx), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
@@ -819,15 +822,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) {
SDValue Flag = GetPromotedInteger(N->getOperand(i));
NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1);
}
- return DAG.UpdateNodeOperands(SDValue (N, 0), NewOps,
- array_lengthof(NewOps));
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
// Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote
// the operand in place.
- return DAG.UpdateNodeOperands(SDValue(N, 0),
- GetPromotedInteger(N->getOperand(0)));
+ return SDValue(DAG.UpdateNodeOperands(N,
+ GetPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
@@ -837,8 +839,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType());
SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT);
- return DAG.UpdateNodeOperands(SDValue(N, 0), Cond,
- N->getOperand(1), N->getOperand(2));
+ return SDValue(DAG.UpdateNodeOperands(N, Cond,
+ N->getOperand(1), N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
@@ -849,8 +851,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get());
// The CC (#4) and the possible return values (#2 and #3) have legal types.
- return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2),
- N->getOperand(3), N->getOperand(4));
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2),
+ N->getOperand(3), N->getOperand(4)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
@@ -861,12 +863,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) {
PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get());
// The CC (#2) is always legal.
- return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2));
+ return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) {
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
- ZExtPromotedInteger(N->getOperand(1)));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+ ZExtPromotedInteger(N->getOperand(1))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
@@ -878,8 +880,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
- return DAG.UpdateNodeOperands(SDValue(N, 0),
- SExtPromotedInteger(N->getOperand(0)));
+ return SDValue(DAG.UpdateNodeOperands(N,
+ SExtPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
@@ -905,8 +907,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) {
- return DAG.UpdateNodeOperands(SDValue(N, 0),
- ZExtPromotedInteger(N->getOperand(0)));
+ return SDValue(DAG.UpdateNodeOperands(N,
+ ZExtPromotedInteger(N->getOperand(0))), 0);
}
SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
@@ -990,6 +992,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break;
+
+ case ISD::SADDO:
+ case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
+ case ISD::UADDO:
+ case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1526,7 +1533,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
if (N->getMemoryVT().bitsLE(NVT)) {
EVT MemVT = N->getMemoryVT();
- Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
MemVT, isVolatile, isNonTemporal, Alignment);
// Remember the chain.
@@ -1559,7 +1566,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(),
+ Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(),
SVOffset+IncrementSize, NEVT,
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -1577,7 +1584,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned ExcessBits = (EBytes - IncrementSize)*8;
// Load both the high bits and maybe some of the low bits.
- Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
isVolatile, isNonTemporal, Alignment);
@@ -1586,7 +1593,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
// Load the rest of the low bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(),
SVOffset+IncrementSize,
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
@@ -1716,6 +1723,48 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Compute the overflow.
+ //
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Sum >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ //
+ EVT OType = Node->getValueType(1);
+ SDValue Zero = DAG.getConstant(0, LHS.getValueType());
+
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ Node->getOpcode() == ISD::SADDO ?
+ ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(Node, 1), Cmp);
+}
+
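
The sign table in the comment block above is easy to sanity-check outside the DAG. A minimal standalone sketch (plain C++, not part of this patch) applying the same Add/Sub predicates to ordinary 32-bit integers:

#include <cassert>
#include <cstdint>

// Same rule the expansion builds from SETCC nodes, on plain int32_t.
static bool saddo(int32_t lhs, int32_t rhs, int32_t &sum) {
  sum = (int32_t)((uint32_t)lhs + (uint32_t)rhs);  // wrap-around add
  bool lhsSign = lhs >= 0, rhsSign = rhs >= 0, sumSign = sum >= 0;
  // Add: overflow iff the signs match but the sum's sign differs.
  return (lhsSign == rhsSign) && (lhsSign != sumSign);
}

static bool ssubo(int32_t lhs, int32_t rhs, int32_t &diff) {
  diff = (int32_t)((uint32_t)lhs - (uint32_t)rhs); // wrap-around sub
  bool lhsSign = lhs >= 0, rhsSign = rhs >= 0, diffSign = diff >= 0;
  // Sub: overflow iff the signs differ and the result's sign differs
  // from the minuend's.
  return (lhsSign != rhsSign) && (lhsSign != diffSign);
}

int main() {
  int32_t r;
  assert(saddo(INT32_MAX, 1, r));      // wraps negative
  assert(!saddo(1, 2, r) && r == 3);
  assert(ssubo(INT32_MIN, 1, r));      // wraps positive
  assert(!ssubo(5, 3, r) && r == 2);
  return 0;
}
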
void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -1912,6 +1961,29 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ?
+ ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
+ LHS, RHS);
+ SplitInteger(Sum, Lo, Hi);
+
+ // Calculate the overflow: addition overflows iff a + b < a, and subtraction
+ // overflows iff a - b > a.
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS,
+ N->getOpcode() == ISD::UADDO ?
+ ISD::SETULT : ISD::SETUGT);
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
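
The unsigned variant needs no sign analysis at all; the wrap-around check in the comment can be verified directly. A minimal standalone sketch (plain C++, not part of this patch):

#include <cassert>
#include <cstdint>

// a + b overflows iff the wrapped sum is below a (the SETULT above);
// a - b overflows iff the wrapped difference is above a (the SETUGT).
static bool uaddo(uint32_t a, uint32_t b, uint32_t &sum) {
  sum = a + b;
  return sum < a;
}

static bool usubo(uint32_t a, uint32_t b, uint32_t &diff) {
  diff = a - b;
  return diff > a;
}

int main() {
  uint32_t r;
  assert(uaddo(0xffffffffu, 1, r) && r == 0);
  assert(!uaddo(2, 3, r) && r == 5);
  assert(usubo(0, 1, r));              // borrows, so it overflows
  assert(!usubo(7, 5, r) && r == 2);
  return 0;
}
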
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -2154,9 +2226,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0),
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
DAG.getCondCode(CCCode), NewLHS, NewRHS,
- N->getOperand(4));
+ N->getOperand(4)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
@@ -2172,9 +2244,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) {
}
// Update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
N->getOperand(2), N->getOperand(3),
- DAG.getCondCode(CCCode));
+ DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
@@ -2190,8 +2262,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) {
}
// Otherwise, update N to have the operands specified.
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS,
- DAG.getCondCode(CCCode));
+ return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS,
+ DAG.getCondCode(CCCode)), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
@@ -2200,7 +2272,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
// upper half of the shift amount is zero. Just use the lower half.
SDValue Lo, Hi;
GetExpandedInteger(N->getOperand(1), Lo, Hi);
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo);
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
@@ -2209,7 +2281,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
// constant to valid type.
SDValue Lo, Hi;
GetExpandedInteger(N->getOperand(0), Lo, Hi);
- return DAG.UpdateNodeOperands(SDValue(N, 0), Lo);
+ return SDValue(DAG.UpdateNodeOperands(N, Lo), 0);
}
SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
@@ -2384,7 +2456,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// Load the value out, extending it from f32 to the destination float type.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(),
FudgePtr, NULL, 0, MVT::f32,
false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 17f131b..6e56c98 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -485,15 +485,14 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
NewOps.push_back(Op);
} else if (Op != OrigOp) {
// This is the first operand to change - add all operands so far.
- NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
+ NewOps.append(N->op_begin(), N->op_begin() + i);
NewOps.push_back(Op);
}
}
// Some operands changed - update the node.
if (!NewOps.empty()) {
- SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0],
- NewOps.size()).getNode();
+ SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size());
if (M != N) {
// The node morphed into a different node. Normally for this to happen
// the original node would have to be marked NewNode. However this can
@@ -684,40 +683,45 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
// can potentially cause recursive merging.
SmallSetVector<SDNode*, 16> NodesToAnalyze;
NodeUpdateListener NUL(*this, NodesToAnalyze);
- DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
-
- // The old node may still be present in a map like ExpandedIntegers or
- // PromotedIntegers. Inform maps about the replacement.
- ReplacedValues[From] = To;
-
- // Process the list of nodes that need to be reanalyzed.
- while (!NodesToAnalyze.empty()) {
- SDNode *N = NodesToAnalyze.back();
- NodesToAnalyze.pop_back();
- if (N->getNodeId() != DAGTypeLegalizer::NewNode)
- // The node was analyzed while reanalyzing an earlier node - it is safe to
- // skip. Note that this is not a morphing node - otherwise it would still
- // be marked NewNode.
- continue;
+ do {
+ DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
+
+ // The old node may still be present in a map like ExpandedIntegers or
+ // PromotedIntegers. Inform maps about the replacement.
+ ReplacedValues[From] = To;
+
+ // Process the list of nodes that need to be reanalyzed.
+ while (!NodesToAnalyze.empty()) {
+ SDNode *N = NodesToAnalyze.back();
+ NodesToAnalyze.pop_back();
+ if (N->getNodeId() != DAGTypeLegalizer::NewNode)
+ // The node was analyzed while reanalyzing an earlier node - it is safe
+ // to skip. Note that this is not a morphing node - otherwise it would
+ // still be marked NewNode.
+ continue;
- // Analyze the node's operands and recalculate the node ID.
- SDNode *M = AnalyzeNewNode(N);
- if (M != N) {
- // The node morphed into a different node. Make everyone use the new node
- // instead.
- assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
- assert(N->getNumValues() == M->getNumValues() &&
- "Node morphing changed the number of results!");
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
- SDValue OldVal(N, i);
- SDValue NewVal(M, i);
- if (M->getNodeId() == Processed)
- RemapValue(NewVal);
- DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ // Analyze the node's operands and recalculate the node ID.
+ SDNode *M = AnalyzeNewNode(N);
+ if (M != N) {
+ // The node morphed into a different node. Make everyone use the new
+ // node instead.
+ assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!");
+ assert(N->getNumValues() == M->getNumValues() &&
+ "Node morphing changed the number of results!");
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ SDValue OldVal(N, i);
+ SDValue NewVal(M, i);
+ if (M->getNodeId() == Processed)
+ RemapValue(NewVal);
+ DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ }
+ // The original node continues to exist in the DAG, marked NewNode.
}
- // The original node continues to exist in the DAG, marked NewNode.
}
- }
+ // When recursively updating nodes with new nodes, it is possible to have
+ // new uses of From due to CSE. If this happens, replace the new uses of
+ // From with To.
+ } while (!From.use_empty());
}
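
The reason this became a do/while is worth spelling out: ReplaceAllUsesOfValueWith can cause CSE to fold freshly analyzed nodes into existing ones, manufacturing brand-new uses of From after the first pass. A toy worklist (illustrative only, no SelectionDAG types) shows the fixed-point shape:

#include <cassert>
#include <vector>

int main() {
  // Processing an item can enqueue more items, so the loop must run
  // until the queue is genuinely empty -- the same shape as repeating
  // the replacement until From.use_empty().
  std::vector<int> work = {3};
  int processed = 0;
  while (!work.empty()) {
    int n = work.back();
    work.pop_back();
    ++processed;
    if (n > 0)
      work.push_back(n - 1);  // new work discovered while processing
  }
  assert(processed == 4);     // 3, 2, 1, 0
  return 0;
}
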
void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c665963..bd86694 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -345,6 +345,9 @@ private:
void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+
void ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi);
bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -620,6 +623,7 @@ private:
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
SDValue WidenVecRes_Unary(SDNode *N);
SDValue WidenVecRes_InregOp(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 88e1e62..9c2b1d9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -238,13 +238,15 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
}
void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ EVT OVT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT);
SDValue Chain = N->getOperand(0);
SDValue Ptr = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
+ const unsigned Align = N->getConstantOperandVal(3);
- Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
- Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2));
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
// Handle endianness of the load.
if (TLI.isBigEndian())
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 0e2bd02..621c087 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -116,7 +116,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Ops.push_back(LegalizeOp(Node->getOperand(i)));
SDValue Result =
- DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size());
+ SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0);
bool HasVectorValue = false;
for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7efeea1..93aeff5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -165,9 +165,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
assert(N->isUnindexed() && "Indexed vector load?");
- SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(),
+ SDValue Result = DAG.getLoad(ISD::UNINDEXED,
N->getExtensionType(),
N->getValueType(0).getVectorElementType(),
+ N->getDebugLoc(),
N->getChain(), N->getBasePtr(),
DAG.getUNDEF(N->getBasePtr().getValueType()),
N->getSrcValue(), N->getSrcValueOffset(),
@@ -448,6 +449,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
SplitVecRes_UnaryOp(N, Lo, Hi);
break;
@@ -755,14 +761,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
EVT LoMemVT, HiMemVT;
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
- Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset,
+ Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
SVOffset += IncrementSize;
- Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset,
+ Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment);
// Build a factor node to remember that this load is independent of the
@@ -1082,10 +1088,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
uint64_t LoElts = Lo.getValueType().getVectorNumElements();
if (IdxVal < LoElts)
- return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx);
- return DAG.UpdateNodeOperands(SDValue(N, 0), Hi,
+ return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
+ return SDValue(DAG.UpdateNodeOperands(N, Hi,
DAG.getConstant(IdxVal - LoElts,
- Idx.getValueType()));
+ Idx.getValueType())),
+ 0);
}
// Store the vector to the stack.
@@ -1099,7 +1106,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
- return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr,
SV, 0, EltVT, false, false, 0);
}
@@ -1199,7 +1206,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FDIV:
case ISD::FMUL:
case ISD::FPOW:
- case ISD::FPOWI:
case ISD::FREM:
case ISD::FSUB:
case ISD::MUL:
@@ -1215,6 +1221,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Binary(N);
break;
+ case ISD::FPOWI:
+ Res = WidenVecRes_POWI(N);
+ break;
+
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
@@ -1241,6 +1251,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEG:
case ISD::FSIN:
case ISD::FSQRT:
+ case ISD::FEXP:
+ case ISD::FEXP2:
+ case ISD::FLOG:
+ case ISD::FLOG2:
+ case ISD::FLOG10:
Res = WidenVecRes_Unary(N);
break;
}
@@ -1258,7 +1273,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
- while (!TLI.isTypeLegal(VT) && NumElts != 1) {
+ while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
@@ -1273,13 +1288,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
} else {
// Since the operation can trap, apply operation on the original vector.
+ EVT MaxVT = VT;
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
SmallVector<SDValue, 16> ConcatOps(CurNumElts);
unsigned ConcatEnd = 0; // Current ConcatOps index.
- unsigned Idx = 0; // Current Idx into input vectors.
+ int Idx = 0; // Current Idx into input vectors.
+
+ // NumElts := greatest synthesizable vector size (at most WidenVT)
+ // while (orig. vector has unhandled elements) {
+ // take munches of size NumElts from the beginning and add to ConcatOps
+ // NumElts := next smaller supported vector size or 1
+ // }
while (CurNumElts != 0) {
while (CurNumElts >= NumElts) {
SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1,
@@ -1290,26 +1312,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
Idx += NumElts;
CurNumElts -= NumElts;
}
- EVT PrevVecVT = VT;
do {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- } while (!TLI.isTypeLegal(VT) && NumElts != 1);
+ } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
if (NumElts == 1) {
- // Since we are using concat vector, build a vector from the scalar ops.
- SDValue VecOp = DAG.getUNDEF(PrevVecVT);
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp1, DAG.getIntPtrConstant(Idx));
SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp2, DAG.getIntPtrConstant(Idx));
- VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp,
- DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2),
- DAG.getIntPtrConstant(i));
+ ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
+ EOp1, EOp2);
}
CurNumElts = 0;
- ConcatOps[ConcatEnd++] = VecOp;
}
}
@@ -1320,23 +1337,65 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
return ConcatOps[0];
}
- // Rebuild vector to one with the widen type
- Idx = ConcatEnd - 1;
- while (Idx != 0) {
+ // while (Some element of ConcatOps is not of type MaxVT) {
+ // From the end of ConcatOps, collect elements of the same type and put
+ // them into an op of the next larger supported type
+ // }
+ while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
+ Idx = ConcatEnd - 1;
VT = ConcatOps[Idx--].getValueType();
- while (Idx != 0 && ConcatOps[Idx].getValueType() == VT)
- --Idx;
- if (Idx != 0) {
- VT = ConcatOps[Idx].getValueType();
- ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
- &ConcatOps[Idx+1], ConcatEnd - Idx - 1);
+ while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
+ Idx--;
+
+ int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
+ EVT NextVT;
+ do {
+ NextSize *= 2;
+ NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
+ } while (!TLI.isTypeSynthesizable(NextVT));
+
+ if (!VT.isVector()) {
+ // Scalar type, create an INSERT_VECTOR_ELT of type NextVT
+ SDValue VecOp = DAG.getUNDEF(NextVT);
+ unsigned NumToInsert = ConcatEnd - Idx - 1;
+ for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) {
+ VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp,
+ ConcatOps[OpIdx], DAG.getIntPtrConstant(i));
+ }
+ ConcatOps[Idx+1] = VecOp;
ConcatEnd = Idx + 2;
+ }
+ else {
+ // Vector type, create a CONCAT_VECTORS of type NextVT
+ SDValue undefVec = DAG.getUNDEF(VT);
+ unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
+ SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
+ unsigned RealVals = ConcatEnd - Idx - 1;
+ unsigned SubConcatEnd = 0;
+ unsigned SubConcatIdx = Idx + 1;
+ while (SubConcatEnd < RealVals)
+ SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
+ while (SubConcatEnd < OpsToConcat)
+ SubConcatOps[SubConcatEnd++] = undefVec;
+ ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
+ NextVT, &SubConcatOps[0],
+ OpsToConcat);
+ ConcatEnd = SubConcatIdx + 1;
}
}
+
+ // Check to see if we have a single operation with the widen type.
+ if (ConcatEnd == 1) {
+ VT = ConcatOps[0].getValueType();
+ if (VT == WidenVT)
+ return ConcatOps[0];
+ }
- unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements();
+ // Add undefs of type MaxVT until ConcatOps grows to the length of WidenVT
+ unsigned NumOps =
+ WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
if (NumOps != ConcatEnd) {
- SDValue UndefVal = DAG.getUNDEF(VT);
+ SDValue UndefVal = DAG.getUNDEF(MaxVT);
for (unsigned j = ConcatEnd; j < NumOps; ++j)
ConcatOps[j] = UndefVal;
}
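
The two commented pseudo-loops above (munch downward in size, then concatenate back upward) are easiest to see with plain numbers. A minimal sketch, assuming the supported chunk sizes are simply powers of two (the real test is TLI.isTypeSynthesizable):

#include <cassert>
#include <vector>

int main() {
  unsigned remaining = 13;   // elements left in the original vector
  unsigned chunk = 8;        // greatest supported chunk <= widened size
  std::vector<unsigned> munches;
  while (remaining != 0) {
    while (remaining >= chunk) {
      munches.push_back(chunk);   // one op of the current width
      remaining -= chunk;
    }
    if (chunk > 1)
      chunk /= 2;                 // next smaller supported size
  }
  // 13 = 8 + 4 + 1: three ops of decreasing width, as ConcatOps collects.
  assert(munches.size() == 3 && munches[0] == 8 && munches[1] == 4 &&
         munches[2] == 1);
  return 0;
}
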
@@ -1366,7 +1425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, dl, WidenVT, InOp);
}
- if (TLI.isTypeLegal(InWidenVT)) {
+ if (TLI.isTypeSynthesizable(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1410,6 +1469,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
}
+SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp = GetWidenedVector(N->getOperand(0));
+ SDValue ShOp = N->getOperand(1);
+ return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue InOp = GetWidenedVector(N->getOperand(0));
@@ -1501,7 +1567,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
- if (TLI.isTypeLegal(NewInVT)) {
+ if (TLI.isTypeSynthesizable(NewInVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1642,7 +1708,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SatOp, CvtCode);
}
- if (TLI.isTypeLegal(InWidenVT)) {
+ if (TLI.isTypeSynthesizable(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1968,7 +2034,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
if (InWidenSize % Size == 0 && !VT.isVector()) {
unsigned NewNumElts = InWidenSize / Size;
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
- if (TLI.isTypeLegal(NewVT)) {
+ if (TLI.isTypeSynthesizable(NewVT)) {
SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
DAG.getIntPtrConstant(0));
@@ -2066,7 +2132,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
- if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
@@ -2080,7 +2146,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
- if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
@@ -2286,14 +2352,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Increment = LdEltVT.getSizeInBits() / 8;
- Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset,
+ Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset,
LdEltVT, isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr, DAG.getIntPtrConstant(Offset));
- Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV,
+ Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV,
SVOffset + Offset, LdEltVT, isVolatile,
isNonTemporal, Align);
LdChain.push_back(Ops[i].getValue(1));
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index ad8630a..3b86c32 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -535,7 +535,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
SUnit *LRDef = LiveRegDefs[Reg];
EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
- TRI->getPhysicalRegisterRegClass(Reg, VT);
+ TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
// If cross copy register class is null, then it must be possible copy
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 820ba66..3ef521c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -320,7 +320,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
CapturePred(&*I);
- if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) {
+ if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
"Physical register dependency violated?");
@@ -795,7 +795,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
SUnit *LRDef = LiveRegDefs[Reg];
EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
- TRI->getPhysicalRegisterRegClass(Reg, VT);
+ TRI->getMinimalPhysRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
// If cross copy register class is null, then it must be possible copy
@@ -1116,7 +1116,7 @@ namespace {
SUnit *pop() {
if (empty()) return NULL;
std::vector<SUnit *>::iterator Best = Queue.begin();
- for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
+ for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
E = Queue.end(); I != E; ++I)
if (Picker(*Best, *I))
Best = I;
@@ -1275,6 +1275,17 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
return left->getHeight() > right->getHeight();
} else if (RStall)
return false;
+
+ // If either node is scheduling for latency, sort them by height and latency
+ // first.
+ if (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency) {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency;
+ }
+
return BURRSort(left, right, SPQ);
}
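
A compact restatement of the new tie-break (illustrative only; Node, wantsLatency and the fallback flag are stand-ins for SUnit, SchedulingPref == Sched::Latency and BURRSort):

#include <cassert>

struct Node { unsigned height, latency; bool wantsLatency; };

static bool latencyFirst(const Node &l, const Node &r, bool fallback) {
  // When either node is latency-driven, prefer greater height, then
  // greater latency, before the register-reduction ordering.
  if (l.wantsLatency || r.wantsLatency) {
    if (l.height != r.height) return l.height > r.height;
    if (l.latency != r.latency) return l.latency > r.latency;
  }
  return fallback;   // stand-in for BURRSort(left, right, SPQ)
}

int main() {
  Node a{4, 2, true}, b{3, 9, false};
  assert(latencyFirst(a, b, false));   // taller wins
  Node c{4, 5, true};
  assert(latencyFirst(c, a, false));   // equal height: latency wins
  return 0;
}
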
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 3185c88..06cf053 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -59,7 +59,11 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
SUnits.back().OrigNode = &SUnits.back();
SUnit *SU = &SUnits.back();
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
- SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ if (N->isMachineOpcode() &&
+ N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)
+ SU->SchedulingPref = Sched::None;
+ else
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
return SU;
}
@@ -97,7 +101,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
PhysReg = Reg;
const TargetRegisterClass *RC =
- TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+ TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo));
Cost = RC->getCopyCost();
}
}
@@ -106,17 +110,42 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
SelectionDAG *DAG) {
SmallVector<EVT, 4> VTs;
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i)
- VTs.push_back(N->getValueType(i));
+ SDNode *FlagDestNode = Flag.getNode();
+
+ // Don't add a flag from a node to itself.
+ if (FlagDestNode == N) return;
+
+ // Don't add a flag to something which already has a flag.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return;
+
+ for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
+ VTs.push_back(N->getValueType(I));
+
if (AddFlag)
VTs.push_back(MVT::Flag);
+
SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- Ops.push_back(N->getOperand(i));
- if (Flag.getNode())
+ for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
+ Ops.push_back(N->getOperand(I));
+
+ if (FlagDestNode)
Ops.push_back(Flag);
+
SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
+ MachineSDNode::mmo_iterator Begin = 0, End = 0;
+ MachineSDNode *MN = dyn_cast<MachineSDNode>(N);
+
+ // Store memory references.
+ if (MN) {
+ Begin = MN->memoperands_begin();
+ End = MN->memoperands_end();
+ }
+
DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size());
+
+ // Reset the memory references
+ if (MN)
+ MN->setMemRefs(Begin, End);
}
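
The Begin/End capture around MorphNodeTo exists because morphing rebuilds the node and can drop its memory-operand list. A mock of the save/restore pattern (illustrative only; MockNode is not the SDNode API):

#include <cassert>
#include <utility>
#include <vector>

struct MockNode {
  std::vector<int> memRefs;            // stand-in for MachineMemOperand*
  void morph() { memRefs.clear(); }    // morphing loses the references
  void setMemRefs(std::vector<int> r) { memRefs = std::move(r); }
};

int main() {
  MockNode n;
  n.memRefs = {1, 2};
  std::vector<int> saved = n.memRefs;  // Begin/End capture
  n.morph();                           // MorphNodeTo
  n.setMemRefs(saved);                 // reattach afterwards
  assert(n.memRefs.size() == 2);
  return 0;
}
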
/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
@@ -124,98 +153,98 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
/// offsets are not far apart (target specific), it adds MVT::Flag inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
-void ScheduleDAGSDNodes::ClusterNeighboringLoads() {
+void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
+ SDNode *Chain = 0;
+ unsigned NumOps = Node->getNumOperands();
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
+ Chain = Node->getOperand(NumOps-1).getNode();
+ if (!Chain)
+ return;
+
+ // Look for other loads of the same chain. Find loads that are loading from
+ // the same base pointer and different offsets.
SmallPtrSet<SDNode*, 16> Visited;
SmallVector<int64_t, 4> Offsets;
DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
- for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
- E = DAG->allnodes_end(); NI != E; ++NI) {
- SDNode *Node = &*NI;
- if (!Node || !Node->isMachineOpcode())
+ bool Cluster = false;
+ SDNode *Base = Node;
+ for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
+ I != E; ++I) {
+ SDNode *User = *I;
+ if (User == Node || !Visited.insert(User))
continue;
-
- unsigned Opc = Node->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- if (!TID.mayLoad())
+ int64_t Offset1, Offset2;
+ if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
+ Offset1 == Offset2)
+ // FIXME: Should be ok if the addresses are identical. But earlier
+ // optimizations really should have eliminated one of the loads.
continue;
+ if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
+ Offsets.push_back(Offset1);
+ O2SMap.insert(std::make_pair(Offset2, User));
+ Offsets.push_back(Offset2);
+ if (Offset2 < Offset1)
+ Base = User;
+ Cluster = true;
+ }
- SDNode *Chain = 0;
- unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Other)
- Chain = Node->getOperand(NumOps-1).getNode();
- if (!Chain)
- continue;
+ if (!Cluster)
+ return;
- // Look for other loads of the same chain. Find loads that are loading from
- // the same base pointer and different offsets.
- Visited.clear();
- Offsets.clear();
- O2SMap.clear();
- bool Cluster = false;
- SDNode *Base = Node;
- int64_t BaseOffset;
- for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end();
- I != E; ++I) {
- SDNode *User = *I;
- if (User == Node || !Visited.insert(User))
- continue;
- int64_t Offset1, Offset2;
- if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
- Offset1 == Offset2)
- // FIXME: Should be ok if they addresses are identical. But earlier
- // optimizations really should have eliminated one of the loads.
- continue;
- if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
- Offsets.push_back(Offset1);
- O2SMap.insert(std::make_pair(Offset2, User));
- Offsets.push_back(Offset2);
- if (Offset2 < Offset1) {
- Base = User;
- BaseOffset = Offset2;
- } else {
- BaseOffset = Offset1;
- }
- Cluster = true;
- }
+ // Sort them in increasing order.
+ std::sort(Offsets.begin(), Offsets.end());
+
+ // Check if the loads are close enough.
+ SmallVector<SDNode*, 4> Loads;
+ unsigned NumLoads = 0;
+ int64_t BaseOff = Offsets[0];
+ SDNode *BaseLoad = O2SMap[BaseOff];
+ Loads.push_back(BaseLoad);
+ for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
+ int64_t Offset = Offsets[i];
+ SDNode *Load = O2SMap[Offset];
+ if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, NumLoads))
+ break; // Stop right here. Ignore loads that are further away.
+ Loads.push_back(Load);
+ ++NumLoads;
+ }
- if (!Cluster)
- continue;
+ if (NumLoads == 0)
+ return;
- // Sort them in increasing order.
- std::sort(Offsets.begin(), Offsets.end());
-
- // Check if the loads are close enough.
- SmallVector<SDNode*, 4> Loads;
- unsigned NumLoads = 0;
- int64_t BaseOff = Offsets[0];
- SDNode *BaseLoad = O2SMap[BaseOff];
- Loads.push_back(BaseLoad);
- for (unsigned i = 1, e = Offsets.size(); i != e; ++i) {
- int64_t Offset = Offsets[i];
- SDNode *Load = O2SMap[Offset];
- if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,
- NumLoads))
- break; // Stop right here. Ignore loads that are further away.
- Loads.push_back(Load);
- ++NumLoads;
- }
+ // Cluster loads by adding MVT::Flag outputs and inputs. This also
+ // ensures they are scheduled in order of increasing addresses.
+ SDNode *Lead = Loads[0];
+ AddFlags(Lead, SDValue(0, 0), true, DAG);
+
+ SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+ for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
+ bool OutFlag = I < E - 1;
+ SDNode *Load = Loads[I];
+
+ AddFlags(Load, InFlag, OutFlag, DAG);
+
+ if (OutFlag)
+ InFlag = SDValue(Load, Load->getNumValues() - 1);
+
+ ++LoadsClustered;
+ }
+}
- if (NumLoads == 0)
+/// ClusterNodes - Cluster certain nodes which should be scheduled together.
+///
+void ScheduleDAGSDNodes::ClusterNodes() {
+ for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
+ E = DAG->allnodes_end(); NI != E; ++NI) {
+ SDNode *Node = &*NI;
+ if (!Node || !Node->isMachineOpcode())
continue;
- // Cluster loads by adding MVT::Flag outputs and inputs. This also
- // ensure they are scheduled in order of increasing addresses.
- SDNode *Lead = Loads[0];
- AddFlags(Lead, SDValue(0,0), true, DAG);
- SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1);
- for (unsigned i = 1, e = Loads.size(); i != e; ++i) {
- bool OutFlag = i < e-1;
- SDNode *Load = Loads[i];
- AddFlags(Load, InFlag, OutFlag, DAG);
- if (OutFlag)
- InFlag = SDValue(Load, Load->getNumValues()-1);
- ++LoadsClustered;
- }
+ unsigned Opc = Node->getMachineOpcode();
+ const TargetInstrDesc &TID = TII->get(Opc);
+ if (TID.mayLoad())
+ // Cluster loads from "near" addresses into combined SUnits.
+ ClusterNeighboringLoads(Node);
}
}
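
The per-chain walk above boils down to: collect offsets, sort them, take the lowest as the base, then extend the cluster while the target hook approves. A minimal sketch, with a fixed distance cutoff standing in for TII->shouldScheduleLoadsNear:

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<long long> offsets = {16, 0, 8, 256};
  std::sort(offsets.begin(), offsets.end());
  const long long kNearCutoff = 64;    // stand-in for the target hook
  std::vector<long long> cluster = {offsets[0]};   // base load
  for (size_t i = 1; i < offsets.size(); ++i) {
    if (offsets[i] - offsets[0] > kNearCutoff)
      break;                           // stop; farther loads are ignored
    cluster.push_back(offsets[i]);
  }
  assert(cluster.size() == 3);         // 0, 8, 16 cluster; 256 is dropped
  return 0;
}
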
@@ -364,8 +393,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (Cost >= 0)
PhysReg = 0;
- const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
- OpSU->Latency, PhysReg);
+ // If this is a ctrl dep, latency is 1.
+ unsigned OpLatency = isChain ? 1 : OpSU->Latency;
+ const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
+ OpLatency, PhysReg);
if (!isChain && !UnitLatencies) {
ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
@@ -382,8 +413,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
- // Cluster loads from "near" addresses into combined SUnits.
- ClusterNeighboringLoads();
+ // Cluster certain nodes which should be scheduled together.
+ ClusterNodes();
// Populate the SUnits array.
BuildSchedUnits();
// Compute all the scheduling dependencies between nodes.
@@ -427,15 +458,18 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
return;
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
- if (Def->isMachineOpcode() && Use->isMachineOpcode()) {
+ if (Def->isMachineOpcode()) {
const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
if (DefIdx >= II.getNumDefs())
return;
int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
if (DefCycle < 0)
return;
- const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
- int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+ int UseCycle = 1;
+ if (Use->isMachineOpcode()) {
+ const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
+ UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+ }
if (UseCycle >= 0) {
int Latency = DefCycle - UseCycle + 1;
if (Latency >= 0)
@@ -473,7 +507,7 @@ namespace {
}
// ProcessSourceNode - Process nodes with source order numbers. These are added
-// to a vector which EmitSchedule use to determine how to insert dbg_value
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
// instructions in the right order.
static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
InstrEmitter &Emitter,
@@ -485,13 +519,13 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
return;
MachineBasicBlock *BB = Emitter.getBlock();
- if (BB->empty() || BB->back().isPHI()) {
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
// Did not insert any instruction.
Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
return;
}
- Orders.push_back(std::make_pair(Order, &BB->back()));
+ Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
if (!N->getHasDebugValue())
return;
// Opportunistically insert immediate dbg_value uses, i.e. those with source
@@ -530,7 +564,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
for (; PDI != PDE; ++PDI) {
MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap);
if (DbgMI)
- BB->insert(BB->end(), DbgMI);
+ BB->insert(InsertPos, DbgMI);
}
}
@@ -574,9 +608,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// Insert all the dbg_values which have not already been inserted in source
// order sequence.
if (HasDbg) {
- MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin();
- while (BBBegin != BB->end() && BBBegin->isPHI())
- ++BBBegin;
+ MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
// Sort the source order instructions and use the order to insert debug
// values.
@@ -586,14 +618,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
// Now emit the rest according to source order.
unsigned LastOrder = 0;
- MachineInstr *LastMI = 0;
for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
unsigned Order = Orders[i].first;
MachineInstr *MI = Orders[i].second;
// Insert all SDDbgValue's whose order(s) are before "Order".
if (!MI)
continue;
- MachineBasicBlock *MIBB = MI->getParent();
#ifndef NDEBUG
unsigned LastDIOrder = 0;
#endif
@@ -612,13 +642,14 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// Insert to start of the BB (after PHIs).
BB->insert(BBBegin, DbgMI);
else {
+ // Insert at the instruction, which may be in a different
+ // block, if the block was split by a custom inserter.
MachineBasicBlock::iterator Pos = MI;
- MIBB->insert(llvm::next(Pos), DbgMI);
+ MI->getParent()->insert(llvm::next(Pos), DbgMI);
}
}
}
LastOrder = Order;
- LastMI = MI;
}
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index e8714ba..842fc8c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -108,7 +108,10 @@ namespace llvm {
private:
/// ClusterNeighboringLoads - Cluster loads from "near" addresses into
/// combined SUnits.
- void ClusterNeighboringLoads();
+ void ClusterNeighboringLoads(SDNode *Node);
+ /// ClusterNodes - Cluster certain nodes which should be scheduled together.
+ ///
+ void ClusterNodes();
/// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph.
void BuildSchedUnits();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 38bf68b..e83a034 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -790,9 +790,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
}
// EntryNode could meaningfully have debug info if we can find it...
-SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli)
+SelectionDAG::SelectionDAG(const TargetMachine &tm)
: TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
- FLI(fli),
EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
@@ -808,7 +807,6 @@ void SelectionDAG::init(MachineFunction &mf) {
SelectionDAG::~SelectionDAG() {
allnodes_clear();
delete Ordering;
- DbgInfo->clear();
delete DbgInfo;
}
@@ -835,11 +833,8 @@ void SelectionDAG::clear() {
EntryNode.UseList = 0;
AllNodes.push_back(&EntryNode);
Root = getEntryNode();
- delete Ordering;
- Ordering = new SDNodeOrdering();
+ Ordering->clear();
DbgInfo->clear();
- delete DbgInfo;
- DbgInfo = new SDDbgInfo();
}
SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
@@ -980,7 +975,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
}
}
-SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
+SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL,
EVT VT, int64_t Offset,
bool isTargetGA,
unsigned char TargetFlags) {
@@ -1015,7 +1010,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, GV, VT,
+ SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT,
Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -2291,7 +2286,6 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N,
unsigned i) {
EVT VT = N->getValueType(0);
- DebugLoc dl = N->getDebugLoc();
if (N->getMaskElt(i) < 0)
return getUNDEF(VT.getVectorElementType());
unsigned Index = N->getMaskElt(i);
@@ -2475,9 +2469,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
VT.getVectorNumElements() ==
Operand.getValueType().getVectorNumElements()) &&
"Vector element count mismatch!");
- if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
+
+ if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
+ OpOpcode == ISD::ANY_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
+
+ // (ext (trunc x)) -> x
+ if (OpOpcode == ISD::TRUNCATE) {
+ SDValue OpOp = Operand.getNode()->getOperand(0);
+ if (OpOp.getValueType() == VT)
+ return OpOp;
+ }
break;
case ISD::TRUNCATE:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
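
The new (ext (trunc x)) -> x fold is only taken when the types already match, and (judging by the surrounding case labels) it sits in the any_extend path, where it is safe because an any_extend's high bits are unspecified. A standalone sketch (plain C++, not part of this patch) of why the original value is a legal result:

#include <cassert>
#include <cstdint>

int main() {
  // any_extend(trunc x) -> x when the outer type matches x's type:
  // trunc discards the high bits and any_extend leaves them unspecified,
  // so returning x unchanged is one legal result.
  uint64_t x = 0xdeadbeefcafef00dULL;
  uint32_t t = (uint32_t)x;       // trunc i64 -> i32
  uint64_t ext = (uint64_t)t;     // one valid any_extend (zeros the top)
  // Only the low 32 bits are defined, and they agree with x.
  assert((ext & 0xffffffffULL) == (x & 0xffffffffULL));
  return 0;
}
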
@@ -2622,7 +2625,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
if (N1.getOpcode() == ISD::BUILD_VECTOR &&
N2.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
- Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
}
break;
@@ -3011,7 +3014,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDValue N1, SDValue N2, SDValue N3) {
// Perform various simplifications.
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
- ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
switch (Opcode) {
case ISD::CONCAT_VECTORS:
// A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to
@@ -3020,8 +3022,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
N2.getOpcode() == ISD::BUILD_VECTOR &&
N3.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<SDValue, 16> Elts(N1.getNode()->op_begin(), N1.getNode()->op_end());
- Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end());
- Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end());
+ Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end());
+ Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end());
return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size());
}
break;
@@ -3041,14 +3043,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
if (N2 == N3) return N2; // select C, X, X -> X
break;
- case ISD::BRCOND:
- if (N2C) {
- if (N2C->getZExtValue()) // Unconditional branch
- return getNode(ISD::BR, DL, MVT::Other, N1, N3);
- else
- return N1; // Never-taken branch
- }
- break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
break;
@@ -3267,6 +3261,15 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
if (VT.bitsGT(LVT))
VT = LVT;
}
+
+ // If we're optimizing for size, and there is a limit, bump the maximum number
+ // of operations inserted down to 4. This is a wild guess that approximates
+ // the size of a call to memcpy or memset (3 arguments + call).
+ if (Limit != ~0U) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ Limit = 4;
+ }
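
The policy here is a size-vs-speed judgment: under OptimizeForSize, four stores are assumed to be roughly the footprint of the memcpy/memset call being replaced (three argument setups plus the call), while AlwaysInline callers (Limit == ~0U) are left untouched. A hedged restatement as a standalone helper:

#include <cassert>

// Clamp the inline memcpy/memset budget to 4 stores under -Os, unless
// the caller demanded inlining (targetLimit == ~0U means AlwaysInline).
static unsigned memOpLimit(unsigned targetLimit, bool optForSize) {
  if (optForSize && targetLimit != ~0U)
    return 4;
  return targetLimit;
}

int main() {
  assert(memOpLimit(16, /*optForSize=*/true) == 4);
  assert(memOpLimit(16, /*optForSize=*/false) == 16);
  assert(memOpLimit(~0U, /*optForSize=*/true) == ~0U); // AlwaysInline wins
  return 0;
}
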
unsigned NumMemOps = 0;
while (Size != 0) {
@@ -3321,9 +3324,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
std::string Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
- uint64_t Limit = -1ULL;
- if (!AlwaysInline)
- Limit = TLI.getMaxStoresPerMemcpy();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy();
+
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroStr ? 0 : SrcAlign),
@@ -3368,7 +3370,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// FIXME does the case above also need this?
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
assert(NVT.bitsGE(VT));
- Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+ Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
SrcSV, SrcSVOff + SrcOff, VT, isVol, false,
MinAlign(SrcAlign, SrcOff));
@@ -3401,9 +3403,6 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// below a certain threshold.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
- uint64_t Limit = -1ULL;
- if (!AlwaysInline)
- Limit = TLI.getMaxStoresPerMemmove();
bool DstAlignCanChange = false;
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
@@ -3412,6 +3411,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove();
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
@@ -3895,8 +3895,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
}
SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
- ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
const Value *SV, int SVOffset, EVT MemVT,
bool isVolatile, bool isNonTemporal,
@@ -3919,12 +3919,12 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
MachineMemOperand *MMO =
MF.getMachineMemOperand(SV, Flags, SVOffset,
MemVT.getStoreSize(), Alignment);
- return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO);
+ return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
- ISD::LoadExtType ExtType, EVT VT, SDValue Chain,
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+ EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset, EVT MemVT,
MachineMemOperand *MMO) {
if (VT == MemVT) {
@@ -3974,18 +3974,18 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
bool isVolatile, bool isNonTemporal,
unsigned Alignment) {
SDValue Undef = getUNDEF(Ptr.getValueType());
- return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef,
+ return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment);
}
-SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
const Value *SV,
int SVOffset, EVT MemVT,
bool isVolatile, bool isNonTemporal,
unsigned Alignment) {
SDValue Undef = getUNDEF(Ptr.getValueType());
- return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef,
+ return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment);
}
@@ -3995,7 +3995,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
"Load is already an indexed load!");
- return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(),
+ return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getSrcValue(),
LD->getSrcValueOffset(), LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
@@ -4141,9 +4141,10 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
- SDValue SV) {
- SDValue Ops[] = { Chain, Ptr, SV };
- return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3);
+ SDValue SV,
+ unsigned Align) {
+ SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) };
+ return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4);
}
SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
@@ -4425,17 +4426,16 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) {
/// already exists. If the resultant node does not exist in the DAG, the
/// input node is returned. As a degenerate case, if you specify the same
/// input operands as the node already has, the input node is returned.
-SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
- SDNode *N = InN.getNode();
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) {
assert(N->getNumOperands() == 1 && "Update with wrong number of operands");
// Check to see if there is no change.
- if (Op == N->getOperand(0)) return InN;
+ if (Op == N->getOperand(0)) return N;
// See if the modified node already exists.
void *InsertPos = 0;
if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos))
- return SDValue(Existing, InN.getResNo());
+ return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
@@ -4447,22 +4447,20 @@ SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) {
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
- return InN;
+ return N;
}
-SDValue SelectionDAG::
-UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
- SDNode *N = InN.getNode();
+SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) {
assert(N->getNumOperands() == 2 && "Update with wrong number of operands");
// Check to see if there is no change.
if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1))
- return InN; // No operands changed, just return the input node.
+ return N; // No operands changed, just return the input node.
// See if the modified node already exists.
void *InsertPos = 0;
if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos))
- return SDValue(Existing, InN.getResNo());
+ return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
@@ -4477,32 +4475,31 @@ UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) {
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
- return InN;
+ return N;
}
-SDValue SelectionDAG::
-UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) {
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) {
SDValue Ops[] = { Op1, Op2, Op3 };
return UpdateNodeOperands(N, Ops, 3);
}
-SDValue SelectionDAG::
-UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
SDValue Op3, SDValue Op4) {
SDValue Ops[] = { Op1, Op2, Op3, Op4 };
return UpdateNodeOperands(N, Ops, 4);
}
-SDValue SelectionDAG::
-UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2,
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2,
SDValue Op3, SDValue Op4, SDValue Op5) {
SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 };
return UpdateNodeOperands(N, Ops, 5);
}
-SDValue SelectionDAG::
-UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) {
- SDNode *N = InN.getNode();
+SDNode *SelectionDAG::
+UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) {
assert(N->getNumOperands() == NumOps &&
"Update with wrong number of operands");
@@ -4516,12 +4513,12 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) {
}
// No operands changed, just return the input node.
- if (!AnyChange) return InN;
+ if (!AnyChange) return N;
// See if the modified node already exists.
void *InsertPos = 0;
if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos))
- return SDValue(Existing, InN.getResNo());
+ return Existing;
// Nope it doesn't. Remove the node from its current place in the maps.
if (InsertPos)
@@ -4535,7 +4532,7 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) {
// If this gets put into a CSE map, add it.
if (InsertPos) CSEMap.InsertNode(N, InsertPos);
- return InN;
+ return N;
}
/// DropOperands - Release the operands and set this node to have
@@ -5378,9 +5375,10 @@ HandleSDNode::~HandleSDNode() {
DropOperands();
}
-GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL,
+ const GlobalValue *GA,
EVT VT, int64_t o, unsigned char TF)
- : SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) {
+ : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
TheGlobal = GA;
}
@@ -5669,13 +5667,16 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FSQRT: return "fsqrt";
case ISD::FSIN: return "fsin";
case ISD::FCOS: return "fcos";
- case ISD::FPOWI: return "fpowi";
- case ISD::FPOW: return "fpow";
case ISD::FTRUNC: return "ftrunc";
case ISD::FFLOOR: return "ffloor";
case ISD::FCEIL: return "fceil";
case ISD::FRINT: return "frint";
case ISD::FNEARBYINT: return "fnearbyint";
+ case ISD::FEXP: return "fexp";
+ case ISD::FEXP2: return "fexp2";
+ case ISD::FLOG: return "flog";
+ case ISD::FLOG2: return "flog2";
+ case ISD::FLOG10: return "flog10";
// Binary operators
case ISD::ADD: return "add";
@@ -5706,7 +5707,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FREM: return "frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
+ case ISD::FPOW: return "fpow";
+ case ISD::FPOWI: return "fpowi";
case ISD::SETCC: return "setcc";
case ISD::VSETCC: return "vsetcc";
case ISD::SELECT: return "select";
@@ -6260,23 +6263,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
- if (MFI.isFixedObjectIndex(FrameIdx)) {
- int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
-
- // The alignment of the frame index can be determined from its offset from
- // the incoming frame position. If the frame object is at offset 32 and
- // the stack is guaranteed to be 16-byte aligned, then we know that the
- // object is 16-byte aligned.
- unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment();
- unsigned Align = MinAlign(ObjectOffset, StackAlign);
-
- // Finally, the frame object itself may have a known alignment. Factor
- // the alignment + offset into a new alignment. For example, if we know
- // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
- // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte
- // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
- return std::max(Align, FIInfoAlign);
- }
return FIInfoAlign;
}
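The deleted block above removes alignment inference for fixed frame objects, leaving only the frame-info alignment. The arithmetic its comments described is still worth keeping in mind: MinAlign(Offset, StackAlign) is the largest power of two dividing both. A standalone illustration, assuming LLVM's usual lowest-set-bit formulation of MinAlign:

```cpp
// Standalone illustration (not LLVM code) of the alignment math in the
// removed comments: minAlign(A, B) is the largest power of two dividing
// both A and B -- the lowest set bit of A | B.
#include <cstdint>
#include <cstdio>

uint64_t minAlign(uint64_t A, uint64_t B) {
  uint64_t C = A | B;
  return C & (~C + 1);   // isolate the lowest set bit
}

int main() {
  // An object at offset 32 from a 16-byte-aligned base is 16-byte aligned.
  std::printf("%llu\n", (unsigned long long)minAlign(32, 16)); // 16
  // An 8-byte-aligned object addressed 4 bytes in is only 4-byte aligned.
  std::printf("%llu\n", (unsigned long long)minAlign(8, 4));   // 4
  // Align 4 + 1-byte offset degrades to align 1, as the comment noted.
  std::printf("%llu\n", (unsigned long long)minAlign(4, 1));   // 1
}
```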
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fbe601f..d323c16 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "isel"
#include "SDNodeDbgValue.h"
#include "SelectionDAGBuilder.h"
-#include "FunctionLoweringInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -32,6 +31,7 @@
#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -70,113 +70,6 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
-namespace {
- /// RegsForValue - This struct represents the registers (physical or virtual)
- /// that a particular set of values is assigned, and the type information
- /// about the value. The most common situation is to represent one value at a
- /// time, but struct or array values are handled element-wise as multiple
- /// values. The splitting of aggregates is performed recursively, so that we
- /// never have aggregate-typed registers. The values at this point do not
- /// necessarily have legal types, so each value may require one or more
- /// registers of some legal type.
- ///
- struct RegsForValue {
- /// TLI - The TargetLowering object.
- ///
- const TargetLowering *TLI;
-
- /// ValueVTs - The value types of the values, which may not be legal, and
- /// may need be promoted or synthesized from one or more registers.
- ///
- SmallVector<EVT, 4> ValueVTs;
-
- /// RegVTs - The value types of the registers. This is the same size as
- /// ValueVTs and it records, for each value, what the type of the assigned
- /// register or registers are. (Individual values are never synthesized
- /// from more than one type of register.)
- ///
- /// With virtual registers, the contents of RegVTs is redundant with TLI's
- /// getRegisterType member function, however when with physical registers
- /// it is necessary to have a separate record of the types.
- ///
- SmallVector<EVT, 4> RegVTs;
-
- /// Regs - This list holds the registers assigned to the values.
- /// Each legal or promoted value requires one register, and each
- /// expanded value requires multiple registers.
- ///
- SmallVector<unsigned, 4> Regs;
-
- RegsForValue() : TLI(0) {}
-
- RegsForValue(const TargetLowering &tli,
- const SmallVector<unsigned, 4> &regs,
- EVT regvt, EVT valuevt)
- : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
- RegsForValue(const TargetLowering &tli,
- const SmallVector<unsigned, 4> &regs,
- const SmallVector<EVT, 4> &regvts,
- const SmallVector<EVT, 4> &valuevts)
- : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
- RegsForValue(LLVMContext &Context, const TargetLowering &tli,
- unsigned Reg, const Type *Ty) : TLI(&tli) {
- ComputeValueVTs(tli, Ty, ValueVTs);
-
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT);
- EVT RegisterVT = TLI->getRegisterType(Context, ValueVT);
- for (unsigned i = 0; i != NumRegs; ++i)
- Regs.push_back(Reg + i);
- RegVTs.push_back(RegisterVT);
- Reg += NumRegs;
- }
- }
-
- /// areValueTypesLegal - Return true if types of all the values are legal.
- bool areValueTypesLegal() {
- for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT RegisterVT = RegVTs[Value];
- if (!TLI->isTypeLegal(RegisterVT))
- return false;
- }
- return true;
- }
-
-
- /// append - Add the specified values to this one.
- void append(const RegsForValue &RHS) {
- TLI = RHS.TLI;
- ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
- RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
- Regs.append(RHS.Regs.begin(), RHS.Regs.end());
- }
-
-
- /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
- /// this value and returns the result as a ValueVTs value. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
- SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
-
- /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
- /// specified value into the registers specified by this object. This uses
- /// Chain/Flag as the input and updates them for the output Chain/Flag.
- /// If the Flag pointer is NULL, no flag is used.
- void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const;
-
- /// AddInlineAsmOperands - Add this value to the specified inlineasm node
- /// operand list. This adds the code marker, matching input operand index
- /// (if applicable), and includes the number of values added into it.
- void AddInlineAsmOperands(unsigned Kind,
- bool HasMatching, unsigned MatchingIdx,
- SelectionDAG &DAG,
- std::vector<SDValue> &Ops) const;
- };
-}
-
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT, then AssertOp can be used to specify whether the extra
@@ -528,6 +421,268 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
}
}
+namespace {
+ /// RegsForValue - This struct represents the registers (physical or virtual)
+ /// that a particular set of values is assigned, and the type information
+ /// about the value. The most common situation is to represent one value at a
+ /// time, but struct or array values are handled element-wise as multiple
+ /// values. The splitting of aggregates is performed recursively, so that we
+ /// never have aggregate-typed registers. The values at this point do not
+ /// necessarily have legal types, so each value may require one or more
+ /// registers of some legal type.
+ ///
+ struct RegsForValue {
+ /// ValueVTs - The value types of the values, which may not be legal, and
+ /// may need be promoted or synthesized from one or more registers.
+ ///
+ SmallVector<EVT, 4> ValueVTs;
+
+ /// RegVTs - The value types of the registers. This is the same size as
+ /// ValueVTs and it records, for each value, what the type of the assigned
+ /// register or registers are. (Individual values are never synthesized
+ /// from more than one type of register.)
+ ///
+ /// With virtual registers, the contents of RegVTs is redundant with TLI's
+ /// getRegisterType member function, however when with physical registers
+ /// it is necessary to have a separate record of the types.
+ ///
+ SmallVector<EVT, 4> RegVTs;
+
+ /// Regs - This list holds the registers assigned to the values.
+ /// Each legal or promoted value requires one register, and each
+ /// expanded value requires multiple registers.
+ ///
+ SmallVector<unsigned, 4> Regs;
+
+ RegsForValue() {}
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs,
+ EVT regvt, EVT valuevt)
+ : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+ RegsForValue(const SmallVector<unsigned, 4> &regs,
+ const SmallVector<EVT, 4> &regvts,
+ const SmallVector<EVT, 4> &valuevts)
+ : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
+
+ RegsForValue(LLVMContext &Context, const TargetLowering &tli,
+ unsigned Reg, const Type *Ty) {
+ ComputeValueVTs(tli, Ty, ValueVTs);
+
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
+ EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ Regs.push_back(Reg + i);
+ RegVTs.push_back(RegisterVT);
+ Reg += NumRegs;
+ }
+ }
+
+ /// areValueTypesLegal - Return true if types of all the values are legal.
+ bool areValueTypesLegal(const TargetLowering &TLI) {
+ for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT RegisterVT = RegVTs[Value];
+ if (!TLI.isTypeLegal(RegisterVT))
+ return false;
+ }
+ return true;
+ }
+
+ /// append - Add the specified values to this one.
+ void append(const RegsForValue &RHS) {
+ ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
+ RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
+ Regs.append(RHS.Regs.begin(), RHS.Regs.end());
+ }
+
+ /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+ /// this value and returns the result as a ValueVTs value. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+ /// specified value into the registers specified by this object. This uses
+ /// Chain/Flag as the input and updates them for the output Chain/Flag.
+ /// If the Flag pointer is NULL, no flag is used.
+ void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const;
+
+ /// AddInlineAsmOperands - Add this value to the specified inlineasm node
+ /// operand list. This adds the code marker, matching input operand index
+ /// (if applicable), and includes the number of values added into it.
+ void AddInlineAsmOperands(unsigned Kind,
+ bool HasMatching, unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const;
+ };
+}
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+ FunctionLoweringInfo &FuncInfo,
+ DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Assemble the legal parts into the final values.
+ SmallVector<SDValue, 4> Values(ValueVTs.size());
+ SmallVector<SDValue, 8> Parts;
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ // Copy the legal parts from the registers.
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
+
+ Parts.resize(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue P;
+ if (Flag == 0) {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+ } else {
+ P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+ *Flag = P.getValue(2);
+ }
+
+ Chain = P.getValue(1);
+
+ // If the source register was virtual and if we know something about it,
+ // add an assert node.
+ if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
+ RegisterVT.isInteger() && !RegisterVT.isVector()) {
+ unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
+ if (FuncInfo.LiveOutRegInfo.size() > SlotNo) {
+ const FunctionLoweringInfo::LiveOutInfo &LOI =
+ FuncInfo.LiveOutRegInfo[SlotNo];
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI.NumSignBits;
+ unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+
+ if (FromVT != MVT::Other)
+ P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
+ }
+ }
+
+ Parts[i] = P;
+ }
+
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
+ NumRegs, RegisterVT, ValueVT);
+ Part += NumRegs;
+ Parts.clear();
+ }
+
+ return DAG.getNode(ISD::MERGE_VALUES, dl,
+ DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
+ &Values[0], ValueVTs.size());
+}
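getCopyFromRegs now takes FunctionLoweringInfo explicitly so it can consult LiveOutRegInfo without RegsForValue storing a TargetLowering pointer. The assert-node logic in its body picks the narrowest AssertSext/AssertZext the known bits justify; a standalone sketch of just that decision ladder, with the chosen type reported as a string for illustration:

```cpp
// Sketch of the "tightest assert" ladder above, outside of LLVM: given a
// register width and the known sign/zero bits, report the narrowest
// source type the value provably extends from.
#include <cstdio>

const char *pickAssert(unsigned RegSize, unsigned NumSignBits,
                       unsigned NumZeroBits) {
  if (NumSignBits == RegSize)       return "AssertSext from i1";
  if (NumZeroBits >= RegSize - 1)   return "AssertZext from i1";
  if (NumSignBits > RegSize - 8)    return "AssertSext from i8";
  if (NumZeroBits >= RegSize - 8)   return "AssertZext from i8";
  if (NumSignBits > RegSize - 16)   return "AssertSext from i16";
  if (NumZeroBits >= RegSize - 16)  return "AssertZext from i16";
  if (NumSignBits > RegSize - 32)   return "AssertSext from i32";
  if (NumZeroBits >= RegSize - 32)  return "AssertZext from i32";
  return "no assert";
}

int main() {
  // A 32-bit register with 25 known sign bits provably fits in i8.
  std::printf("%s\n", pickAssert(32, 25, 0));  // AssertSext from i8
  // 24 known zero bits meet the >= RegSize-8 threshold for i8.
  std::printf("%s\n", pickAssert(32, 1, 24));  // AssertZext from i8
}
```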
+
+/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
+/// specified value into the registers specified by this object. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
+ SDValue &Chain, SDValue *Flag) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Get the list of the values' legal parts.
+ unsigned NumRegs = Regs.size();
+ SmallVector<SDValue, 8> Parts(NumRegs);
+ for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ EVT ValueVT = ValueVTs[Value];
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+ EVT RegisterVT = RegVTs[Value];
+
+ getCopyToParts(DAG, dl,
+ Val.getValue(Val.getResNo() + Value),
+ &Parts[Part], NumParts, RegisterVT);
+ Part += NumParts;
+ }
+
+ // Copy the parts into the registers.
+ SmallVector<SDValue, 8> Chains(NumRegs);
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ SDValue Part;
+ if (Flag == 0) {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
+ } else {
+ Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
+ *Flag = Part.getValue(1);
+ }
+
+ Chains[i] = Part.getValue(0);
+ }
+
+ if (NumRegs == 1 || Flag)
+    // If NumRegs > 1 && Flag is used, then the use of the last CopyToReg is
+    // flagged to it. That is, the CopyToReg nodes and the user are considered
+ // a single scheduling unit. If we create a TokenFactor and return it as
+ // chain, then the TokenFactor is both a predecessor (operand) of the
+ // user as well as a successor (the TF operands are flagged to the user).
+ // c1, f1 = CopyToReg
+ // c2, f2 = CopyToReg
+ // c3 = TokenFactor c1, c2
+ // ...
+ // = op c3, ..., f2
+ Chain = Chains[NumRegs-1];
+ else
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
+}
+
+/// AddInlineAsmOperands - Add this value to the specified inlineasm node
+/// operand list. This adds the code marker and includes the number of
+/// values added into it.
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
+ unsigned MatchingIdx,
+ SelectionDAG &DAG,
+ std::vector<SDValue> &Ops) const {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
+ if (HasMatching)
+ Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
+ SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
+ Ops.push_back(Res);
+
+ for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
+ unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
+ EVT RegisterVT = RegVTs[Value];
+ for (unsigned i = 0; i != NumRegs; ++i) {
+ assert(Reg < Regs.size() && "Mismatch in # registers expected");
+ Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
+ }
+ }
+}
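AddInlineAsmOperands packs the operand kind, the register count, and an optional matched-operand index into a single flag-word constant before the registers themselves. A plausible packing is sketched below purely for illustration; the shift amounts, the high "has matching operand" bit, and the kind code are assumptions, and the real layout is defined by llvm::InlineAsm's getFlagWord and getFlagWordForMatchingOp:

```cpp
// Illustrative only: an assumed bit layout for the inline-asm operand
// flag word built above. The real encoding lives in llvm::InlineAsm;
// the constants here were chosen just to show the shape.
#include <cstdio>

enum { KindShift = 0, NumRegsShift = 3, MatchingIdxShift = 16 };

unsigned getFlagWord(unsigned Kind, unsigned NumRegs) {
  return (Kind << KindShift) | (NumRegs << NumRegsShift);
}

unsigned getFlagWordForMatchingOp(unsigned Flag, unsigned MatchingIdx) {
  return Flag | 0x80000000u | (MatchingIdx << MatchingIdxShift);
}

int main() {
  // Two registers of an assumed kind code 1, matched against operand 0.
  unsigned F = getFlagWordForMatchingOp(getFlagWord(1, 2), 0);
  std::printf("flag word = 0x%08x\n", F);  // 0x80000011
}
```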
void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
AA = &aa;
@@ -543,6 +698,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
/// consumed.
void SelectionDAGBuilder::clear() {
NodeMap.clear();
+ UnusedArgNodeMap.clear();
PendingLoads.clear();
PendingExports.clear();
CurDebugLoc = DebugLoc();
@@ -649,27 +805,63 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
}
}
+// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
+ // If we already have an SDValue for this value, use it. It's important
+ // to do this first, so that we don't create a CopyFromReg if we already
+ // have a regular SDValue.
+ SDValue &N = NodeMap[V];
+ if (N.getNode()) return N;
+
+ // If there's a virtual register allocated and initialized for this
+ // value, use it.
+ DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
+ if (It != FuncInfo.ValueMap.end()) {
+ unsigned InReg = It->second;
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ }
+
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ return Val;
+}
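The rewritten getValue makes the lookup order explicit: the per-DAG NodeMap first, then FuncInfo.ValueMap for values that already live in virtual registers, and only then getValueImpl to materialize something new. A standalone sketch of that three-tier memoized lookup, with integers standing in for SDValues and registers:

```cpp
// Sketch (plain C++) of getValue's three-tier lookup: a memoized node
// map, then a register map that yields a copy-from-register node, then
// fresh materialization. All types here are stand-ins, not LLVM's.
#include <cstdio>
#include <map>
#include <string>

static std::map<std::string, int> NodeMap;   // value -> cached node id
static std::map<std::string, int> ValueMap;  // value -> virtual register
static int NextId = 100;

int materialize(const std::string &V) { return NextId++; }

int getValue(const std::string &V) {
  auto N = NodeMap.find(V);
  if (N != NodeMap.end())
    return N->second;                     // tier 1: already built
  auto R = ValueMap.find(V);
  if (R != ValueMap.end())
    return NodeMap[V] = 1000 + R->second; // tier 2: copy from vreg
  return NodeMap[V] = materialize(V);     // tier 3: build a fresh node
}

int main() {
  ValueMap["x"] = 7;
  std::printf("%d %d\n", getValue("x"), getValue("y")); // 1007 100
}
```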
+
+/// getNonRegisterValue - Return an SDValue for the given Value, but
+/// don't look in FuncInfo.ValueMap for a virtual register.
+SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
+ // If we already have an SDValue for this value, use it.
SDValue &N = NodeMap[V];
if (N.getNode()) return N;
+ // Otherwise create a new SDValue and remember it.
+ SDValue Val = getValueImpl(V);
+ NodeMap[V] = Val;
+ return Val;
+}
+
+/// getValueImpl - Helper function for getValue and getNonRegisterValue.
+/// Create an SDValue for the given value.
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
if (const Constant *C = dyn_cast<Constant>(V)) {
EVT VT = TLI.getValueType(V->getType(), true);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
- return N = DAG.getConstant(*CI, VT);
+ return DAG.getConstant(*CI, VT);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return N = DAG.getGlobalAddress(GV, VT);
+ return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
if (isa<ConstantPointerNull>(C))
- return N = DAG.getConstant(0, TLI.getPointerTy());
+ return DAG.getConstant(0, TLI.getPointerTy());
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
- return N = DAG.getConstantFP(*CFP, VT);
+ return DAG.getConstantFP(*CFP, VT);
if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
- return N = DAG.getUNDEF(VT);
+ return DAG.getUNDEF(VT);
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
visit(CE->getOpcode(), *CE);
@@ -757,82 +949,25 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
}
- unsigned InReg = FuncInfo.ValueMap[V];
- assert(InReg && "Value not in map!");
-
- RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
- SDValue Chain = DAG.getEntryNode();
- return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
-}
-
-/// Get the EVTs and ArgFlags collections that represent the legalized return
-/// type of the given function. This does not require a DAG or a return value,
-/// and is suitable for use before any DAGs for the function are constructed.
-static void getReturnInfo(const Type* ReturnType,
- Attributes attr, SmallVectorImpl<EVT> &OutVTs,
- SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
- const TargetLowering &TLI,
- SmallVectorImpl<uint64_t> *Offsets = 0) {
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, ReturnType, ValueVTs);
- unsigned NumValues = ValueVTs.size();
- if (NumValues == 0) return;
- unsigned Offset = 0;
-
- for (unsigned j = 0, f = NumValues; j != f; ++j) {
- EVT VT = ValueVTs[j];
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
- if (attr & Attribute::SExt)
- ExtendKind = ISD::SIGN_EXTEND;
- else if (attr & Attribute::ZExt)
- ExtendKind = ISD::ZERO_EXTEND;
-
- // FIXME: C calling convention requires the return type to be promoted to
- // at least 32-bit. But this is not necessary for non-C calling
- // conventions. The frontend should mark functions whose return values
- // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
- if (VT.bitsLT(MinVT))
- VT = MinVT;
- }
-
- unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
- EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
- unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
- PartVT.getTypeForEVT(ReturnType->getContext()));
-
- // 'inreg' on function refers to return value
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (attr & Attribute::InReg)
- Flags.setInReg();
-
- // Propagate extension type if any
- if (attr & Attribute::SExt)
- Flags.setSExt();
- else if (attr & Attribute::ZExt)
- Flags.setZExt();
-
- for (unsigned i = 0; i < NumParts; ++i) {
- OutVTs.push_back(PartVT);
- OutFlags.push_back(Flags);
- if (Offsets)
- {
- Offsets->push_back(Offset);
- Offset += PartSize;
- }
- }
+ // If this is an instruction which fast-isel has deferred, select it now.
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
+ RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
+ SDValue Chain = DAG.getEntryNode();
+ return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
}
+
+ llvm_unreachable("Can't get register for value!");
+ return SDValue();
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
- FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+ SmallVector<SDValue, 8> OutVals;
- if (!FLI.CanLowerReturn) {
- unsigned DemoteReg = FLI.DemoteRegister;
+ if (!FuncInfo.CanLowerReturn) {
+ unsigned DemoteReg = FuncInfo.DemoteRegister;
const Function *F = I.getParent()->getParent();
// Emit a store of the return value through the virtual register.
@@ -908,8 +1043,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
else if (F->paramHasAttr(0, Attribute::ZExt))
Flags.setZExt();
- for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
+ /*isfixed=*/true));
+ OutVals.push_back(Parts[i]);
+ }
}
}
}
@@ -918,7 +1056,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
CallingConv::ID CallConv =
DAG.getMachineFunction().getFunction()->getCallingConv();
Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
- Outs, getCurDebugLoc(), DAG);
+ Outs, OutVals, getCurDebugLoc(), DAG);
// Verify that the target's LowerReturn behaved as expected.
assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
@@ -1119,7 +1257,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
}
void SelectionDAGBuilder::visitBr(const BranchInst &I) {
- MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()];
+ MachineBasicBlock *BrMBB = FuncInfo.MBB;
// Update machine-CFG edges.
MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
@@ -1269,18 +1407,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
- // If the branch was constant folded, fix up the CFG.
- if (BrCond.getOpcode() == ISD::BR) {
- SwitchBB->removeSuccessor(CB.FalseBB);
- } else {
- // Otherwise, go ahead and insert the false branch.
- if (BrCond == getControlRoot())
- SwitchBB->removeSuccessor(CB.TrueBB);
-
- if (CB.FalseBB != NextBlock)
- BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
- DAG.getBasicBlock(CB.FalseBB));
- }
+ // Insert the false branch.
+ if (CB.FalseBB != NextBlock)
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
DAG.setRoot(BrCond);
}
@@ -1319,7 +1449,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
// therefore require extension or truncating.
SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
- unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
+ unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
@@ -1370,7 +1500,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
TLI.getPointerTy());
- B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
+ B.Reg = FuncInfo.CreateReg(TLI.getPointerTy());
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
B.Reg, ShiftOp);
@@ -1402,29 +1532,41 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
- // Make desired shift
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
TLI.getPointerTy());
- SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
- TLI.getPointerTy(),
- DAG.getConstant(1, TLI.getPointerTy()),
- ShiftOp);
-
- // Emit bit tests and jumps
- SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
- TLI.getPointerTy(), SwitchVal,
- DAG.getConstant(B.Mask, TLI.getPointerTy()));
- SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
- TLI.getSetCCResultType(AndOp.getValueType()),
- AndOp, DAG.getConstant(0, TLI.getPointerTy()),
- ISD::SETNE);
+ SDValue Cmp;
+ if (CountPopulation_64(B.Mask) == 1) {
+ // Testing for a single bit; just compare the shift count with what it
+ // would need to be to shift a 1 bit in that position.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(ShiftOp.getValueType()),
+ ShiftOp,
+ DAG.getConstant(CountTrailingZeros_64(B.Mask),
+ TLI.getPointerTy()),
+ ISD::SETEQ);
+ } else {
+ // Make desired shift
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
+ TLI.getPointerTy(),
+ DAG.getConstant(1, TLI.getPointerTy()),
+ ShiftOp);
+
+ // Emit bit tests and jumps
+ SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
+ TLI.getPointerTy(), SwitchVal,
+ DAG.getConstant(B.Mask, TLI.getPointerTy()));
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(AndOp.getValueType()),
+ AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+ ISD::SETNE);
+ }
SwitchBB->addSuccessor(B.TargetBB);
SwitchBB->addSuccessor(NextMBB);
SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
- AndCmp, DAG.getBasicBlock(B.TargetBB));
+ Cmp, DAG.getBasicBlock(B.TargetBB));
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1441,7 +1583,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
}
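The new fast path in visitBitTestCase observes that when the case mask has exactly one set bit, shifting a 1 into position and AND-ing is overkill: comparing the shift amount against the mask's trailing-zero count is the same predicate. A standalone check of that equivalence, using the GCC/Clang __builtin_ctzll builtin:

```cpp
// Exhaustive check that, for a single-bit mask, the AND-based test and
// the shift-count comparison used by the new fast path agree.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Mask = 1ull << 37;   // CountPopulation_64(Mask) == 1
  for (unsigned Shift = 0; Shift < 64; ++Shift) {
    bool viaAnd = ((1ull << Shift) & Mask) != 0;
    bool viaCmp = Shift == (unsigned)__builtin_ctzll(Mask);
    assert(viaAnd == viaCmp);
  }
  std::printf("equivalent for all 64 shift amounts\n");
}
```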
void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
- MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()];
+ MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
// Retrieve successors.
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
@@ -1969,7 +2111,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
}
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
- MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()];
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
// Figure out which block is immediately after the current one.
MachineBasicBlock *NextBlock = 0;
@@ -2035,7 +2177,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
- MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()];
+ MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
// Update machine-CFG edges with unique successors.
SmallVector<BasicBlock*, 32> succs;
@@ -2245,7 +2387,6 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT SrcVT = N.getValueType();
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
}
@@ -2254,7 +2395,6 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT SrcVT = N.getValueType();
EVT DestVT = TLI.getValueType(I.getType());
setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
}
@@ -2579,7 +2719,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
- if (CI->getZExtValue() == 0) continue;
+ if (CI->isZero()) continue;
uint64_t Offs =
TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
SDValue OffsVal;
@@ -2643,12 +2783,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
SDValue AllocSize = getValue(I.getArraySize());
- AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
- AllocSize,
- DAG.getConstant(TySize, AllocSize.getValueType()));
-
EVT IntPtr = TLI.getPointerTy();
- AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+ if (AllocSize.getValueType() != IntPtr)
+ AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
+
+ AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
+ AllocSize,
+ DAG.getConstant(TySize, IntPtr));
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
@@ -2804,8 +2945,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
- for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
- SDValue Op = getValue(I.getOperand(i));
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ SDValue Op = getValue(I.getArgOperand(i));
assert(TLI.isTypeLegal(Op.getValueType()) &&
"Intrinsic uses a non-legal type?");
Ops.push_back(Op);
@@ -2910,11 +3051,11 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
SDValue Root = getRoot();
SDValue L =
DAG.getAtomic(Op, getCurDebugLoc(),
- getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+ getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
Root,
- getValue(I.getOperand(1)),
- getValue(I.getOperand(2)),
- I.getOperand(1));
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ I.getArgOperand(0));
setValue(&I, L);
DAG.setRoot(L.getValue(1));
return 0;
@@ -2923,8 +3064,8 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
const char *
SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
- SDValue Op1 = getValue(I.getOperand(1));
- SDValue Op2 = getValue(I.getOperand(2));
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
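implVisitAluOverflow lowers the llvm.*.with.overflow intrinsics to a node with two results: the arithmetic value plus an i1 overflow flag, as the two-entry VT list above shows. The C-level analog of that pair, expressed with the GCC/Clang checked-arithmetic builtin:

```cpp
// The {result, overflow} pair built above, as plain C++: one operation,
// two results (requires GCC or Clang for the builtin).
#include <cstdio>

int main() {
  int Res;
  bool Ovf = __builtin_add_overflow(2000000000, 2000000000, &Res);
  std::printf("overflow=%d wrapped result=%d\n", Ovf, Res);
}
```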
@@ -2938,9 +3079,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
- if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(1));
+ SDValue Op = getValue(I.getArgOperand(0));
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -3050,8 +3191,8 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FEXP, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
}
setValue(&I, result);
@@ -3064,9 +3205,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
- if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(1));
+ SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
@@ -3160,8 +3301,8 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FLOG, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
}
setValue(&I, result);
@@ -3174,9 +3315,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
- if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(1));
+ SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Get the exponent.
@@ -3269,8 +3410,8 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FLOG2, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
}
setValue(&I, result);
@@ -3283,9 +3424,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
- if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(1));
+ SDValue Op = getValue(I.getArgOperand(0));
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
@@ -3371,8 +3512,8 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FLOG10, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
}
setValue(&I, result);
@@ -3385,9 +3526,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue result;
DebugLoc dl = getCurDebugLoc();
- if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
+ if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(1));
+ SDValue Op = getValue(I.getArgOperand(0));
SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
@@ -3485,8 +3626,8 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FEXP2, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)));
}
setValue(&I, result);
@@ -3497,12 +3638,12 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
void
SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue result;
- const Value *Val = I.getOperand(1);
+ const Value *Val = I.getArgOperand(0);
DebugLoc dl = getCurDebugLoc();
bool IsExp10 = false;
if (getValue(Val).getValueType() == MVT::f32 &&
- getValue(I.getOperand(2)).getValueType() == MVT::f32 &&
+ getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
@@ -3513,7 +3654,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
}
if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
- SDValue Op = getValue(I.getOperand(2));
+ SDValue Op = getValue(I.getArgOperand(1));
// Put the exponent in the right bit position for later addition to the
// final result:
@@ -3618,9 +3759,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
} else {
// No special expansion.
result = DAG.getNode(ISD::FPOW, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1)),
- getValue(I.getOperand(2)));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)));
}
setValue(&I, result);
@@ -3696,7 +3837,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
if (DV.isInlinedFnArgument(MF.getFunction()))
return false;
- MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()];
+ MachineBasicBlock *MBB = FuncInfo.MBB;
if (MBB != &MF.front())
return false;
@@ -3750,11 +3891,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::vacopy: visitVACopy(I); return 0;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::setjmp:
return "_setjmp"+!TLI.usesUnderscoreSetJmp();
@@ -3763,63 +3904,63 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::memcpy: {
    // Assert for address < 256 since we support only user-defined address
// spaces.
- assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
< 256 &&
- cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace()
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
< 256 &&
"Unknown address space");
- SDValue Op1 = getValue(I.getOperand(1));
- SDValue Op2 = getValue(I.getOperand(2));
- SDValue Op3 = getValue(I.getOperand(3));
- unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
- bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
- I.getOperand(1), 0, I.getOperand(2), 0));
+ I.getArgOperand(0), 0, I.getArgOperand(1), 0));
return 0;
}
case Intrinsic::memset: {
    // Assert for address < 256 since we support only user-defined address
// spaces.
- assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
< 256 &&
"Unknown address space");
- SDValue Op1 = getValue(I.getOperand(1));
- SDValue Op2 = getValue(I.getOperand(2));
- SDValue Op3 = getValue(I.getOperand(3));
- unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
- bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getOperand(1), 0));
+ I.getArgOperand(0), 0));
return 0;
}
case Intrinsic::memmove: {
    // Assert for address < 256 since we support only user-defined address
// spaces.
- assert(cast<PointerType>(I.getOperand(1)->getType())->getAddressSpace()
+ assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
< 256 &&
- cast<PointerType>(I.getOperand(2)->getType())->getAddressSpace()
+ cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
< 256 &&
"Unknown address space");
- SDValue Op1 = getValue(I.getOperand(1));
- SDValue Op2 = getValue(I.getOperand(2));
- SDValue Op3 = getValue(I.getOperand(3));
- unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
- bool isVol = cast<ConstantInt>(I.getOperand(5))->getZExtValue();
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
// If the source and destination are known to not be aliases, we can
// lower memmove as memcpy.
uint64_t Size = -1ULL;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
Size = C->getZExtValue();
- if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
+ if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
AliasAnalysis::NoAlias) {
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- false, I.getOperand(1), 0, I.getOperand(2), 0));
+ false, I.getArgOperand(0), 0, I.getArgOperand(1), 0));
return 0;
}
DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getOperand(1), 0, I.getOperand(2), 0));
+ I.getArgOperand(0), 0, I.getArgOperand(1), 0));
return 0;
}
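The memmove case above asks alias analysis whether the source and destination can overlap; when they provably cannot, it emits the cheaper memcpy lowering instead. The same strengthening at the C level, assuming the no-overlap proof is supplied by the caller:

```cpp
// Sketch of the memmove -> memcpy strengthening: overlap handling is the
// only thing memmove buys, so a proven-disjoint copy can use memcpy.
#include <cstddef>
#include <cstring>

void copyBytes(void *Dst, const void *Src, size_t N, bool ProvedNoAlias) {
  if (ProvedNoAlias)
    std::memcpy(Dst, Src, N);   // may copy in any order, often faster
  else
    std::memmove(Dst, Src, N);  // must act as if through a temporary buffer
}

int main() {
  char A[8] = "abcdefg", B[8];
  copyBytes(B, A, 8, /*ProvedNoAlias=*/true);
  return B[0] != 'a';
}
```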
case Intrinsic::dbg_declare: {
@@ -3908,7 +4049,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
} else {
bool createUndef = false;
      // FIXME: Why not use getValue() directly?
- SDValue &N = NodeMap[V];
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V))
+ // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
if (N.getNode()) {
if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
SDV = DAG.getDbgValue(Variable, N.getNode(),
@@ -3956,7 +4100,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_exception: {
// Insert the EXCEPTIONADDR instruction.
- assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() &&
+ assert(FuncInfo.MBB->isLandingPad() &&
"Call to eh.exception not in landing pad!");
SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
SDValue Ops[1];
@@ -3968,7 +4112,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_selector: {
- MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()];
+ MachineBasicBlock *CallMBB = FuncInfo.MBB;
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
if (CallMBB->isLandingPad())
AddCatchInfo(I, &MMI, CallMBB);
@@ -3978,13 +4122,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
#endif
// FIXME: Mark exception selector register as live in. Hack for PR1508.
unsigned Reg = TLI.getExceptionSelectorRegister();
- if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg);
+ if (Reg) FuncInfo.MBB->addLiveIn(Reg);
}
// Insert the EHSELECTION instruction.
SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
SDValue Ops[2];
- Ops[0] = getValue(I.getOperand(1));
+ Ops[0] = getValue(I.getArgOperand(0));
Ops[1] = getRoot();
SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
DAG.setRoot(Op.getValue(1));
@@ -3994,7 +4138,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
- GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
+ GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, MVT::i32);
setValue(&I, Res);
@@ -4007,15 +4151,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
MVT::Other,
getControlRoot(),
- getValue(I.getOperand(1)),
- getValue(I.getOperand(2))));
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1))));
return 0;
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
return 0;
case Intrinsic::eh_dwarf_cfa: {
- EVT VT = getValue(I.getOperand(1)).getValueType();
- SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl,
+ SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
TLI.getPointerTy());
SDValue Offset = DAG.getNode(ISD::ADD, dl,
TLI.getPointerTy(),
@@ -4031,7 +4174,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1));
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
@@ -4040,13 +4183,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_setjmp: {
setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0))));
return 0;
}
case Intrinsic::eh_sjlj_longjmp: {
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
getRoot(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0))));
return 0;
}
@@ -4072,34 +4215,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::convertuu: Code = ISD::CVT_UU; break;
}
EVT DestVT = TLI.getValueType(I.getType());
- const Value *Op1 = I.getOperand(1);
+ const Value *Op1 = I.getArgOperand(0);
Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
DAG.getValueType(DestVT),
DAG.getValueType(getValue(Op1).getValueType()),
- getValue(I.getOperand(2)),
- getValue(I.getOperand(3)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
Code);
setValue(&I, Res);
return 0;
}
case Intrinsic::sqrt:
setValue(&I, DAG.getNode(ISD::FSQRT, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::powi:
- setValue(&I, ExpandPowI(dl, getValue(I.getOperand(1)),
- getValue(I.getOperand(2)), DAG));
+ setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), DAG));
return 0;
case Intrinsic::sin:
setValue(&I, DAG.getNode(ISD::FSIN, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::cos:
setValue(&I, DAG.getNode(ISD::FCOS, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::log:
visitLog(I);
@@ -4121,14 +4264,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
- MVT::i16, getValue(I.getOperand(1))));
+ MVT::i16, getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::convert_from_fp16:
setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
- MVT::f32, getValue(I.getOperand(1))));
+ MVT::f32, getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::pcmarker: {
- SDValue Tmp = getValue(I.getOperand(1));
+ SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
return 0;
}
@@ -4143,23 +4286,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, dl,
- getValue(I.getOperand(1)).getValueType(),
- getValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0))));
return 0;
case Intrinsic::cttz: {
- SDValue Arg = getValue(I.getOperand(1));
+ SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
return 0;
}
case Intrinsic::ctlz: {
- SDValue Arg = getValue(I.getOperand(1));
+ SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
return 0;
}
case Intrinsic::ctpop: {
- SDValue Arg = getValue(I.getOperand(1));
+ SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
return 0;
@@ -4173,7 +4316,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
}
case Intrinsic::stackrestore: {
- Res = getValue(I.getOperand(1));
+ Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
return 0;
}
@@ -4183,8 +4326,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MachineFrameInfo *MFI = MF.getFrameInfo();
EVT PtrTy = TLI.getPointerTy();
- SDValue Src = getValue(I.getOperand(1)); // The guard's value.
- AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+ SDValue Src = getValue(I.getArgOperand(0)); // The guard's value.
+ AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
int FI = FuncInfo.StaticAllocaMap[Slot];
MFI->setStackProtectorIndex(FI);
@@ -4201,14 +4344,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::objectsize: {
// If we don't know by now, we're never going to know.
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
assert(CI && "Non-constant type in __builtin_object_size?");
- SDValue Arg = getValue(I.getOperand(0));
+ SDValue Arg = getValue(I.getCalledValue());
EVT Ty = Arg.getValueType();
- if (CI->getZExtValue() == 0)
+ if (CI->isZero())
Res = DAG.getConstant(-1ULL, Ty);
else
Res = DAG.getConstant(0, Ty);
@@ -4221,14 +4364,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return 0;
case Intrinsic::init_trampoline: {
- const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts());
+ const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
SDValue Ops[6];
Ops[0] = getRoot();
- Ops[1] = getValue(I.getOperand(1));
- Ops[2] = getValue(I.getOperand(2));
- Ops[3] = getValue(I.getOperand(3));
- Ops[4] = DAG.getSrcValue(I.getOperand(1));
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
Ops[5] = DAG.getSrcValue(F);
Res = DAG.getNode(ISD::TRAMPOLINE, dl,
@@ -4241,8 +4384,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::gcroot:
if (GFI) {
- const Value *Alloca = I.getOperand(1);
- const Constant *TypeMap = cast<Constant>(I.getOperand(2));
+ const Value *Alloca = I.getArgOperand(0);
+ const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
GFI->addStackRoot(FI->getIndex(), TypeMap);
@@ -4274,9 +4417,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::prefetch: {
SDValue Ops[4];
Ops[0] = getRoot();
- Ops[1] = getValue(I.getOperand(1));
- Ops[2] = getValue(I.getOperand(2));
- Ops[3] = getValue(I.getOperand(3));
+ Ops[1] = getValue(I.getArgOperand(0));
+ Ops[2] = getValue(I.getArgOperand(1));
+ Ops[3] = getValue(I.getArgOperand(2));
DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
return 0;
}
@@ -4285,7 +4428,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Ops[6];
Ops[0] = getRoot();
for (int x = 1; x < 6; ++x)
- Ops[x] = getValue(I.getOperand(x));
+ Ops[x] = getValue(I.getArgOperand(x - 1));
DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
return 0;
@@ -4294,12 +4437,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Root = getRoot();
SDValue L =
DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
- getValue(I.getOperand(2)).getValueType().getSimpleVT(),
+ getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
Root,
- getValue(I.getOperand(1)),
- getValue(I.getOperand(2)),
- getValue(I.getOperand(3)),
- I.getOperand(1));
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2)),
+ I.getArgOperand(0));
setValue(&I, L);
DAG.setRoot(L.getValue(1));
return 0;
@@ -4353,14 +4496,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
Args.reserve(CS.arg_size());
// Check whether the function can return without sret-demotion.
- SmallVector<EVT, 4> OutVTs;
- SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+ SmallVector<ISD::OutputArg, 4> Outs;
SmallVector<uint64_t, 4> Offsets;
- getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
- OutVTs, OutsFlags, TLI, &Offsets);
+ GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+ Outs, TLI, &Offsets);
bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
- FTy->isVarArg(), OutVTs, OutsFlags, DAG);
+ FTy->isVarArg(), Outs, FTy->getContext());
SDValue DemoteStackSlot;
@@ -4453,7 +4595,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
ComputeValueVTs(TLI, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
EVT PtrVT = PVTs[0];
- unsigned NumValues = OutVTs.size();
+ unsigned NumValues = Outs.size();
SmallVector<SDValue, 4> Values(NumValues);
SmallVector<SDValue, 4> Chains(NumValues);
@@ -4461,7 +4603,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
DemoteStackSlot,
DAG.getConstant(Offsets[i], PtrVT));
- SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
+ SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
Add, NULL, Offsets[i], false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
@@ -4580,16 +4722,16 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
/// lowered like a normal call.
bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
// Verify that the prototype makes sense. int memcmp(void*,void*,size_t)
- if (I.getNumOperands() != 4)
+ if (I.getNumArgOperands() != 3)
return false;
- const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
+ const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
- !I.getOperand(3)->getType()->isIntegerTy() ||
+ !I.getArgOperand(2)->getType()->isIntegerTy() ||
!I.getType()->isIntegerTy())
return false;
- const ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
+ const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
// memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0
// memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0
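visitMemCmpCall's renumbered argument checks still implement the expansion sketched in the two comments above: a small constant-size memcmp tested against zero becomes one integer load per operand and a compare. A standalone version of the 4-byte case:

```cpp
// Standalone version of the 4-byte expansion noted in the comment:
// memcmp(A, B, 4) != 0 is one 32-bit load of each side and a compare.
#include <cstdint>
#include <cstdio>
#include <cstring>

bool neq4(const void *A, const void *B) {
  uint32_t X, Y;
  std::memcpy(&X, A, 4);   // memcpy expresses a safe unaligned load
  std::memcpy(&Y, B, 4);
  return X != Y;
}

int main() {
  const char S1[] = "abcd", S2[] = "abce";
  std::printf("%d %d\n", neq4(S1, S2) ? 1 : 0,
              std::memcmp(S1, S2, 4) != 0 ? 1 : 0); // 1 1
}
```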
@@ -4656,11 +4798,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
void SelectionDAGBuilder::visitCall(const CallInst &I) {
+ // Handle inline assembly differently.
+ if (isa<InlineAsm>(I.getCalledValue())) {
+ visitInlineAsm(&I);
+ return;
+ }
+
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
- const TargetIntrinsicInfo *II = TM.getIntrinsicInfo();
- if (II) {
+ if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
if (unsigned IID = II->getIntrinsicID(F)) {
RenameFn = visitIntrinsicCall(I, IID);
if (!RenameFn)
@@ -4679,51 +4826,51 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (!F->hasLocalLinkage() && F->hasName()) {
StringRef Name = F->getName();
if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
- if (I.getNumOperands() == 3 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPointTy() &&
- I.getType() == I.getOperand(1)->getType() &&
- I.getType() == I.getOperand(2)->getType()) {
- SDValue LHS = getValue(I.getOperand(1));
- SDValue RHS = getValue(I.getOperand(2));
+ if (I.getNumArgOperands() == 2 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
+ I.getType() == I.getArgOperand(1)->getType()) {
+ SDValue LHS = getValue(I.getArgOperand(0));
+ SDValue RHS = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
LHS.getValueType(), LHS, RHS));
return;
}
} else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
- if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPointTy() &&
- I.getType() == I.getOperand(1)->getType()) {
- SDValue Tmp = getValue(I.getOperand(1));
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType()) {
+ SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
return;
}
} else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
- if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPointTy() &&
- I.getType() == I.getOperand(1)->getType() &&
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getOperand(1));
+ SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
return;
}
} else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
- if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPointTy() &&
- I.getType() == I.getOperand(1)->getType() &&
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getOperand(1));
+ SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
return;
}
} else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
- if (I.getNumOperands() == 2 && // Basic sanity checks.
- I.getOperand(1)->getType()->isFloatingPointTy() &&
- I.getType() == I.getOperand(1)->getType() &&
+ if (I.getNumArgOperands() == 1 && // Basic sanity checks.
+ I.getArgOperand(0)->getType()->isFloatingPointTy() &&
+ I.getType() == I.getArgOperand(0)->getType() &&
I.onlyReadsMemory()) {
- SDValue Tmp = getValue(I.getOperand(1));
+ SDValue Tmp = getValue(I.getArgOperand(0));
setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
Tmp.getValueType(), Tmp));
return;
@@ -4733,14 +4880,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
}
- } else if (isa<InlineAsm>(I.getOperand(0))) {
- visitInlineAsm(&I);
- return;
}
-
+
SDValue Callee;
if (!RenameFn)
- Callee = getValue(I.getOperand(0));
+ Callee = getValue(I.getCalledValue());
else
Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
@@ -4749,210 +4893,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
LowerCallTo(&I, Callee, I.isTailCall());
}
-/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
-/// this value and returns the result as a ValueVT value. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
-SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
- // Assemble the legal parts into the final values.
- SmallVector<SDValue, 4> Values(ValueVTs.size());
- SmallVector<SDValue, 8> Parts;
- for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
- // Copy the legal parts from the registers.
- EVT ValueVT = ValueVTs[Value];
- unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
- EVT RegisterVT = RegVTs[Value];
-
- Parts.resize(NumRegs);
- for (unsigned i = 0; i != NumRegs; ++i) {
- SDValue P;
- if (Flag == 0) {
- P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
- } else {
- P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
- *Flag = P.getValue(2);
- }
-
- Chain = P.getValue(1);
-
- // If the source register was virtual and if we know something about it,
- // add an assert node.
- if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
- RegisterVT.isInteger() && !RegisterVT.isVector()) {
- unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
- FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
- if (FLI.LiveOutRegInfo.size() > SlotNo) {
- FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
-
- unsigned RegSize = RegisterVT.getSizeInBits();
- unsigned NumSignBits = LOI.NumSignBits;
- unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
-
- // FIXME: We capture more information than the dag can represent. For
- // now, just use the tightest assertzext/assertsext possible.
- bool isSExt = true;
- EVT FromVT(MVT::Other);
- if (NumSignBits == RegSize)
- isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
- else if (NumZeroBits >= RegSize-1)
- isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
- else if (NumSignBits > RegSize-8)
- isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
- else if (NumZeroBits >= RegSize-8)
- isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
- else if (NumSignBits > RegSize-16)
- isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
- else if (NumZeroBits >= RegSize-16)
- isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
- else if (NumSignBits > RegSize-32)
- isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
- else if (NumZeroBits >= RegSize-32)
- isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
-
- if (FromVT != MVT::Other)
- P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
- RegisterVT, P, DAG.getValueType(FromVT));
- }
- }
-
- Parts[i] = P;
- }
-
- Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
- NumRegs, RegisterVT, ValueVT);
- Part += NumRegs;
- Parts.clear();
- }
-
- return DAG.getNode(ISD::MERGE_VALUES, dl,
- DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
- &Values[0], ValueVTs.size());
-}
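
The if/else ladder above picks the tightest AssertSext/AssertZext the known-bits data supports, preferring smaller FromVT widths. A worked case with hypothetical numbers, RegSize = 32, NumSignBits = 26, NumZeroBits = 0:

  //   NumSignBits == RegSize (32)     -> false
  //   NumZeroBits >= RegSize-1 (31)   -> false
  //   NumSignBits >  RegSize-8 (24)   -> true: AssertSext from MVT::i8
  // Sign-extension from i8 to i32 only requires 25 leading sign bits
  // (bits 31..8 equal to bit 7), and 26 are known, so the assert is sound.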
-
-/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
-/// specified value into the registers specified by this object. This uses
-/// Chain/Flag as the input and updates them for the output Chain/Flag.
-/// If the Flag pointer is NULL, no flag is used.
-void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- SDValue &Chain, SDValue *Flag) const {
- // Get the list of the values's legal parts.
- unsigned NumRegs = Regs.size();
- SmallVector<SDValue, 8> Parts(NumRegs);
- for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
- EVT ValueVT = ValueVTs[Value];
- unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
- EVT RegisterVT = RegVTs[Value];
-
- getCopyToParts(DAG, dl,
- Val.getValue(Val.getResNo() + Value),
- &Parts[Part], NumParts, RegisterVT);
- Part += NumParts;
- }
-
- // Copy the parts into the registers.
- SmallVector<SDValue, 8> Chains(NumRegs);
- for (unsigned i = 0; i != NumRegs; ++i) {
- SDValue Part;
- if (Flag == 0) {
- Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
- } else {
- Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
- *Flag = Part.getValue(1);
- }
-
- Chains[i] = Part.getValue(0);
- }
-
- if (NumRegs == 1 || Flag)
- // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
- // flagged to it. That is the CopyToReg nodes and the user are considered
- // a single scheduling unit. If we create a TokenFactor and return it as
- // chain, then the TokenFactor is both a predecessor (operand) of the
- // user as well as a successor (the TF operands are flagged to the user).
- // c1, f1 = CopyToReg
- // c2, f2 = CopyToReg
- // c3 = TokenFactor c1, c2
- // ...
- // = op c3, ..., f2
- Chain = Chains[NumRegs-1];
- else
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
-}
-
-/// AddInlineAsmOperands - Add this value to the specified inlineasm node
-/// operand list. This adds the code marker and includes the number of
-/// values added into it.
-void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
- unsigned MatchingIdx,
- SelectionDAG &DAG,
- std::vector<SDValue> &Ops) const {
- unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
- if (HasMatching)
- Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
- SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
- Ops.push_back(Res);
-
- for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
- unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
- EVT RegisterVT = RegVTs[Value];
- for (unsigned i = 0; i != NumRegs; ++i) {
- assert(Reg < Regs.size() && "Mismatch in # registers expected");
- Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
- }
- }
-}
-
-/// isAllocatableRegister - If the specified register is safe to allocate,
-/// i.e. it isn't a stack pointer or some other special register, return the
-/// register class for the register. Otherwise, return null.
-static const TargetRegisterClass *
-isAllocatableRegister(unsigned Reg, MachineFunction &MF,
- const TargetLowering &TLI,
- const TargetRegisterInfo *TRI) {
- EVT FoundVT = MVT::Other;
- const TargetRegisterClass *FoundRC = 0;
- for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
- E = TRI->regclass_end(); RCI != E; ++RCI) {
- EVT ThisVT = MVT::Other;
-
- const TargetRegisterClass *RC = *RCI;
- // If none of the value types for this register class are valid, we
- // can't use it. For example, 64-bit reg classes on 32-bit targets.
- for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
- I != E; ++I) {
- if (TLI.isTypeLegal(*I)) {
- // If we have already found this register in a different register class,
- // choose the one with the largest VT specified. For example, on
- // PowerPC, we favor f64 register classes over f32.
- if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
- ThisVT = *I;
- break;
- }
- }
- }
-
- if (ThisVT == MVT::Other) continue;
-
- // NOTE: This isn't ideal. In particular, this might allocate the
- // frame pointer in functions that need it (due to them not being taken
- // out of allocation, because a variable sized allocation hasn't been seen
- // yet). This is a slight code pessimization, but should still work.
- for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
- E = RC->allocation_order_end(MF); I != E; ++I)
- if (*I == Reg) {
- // We found a matching register class. Keep looking at others in case
- // we find one with larger registers that this physreg is also in.
- FoundRC = RC;
- FoundVT = ThisVT;
- break;
- }
- }
- return FoundRC;
-}
-
-
namespace llvm {
+
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
@@ -5041,8 +4983,56 @@ private:
Regs.insert(*Aliases);
}
};
+
} // end llvm namespace.
+/// isAllocatableRegister - If the specified register is safe to allocate,
+/// i.e. it isn't a stack pointer or some other special register, return the
+/// register class for the register. Otherwise, return null.
+static const TargetRegisterClass *
+isAllocatableRegister(unsigned Reg, MachineFunction &MF,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo *TRI) {
+ EVT FoundVT = MVT::Other;
+ const TargetRegisterClass *FoundRC = 0;
+ for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
+ E = TRI->regclass_end(); RCI != E; ++RCI) {
+ EVT ThisVT = MVT::Other;
+
+ const TargetRegisterClass *RC = *RCI;
+ // If none of the value types for this register class are valid, we
+ // can't use it. For example, 64-bit reg classes on 32-bit targets.
+ for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
+ I != E; ++I) {
+ if (TLI.isTypeLegal(*I)) {
+ // If we have already found this register in a different register class,
+ // choose the one with the largest VT specified. For example, on
+ // PowerPC, we favor f64 register classes over f32.
+ if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
+ ThisVT = *I;
+ break;
+ }
+ }
+ }
+
+ if (ThisVT == MVT::Other) continue;
+
+ // NOTE: This isn't ideal. In particular, this might allocate the frame
+ // pointer in functions that need it (they are not taken out of allocation
+ // until a variable-sized allocation has been seen). This is a slight code
+ // pessimization, but should still work.
+ for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
+ E = RC->allocation_order_end(MF); I != E; ++I)
+ if (*I == Reg) {
+ // We found a matching register class. Keep looking at others in case
+ // we find one with larger registers that this physreg is also in.
+ FoundRC = RC;
+ FoundVT = ThisVT;
+ break;
+ }
+ }
+ return FoundRC;
+}
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
@@ -5154,7 +5144,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
}
}
- OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
return;
@@ -5172,7 +5162,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
for (; NumRegs; --NumRegs)
Regs.push_back(RegInfo.createVirtualRegister(RC));
- OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
+ OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
return;
}
@@ -5215,7 +5205,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
for (unsigned i = RegStart; i != RegEnd; ++i)
Regs.push_back(RegClassRegs[i]);
- OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
+ OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
OpInfo.ConstraintVT);
OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
return;
@@ -5332,7 +5322,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
+ TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
// If this is a memory input, and if the operand is not indirect, do what we
// need to in order to provide an address for the memory input.
@@ -5406,6 +5396,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
+ // Remember the AlignStack bit as operand 3.
+ AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0,
+ MVT::i1));
+
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
RegsForValue RetValRegs;
@@ -5497,7 +5491,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
RegsForValue MatchedRegs;
- MatchedRegs.TLI = &TLI;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
MatchedRegs.RegVTs.push_back(RegVT);
@@ -5535,7 +5528,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
- hasMemory, Ops, DAG);
+ Ops, DAG);
if (Ops.empty())
report_fatal_error("Invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'!");
@@ -5570,7 +5563,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Copy the input into the appropriate registers.
if (OpInfo.AssignedRegs.Regs.empty() ||
- !OpInfo.AssignedRegs.areValueTypesLegal())
+ !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
report_fatal_error("Couldn't allocate input reg for constraint '" +
Twine(OpInfo.ConstraintCode) + "'!");
@@ -5595,7 +5588,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Finish up input operands. Set the input chain and add the flag last.
- AsmNodeOperands[0] = Chain;
+ AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
@@ -5606,7 +5599,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this asm returns a register value, copy the result from that register
// and set it as the value of the call.
if (!RetValRegs.Regs.empty()) {
- SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+ SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
Chain, &Flag);
// FIXME: Why don't we do this for inline asms with MRVs?
@@ -5646,7 +5639,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
const Value *Ptr = IndirectStoresToEmit[i].second;
- SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
+ SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
Chain, &Flag);
StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
}
@@ -5672,14 +5665,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
MVT::Other, getRoot(),
- getValue(I.getOperand(1)),
- DAG.getSrcValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
+ const TargetData &TD = *TLI.getTargetData();
SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
getRoot(), getValue(I.getOperand(0)),
- DAG.getSrcValue(I.getOperand(0)));
+ DAG.getSrcValue(I.getOperand(0)),
+ TD.getABITypeAlignment(I.getType()));
setValue(&I, V);
DAG.setRoot(V.getValue(1));
}
@@ -5687,17 +5682,17 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
MVT::Other, getRoot(),
- getValue(I.getOperand(1)),
- DAG.getSrcValue(I.getOperand(1))));
+ getValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(0))));
}
void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
MVT::Other, getRoot(),
- getValue(I.getOperand(1)),
- getValue(I.getOperand(2)),
- DAG.getSrcValue(I.getOperand(1)),
- DAG.getSrcValue(I.getOperand(2))));
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ DAG.getSrcValue(I.getArgOperand(0)),
+ DAG.getSrcValue(I.getArgOperand(1))));
}
/// TargetLowering::LowerCallTo - This is the default LowerCallTo
@@ -5715,6 +5710,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
DebugLoc dl) const {
// Handle all of the outgoing arguments.
SmallVector<ISD::OutputArg, 32> Outs;
+ SmallVector<SDValue, 32> OutVals;
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
@@ -5768,13 +5764,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
for (unsigned j = 0; j != NumParts; ++j) {
// If it isn't the first piece, the alignment must be 1
- ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs);
+ ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
+ i < NumFixedArgs);
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0)
MyFlags.Flags.setOrigAlign(1);
Outs.push_back(MyFlags);
+ OutVals.push_back(Parts[j]);
}
}
}
@@ -5803,7 +5801,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
SmallVector<SDValue, 4> InVals;
Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
- Outs, Ins, dl, DAG, InVals);
+ Outs, OutVals, Ins, dl, DAG, InVals);
// Verify that the target's LowerCall behaved as expected.
assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
@@ -5876,7 +5874,7 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
void
SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
- SDValue Op = getValue(V);
+ SDValue Op = getNonRegisterValue(V);
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
"Copy from a reg to the same reg!");
@@ -5894,21 +5892,16 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// If this is the entry block, emit arguments.
const Function &F = *LLVMBB->getParent();
SelectionDAG &DAG = SDB->DAG;
- SDValue OldRoot = DAG.getRoot();
DebugLoc dl = SDB->getCurDebugLoc();
const TargetData *TD = TLI.getTargetData();
SmallVector<ISD::InputArg, 16> Ins;
// Check whether the function can return without sret-demotion.
- SmallVector<EVT, 4> OutVTs;
- SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
- getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
- OutVTs, OutsFlags, TLI);
- FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
-
- FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(),
- OutVTs, OutsFlags, DAG);
- if (!FLI.CanLowerReturn) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ if (!FuncInfo->CanLowerReturn) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
@@ -6002,7 +5995,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// Set up the argument values.
unsigned i = 0;
Idx = 1;
- if (!FLI.CanLowerReturn) {
+ if (!FuncInfo->CanLowerReturn) {
// Create a virtual register for the sret pointer, and put in a copy
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
@@ -6016,7 +6009,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
- FLI.DemoteRegister = SRetReg;
+ FuncInfo->DemoteRegister = SRetReg;
NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
SRetReg, ArgValue);
DAG.setRoot(NewRoot);
@@ -6032,6 +6025,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, I->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
+
+ // If this argument is unused, remember its value; it is used to generate
+ // debugging information.
+ if (I->use_empty() && NumValues)
+ SDB->setUnusedArgValue(I, InVals[i]);
+
for (unsigned Value = 0; Value != NumValues; ++Value) {
EVT VT = ValueVTs[Value];
EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
@@ -6112,17 +6111,20 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
- RegOut = FuncInfo.CreateRegForValue(C);
+ RegOut = FuncInfo.CreateRegs(C->getType());
CopyValueToVirtualRegister(C, RegOut);
}
Reg = RegOut;
} else {
- Reg = FuncInfo.ValueMap[PHIOp];
- if (Reg == 0) {
+ DenseMap<const Value *, unsigned>::iterator I =
+ FuncInfo.ValueMap.find(PHIOp);
+ if (I != FuncInfo.ValueMap.end())
+ Reg = I->second;
+ else {
assert(isa<AllocaInst>(PHIOp) &&
FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
- Reg = FuncInfo.CreateRegForValue(PHIOp);
+ Reg = FuncInfo.CreateRegs(PHIOp->getType());
CopyValueToVirtualRegister(PHIOp, Reg);
}
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 3fcd4b9..46733d6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -88,6 +88,10 @@ class SelectionDAGBuilder {
DebugLoc CurDebugLoc;
DenseMap<const Value*, SDValue> NodeMap;
+
+ /// UnusedArgNodeMap - Maps argument values for unused arguments. This is
+ /// used to preserve debug information for incoming arguments.
+ DenseMap<const Value*, SDValue> UnusedArgNodeMap;
public:
/// PendingLoads - Loads are not emitted to the program immediately. We bunch
@@ -342,6 +346,8 @@ public:
void visit(unsigned Opcode, const User &I);
SDValue getValue(const Value *V);
+ SDValue getNonRegisterValue(const Value *V);
+ SDValue getValueImpl(const Value *V);
void setValue(const Value *V, SDValue NewN) {
SDValue &N = NodeMap[V];
@@ -349,6 +355,12 @@ public:
N = NewN;
}
+ void setUnusedArgValue(const Value *V, SDValue NewN) {
+ SDValue &N = UnusedArgNodeMap[V];
+ assert(N.getNode() == 0 && "Already set a value for this node!");
+ N = NewN;
+ }
+
void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
std::set<unsigned> &OutputRegs,
std::set<unsigned> &InputRegs);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 65b8d4f..08ba548 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -14,7 +14,7 @@
#define DEBUG_TYPE "isel"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
-#include "FunctionLoweringInfo.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DebugInfo.h"
@@ -171,7 +171,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
- CurDAG(new SelectionDAG(tm, *FuncInfo)),
+ CurDAG(new SelectionDAG(tm)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
@@ -244,7 +244,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
CurDAG->init(*MF);
- FuncInfo->set(Fn, *MF, EnableFastISel);
+ FuncInfo->set(Fn, *MF);
SDB->init(GFI, *AA);
SelectAllBasicBlocks(Fn);
@@ -300,7 +300,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
for (MachineBasicBlock::const_iterator
II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
- if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) {
+
+ // Operand 1 of an inline asm instruction indicates whether the asm
+ // needs the stack or not.
+ if ((II->isInlineAsm() && II->getOperand(1).getImm()) ||
+ (TID.isCall() && !TID.isReturn())) {
MFI->setHasCalls(true);
goto done;
}
@@ -312,6 +316,26 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Determine if there is a call to setjmp in the machine function.
MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn));
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<unsigned, unsigned>::iterator
+ I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ for (;;) {
+ DenseMap<unsigned, unsigned>::iterator J =
+ FuncInfo->RegFixups.find(To);
+ if (J == E) break;
+ To = J->second;
+ }
+ // Replace it.
+ MRI.replaceRegWith(From, To);
+ }
+
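
The loop above chases each fixup through any chain of pending replacements before rewriting, so transitively redirected registers land on their final value. The chain-following step in isolation (a hedged sketch, with std::map standing in for the DenseMap of virtual-register numbers):

  #include <map>
  // Follow From -> To -> ... until a register with no further fixup is found.
  unsigned resolveFixup(const std::map<unsigned, unsigned> &Fixups,
                        unsigned To) {
    for (;;) {
      std::map<unsigned, unsigned>::const_iterator J = Fixups.find(To);
      if (J == Fixups.end()) break;   // final destination reached
      To = J->second;
    }
    return To;
  }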
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
@@ -319,10 +343,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
return true;
}
-MachineBasicBlock *
-SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB,
- const BasicBlock *LLVMBB,
- BasicBlock::const_iterator Begin,
+void
+SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
// Lower all of the non-terminator instructions. If a call is emitted
@@ -337,7 +359,7 @@ SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB,
SDB->clear();
// Final step, emit the lowered DAG as machine code.
- return CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
}
namespace {
@@ -372,102 +394,6 @@ public:
};
}
-/// TrivialTruncElim - Eliminate some trivial nops that can result from
-/// ShrinkDemandedOps: (trunc (ext n)) -> n.
-static bool TrivialTruncElim(SDValue Op,
- TargetLowering::TargetLoweringOpt &TLO) {
- SDValue N0 = Op.getOperand(0);
- EVT VT = Op.getValueType();
- if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
- N0.getOpcode() == ISD::SIGN_EXTEND ||
- N0.getOpcode() == ISD::ANY_EXTEND) &&
- N0.getOperand(0).getValueType() == VT) {
- return TLO.CombineTo(Op, N0.getOperand(0));
- }
- return false;
-}
-
-/// ShrinkDemandedOps - A late transformation pass that shrink expressions
-/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts
-/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
-void SelectionDAGISel::ShrinkDemandedOps() {
- SmallVector<SDNode*, 128> Worklist;
- SmallPtrSet<SDNode*, 128> InWorklist;
-
- // Add all the dag nodes to the worklist.
- Worklist.reserve(CurDAG->allnodes_size());
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
- Worklist.push_back(I);
- InWorklist.insert(I);
- }
-
- TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true);
- while (!Worklist.empty()) {
- SDNode *N = Worklist.pop_back_val();
- InWorklist.erase(N);
-
- if (N->use_empty() && N != CurDAG->getRoot().getNode()) {
- // Deleting this node may make its operands dead, add them to the worklist
- // if they aren't already there.
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
- if (InWorklist.insert(N->getOperand(i).getNode()))
- Worklist.push_back(N->getOperand(i).getNode());
-
- CurDAG->DeleteNode(N);
- continue;
- }
-
- // Run ShrinkDemandedOp on scalar binary operations.
- if (N->getNumValues() != 1 ||
- !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger())
- continue;
-
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
- APInt Demanded = APInt::getAllOnesValue(BitWidth);
- APInt KnownZero, KnownOne;
- if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded,
- KnownZero, KnownOne, TLO) &&
- (N->getOpcode() != ISD::TRUNCATE ||
- !TrivialTruncElim(SDValue(N, 0), TLO)))
- continue;
-
- // Revisit the node.
- assert(!InWorklist.count(N) && "Already in worklist");
- Worklist.push_back(N);
- InWorklist.insert(N);
-
- // Replace the old value with the new one.
- DEBUG(errs() << "\nShrinkDemandedOps replacing ";
- TLO.Old.getNode()->dump(CurDAG);
- errs() << "\nWith: ";
- TLO.New.getNode()->dump(CurDAG);
- errs() << '\n');
-
- if (InWorklist.insert(TLO.New.getNode()))
- Worklist.push_back(TLO.New.getNode());
-
- SDOPsWorkListRemover DeadNodes(Worklist, InWorklist);
- CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
-
- if (!TLO.Old.getNode()->use_empty()) continue;
-
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands();
- i != e; ++i) {
- SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode();
- if (OpNode->hasOneUse()) {
- // Add OpNode to the end of the list to revisit.
- DeadNodes.RemoveFromWorklist(OpNode);
- Worklist.push_back(OpNode);
- InWorklist.insert(OpNode);
- }
- }
-
- DeadNodes.RemoveFromWorklist(TLO.Old.getNode());
- CurDAG->DeleteNode(TLO.Old.getNode());
- }
-}
-
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
SmallPtrSet<SDNode*, 128> VisitedNodes;
SmallVector<SDNode*, 128> Worklist;
@@ -522,7 +448,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
} while (!Worklist.empty());
}
-MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
+void SelectionDAGISel::CodeGenAndEmitDAG() {
std::string GroupName;
if (TimePassesIsEnabled)
GroupName = "Instruction Selection and Scheduling";
@@ -531,23 +457,19 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs ||
ViewSUnitDAGs)
BlockName = MF->getFunction()->getNameStr() + ":" +
- BB->getBasicBlock()->getNameStr();
+ FuncInfo->MBB->getBasicBlock()->getNameStr();
- DEBUG(dbgs() << "Initial selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump());
if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName);
// Run the DAG combiner in pre-legalize mode.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Combining 1", GroupName);
- CurDAG->Combine(Unrestricted, *AA, OptLevel);
- } else {
+ {
+ NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
CurDAG->Combine(Unrestricted, *AA, OptLevel);
}
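
All of the if (TimePassesIsEnabled) duplication in this file collapses because the timer now takes the enabled flag itself and degrades to a no-op when timing is off. The RAII shape being relied on, sketched with a hypothetical ScopedTimer (startTimer/stopTimer stand in for the real timer plumbing; the real NamedRegionTimer also takes a group name):

  static void startTimer(const char *) {}   // hypothetical stand-in
  static void stopTimer() {}                // hypothetical stand-in
  class ScopedTimer {
    bool Enabled;
  public:
    ScopedTimer(const char *Name, bool Enabled) : Enabled(Enabled) {
      if (Enabled) startTimer(Name);
    }
    ~ScopedTimer() { if (Enabled) stopTimer(); }
  };
  // Usage mirrors the patch: one scoped block, no duplicated call.
  // { ScopedTimer T("DAG Combining 1", TimePassesIsEnabled); /* work */ }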
- DEBUG(dbgs() << "Optimized lowered selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump());
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
@@ -555,44 +477,36 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
BlockName);
bool Changed;
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Type Legalization", GroupName);
- Changed = CurDAG->LegalizeTypes();
- } else {
+ {
+ NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled);
Changed = CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Type-legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump());
if (Changed) {
if (ViewDAGCombineLT)
CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
// Run the DAG combiner in post-type-legalize mode.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Combining after legalize types", GroupName);
- CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
- } else {
+ {
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName,
+ TimePassesIsEnabled);
CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n";
+ CurDAG->dump());
}
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Vector Legalization", GroupName);
- Changed = CurDAG->LegalizeVectors();
- } else {
+ {
+ NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled);
Changed = CurDAG->LegalizeVectors();
}
if (Changed) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Type Legalization 2", GroupName);
- CurDAG->LegalizeTypes();
- } else {
+ {
+ NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
CurDAG->LegalizeTypes();
}
@@ -600,95 +514,79 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) {
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
// Run the DAG combiner in post-type-legalize mode.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Combining after legalize vectors", GroupName);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
- } else {
+ {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
+ TimePassesIsEnabled);
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n";
+ CurDAG->dump());
}
if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Legalization", GroupName);
- CurDAG->Legalize(OptLevel);
- } else {
+ {
+ NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
CurDAG->Legalize(OptLevel);
}
- DEBUG(dbgs() << "Legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump());
if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName);
// Run the DAG combiner in post-legalize mode.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Combining 2", GroupName);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
- } else {
+ {
+ NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump());
- if (OptLevel != CodeGenOpt::None) {
- ShrinkDemandedOps();
+ if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
- }
if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName);
// Third, instruction select all of the operations to machine code, adding the
// code to the MachineBasicBlock.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Instruction Selection", GroupName);
- DoInstructionSelection();
- } else {
+ {
+ NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled);
DoInstructionSelection();
}
- DEBUG(dbgs() << "Selected selection DAG:\n");
- DEBUG(CurDAG->dump());
+ DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump());
if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName);
// Schedule machine code.
ScheduleDAGSDNodes *Scheduler = CreateScheduler();
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Instruction Scheduling", GroupName);
- Scheduler->Run(CurDAG, BB, BB->end());
- } else {
- Scheduler->Run(CurDAG, BB, BB->end());
+ {
+ NamedRegionTimer T("Instruction Scheduling", GroupName,
+ TimePassesIsEnabled);
+ Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt);
}
if (ViewSUnitDAGs) Scheduler->viewGraph();
// Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Instruction Creation", GroupName);
- BB = Scheduler->EmitSchedule();
- } else {
- BB = Scheduler->EmitSchedule();
+ {
+ NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
+
+ FuncInfo->MBB = Scheduler->EmitSchedule();
+ FuncInfo->InsertPt = Scheduler->InsertPos;
}
// Free the scheduler state.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName);
- delete Scheduler;
- } else {
+ {
+ NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
+ TimePassesIsEnabled);
delete Scheduler;
}
// Free the SelectionDAG state, now that we're finished with it.
CurDAG->clear();
-
- return BB;
}
void SelectionDAGISel::DoInstructionSelection() {
@@ -750,21 +648,22 @@ void SelectionDAGISel::DoInstructionSelection() {
/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and
/// do other setup for EH landing-pad blocks.
-void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) {
+void SelectionDAGISel::PrepareEHLandingPad() {
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
- MCSymbol *Label = MF->getMMI().addLandingPad(BB);
+ MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB);
const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
- BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label);
+ BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
+ .addSym(Label);
// Mark exception register as live in.
unsigned Reg = TLI.getExceptionAddressRegister();
- if (Reg) BB->addLiveIn(Reg);
+ if (Reg) FuncInfo->MBB->addLiveIn(Reg);
// Mark exception selector register as live in.
Reg = TLI.getExceptionSelectorRegister();
- if (Reg) BB->addLiveIn(Reg);
+ if (Reg) FuncInfo->MBB->addLiveIn(Reg);
// FIXME: Hack around an exception handling flaw (PR1508): the personality
// function and list of typeids logically belong to the invoke (or, if you
@@ -777,7 +676,7 @@ void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) {
// in exceptions not being caught because no typeids are associated with
// the invoke. This may not be the only way things can go wrong, but it
// is the only way we try to work around for the moment.
- const BasicBlock *LLVMBB = BB->getBasicBlock();
+ const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock();
const BranchInst *Br = dyn_cast<BranchInst>(LLVMBB->getTerminator());
if (Br && Br->isUnconditional()) { // Critical edge?
@@ -796,83 +695,100 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
if (EnableFastISel)
- FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap,
- FuncInfo->StaticAllocaMap,
- FuncInfo->PHINodesToUpdate
-#ifndef NDEBUG
- , FuncInfo->CatchInfoLost
-#endif
- );
+ FastIS = TLI.createFastISel(*FuncInfo);
// Iterate over all basic blocks in the function.
for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
const BasicBlock *LLVMBB = &*I;
- MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB];
+ FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI();
BasicBlock::const_iterator const End = LLVMBB->end();
- BasicBlock::const_iterator BI = Begin;
+ BasicBlock::const_iterator BI = End;
+ FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
+
+ // Setup an EH landing-pad block.
+ if (FuncInfo->MBB->isLandingPad())
+ PrepareEHLandingPad();
+
// Lower any arguments needed in this block if this is the entry block.
if (LLVMBB == &Fn.getEntryBlock())
LowerArguments(LLVMBB);
- // Setup an EH landing-pad block.
- if (BB->isLandingPad())
- PrepareEHLandingPad(BB);
-
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
+ FastIS->startNewBlock();
+
// Emit code for any incoming arguments. This must happen before
// beginning FastISel on the entry block.
if (LLVMBB == &Fn.getEntryBlock()) {
CurDAG->setRoot(SDB->getControlRoot());
SDB->clear();
- BB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
+
+ // If we inserted any instructions at the beginning, make a note of
+ // where they are, so we can be sure to emit subsequent instructions
+ // after them.
+ if (FuncInfo->InsertPt != FuncInfo->MBB->begin())
+ FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt));
+ else
+ FastIS->setLastLocalValue(0);
}
- FastIS->startNewBlock(BB);
+
// Do FastISel on as many instructions as possible.
- for (; BI != End; ++BI) {
+ for (; BI != Begin; --BI) {
+ const Instruction *Inst = llvm::prior(BI);
+
+ // If we no longer require this instruction, skip it.
+ if (!Inst->mayWriteToMemory() &&
+ !isa<TerminatorInst>(Inst) &&
+ !isa<DbgInfoIntrinsic>(Inst) &&
+ !FuncInfo->isExportedInst(Inst))
+ continue;
+
+ // Bottom-up: reset the insert pos at the top, after any local-value
+ // instructions.
+ FastIS->recomputeInsertPt();
+
// Try to select the instruction with FastISel.
- if (FastIS->SelectInstruction(BI))
+ if (FastIS->SelectInstruction(Inst))
continue;
// Then handle certain instructions as single-LLVM-Instruction blocks.
- if (isa<CallInst>(BI)) {
+ if (isa<CallInst>(Inst)) {
++NumFastIselFailures;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel missed call: ";
- BI->dump();
+ Inst->dump();
}
- if (!BI->getType()->isVoidTy() && !BI->use_empty()) {
- unsigned &R = FuncInfo->ValueMap[BI];
+ if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) {
+ unsigned &R = FuncInfo->ValueMap[Inst];
if (!R)
- R = FuncInfo->CreateRegForValue(BI);
+ R = FuncInfo->CreateRegs(Inst->getType());
}
bool HadTailCall = false;
- BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall);
+ SelectBasicBlock(Inst, BI, HadTailCall);
// If the call was emitted as a tail call, we're done with the block.
if (HadTailCall) {
- BI = End;
+ --BI;
break;
}
- // If the instruction was codegen'd with multiple blocks,
- // inform the FastISel object where to resume inserting.
- FastIS->setCurrentBlock(BB);
continue;
}
// Otherwise, give up on FastISel for the rest of the block.
// For now, be a little lenient about non-branch terminators.
- if (!isa<TerminatorInst>(BI) || isa<BranchInst>(BI)) {
+ if (!isa<TerminatorInst>(Inst) || isa<BranchInst>(Inst)) {
++NumFastIselFailures;
if (EnableFastISelVerbose || EnableFastISelAbort) {
dbgs() << "FastISel miss: ";
- BI->dump();
+ Inst->dump();
}
if (EnableFastISelAbort)
// The "fast" selector couldn't handle something and bailed.
@@ -881,17 +797,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
break;
}
+
+ FastIS->recomputeInsertPt();
}
// Run SelectionDAG instruction selection on the remainder of the block
// not handled by FastISel. If FastISel is not run, this is the entire
// block.
- if (BI != End) {
- bool HadTailCall;
- BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall);
- }
+ bool HadTailCall;
+ SelectBasicBlock(Begin, BI, HadTailCall);
- FinishBasicBlock(BB);
+ FinishBasicBlock();
FuncInfo->PHINodesToUpdate.clear();
}
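
The FastISel loop above now walks each block bottom-up: BI starts at End and the instruction actually examined is llvm::prior(BI). The iteration shape in isolation (a sketch with std::list standing in for the instruction list):

  #include <list>
  template <typename It> It prior_it(It I) { return --I; }  // llvm::prior
  void walkBottomUp(std::list<int> &Insts) {
    std::list<int>::iterator Begin = Insts.begin();
    for (std::list<int>::iterator BI = Insts.end(); BI != Begin; --BI) {
      int &Inst = *prior_it(BI);   // the element just above BI
      (void)Inst;                  // selection of Inst happens here
    }
  }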
@@ -899,11 +815,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
void
-SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
+SelectionDAGISel::FinishBasicBlock() {
DEBUG(dbgs() << "Total amount of phi nodes to update: "
- << FuncInfo->PHINodesToUpdate.size() << "\n");
- DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
+ << FuncInfo->PHINodesToUpdate.size() << "\n";
+ for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i)
dbgs() << "Node " << i << " : ("
<< FuncInfo->PHINodesToUpdate[i].first
<< ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n");
@@ -917,11 +833,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
- if (!BB->isSuccessor(PHI->getParent()))
+ if (!FuncInfo->MBB->isSuccessor(PHI->getParent()))
continue;
PHI->addOperand(
MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
- PHI->addOperand(MachineOperand::CreateMBB(BB));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
}
return;
}
@@ -930,33 +846,35 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
// Lower header first, if it wasn't already lowered
if (!SDB->BitTestCases[i].Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDB->BitTestCases[i].Parent;
+ FuncInfo->MBB = SDB->BitTestCases[i].Parent;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
- SDB->visitBitTestHeader(SDB->BitTestCases[i], BB);
+ SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
- BB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
}
for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) {
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
if (j+1 != ej)
SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
- BB);
+ FuncInfo->MBB);
else
SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
- BB);
+ FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
- BB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
}
// Update PHI Nodes
@@ -1001,22 +919,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
// Lower header first, if it wasn't already lowered
if (!SDB->JTCases[i].first.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
- BB);
+ FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
- BB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
}
// Set the current basic block to the mbb we wish to insert the code into
- BB = SDB->JTCases[i].second.MBB;
+ FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
SDB->visitJumpTable(SDB->JTCases[i].second);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
- BB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
// Update PHI Nodes
for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size();
@@ -1034,11 +954,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
(MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB));
}
// JT BB. Just iterate over successors here
- if (BB->isSuccessor(PHIBB)) {
+ if (FuncInfo->MBB->isSuccessor(PHIBB)) {
PHI->addOperand
(MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second,
false));
- PHI->addOperand(MachineOperand::CreateMBB(BB));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
}
}
}
@@ -1050,10 +970,10 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first;
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
- if (BB->isSuccessor(PHI->getParent())) {
+ if (FuncInfo->MBB->isSuccessor(PHI->getParent())) {
PHI->addOperand(
MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false));
- PHI->addOperand(MachineOperand::CreateMBB(BB));
+ PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB));
}
}
@@ -1061,7 +981,8 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
// additional DAGs necessary.
for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
- MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB;
+ MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
// Determine the unique successors.
SmallVector<MachineBasicBlock *, 2> Succs;
@@ -1071,21 +992,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) {
// Emit the code. Note that this could result in ThisBB being split, so
// we need to check for updates.
- SDB->visitSwitchCase(SDB->SwitchCases[i], BB);
+ SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
- ThisBB = CodeGenAndEmitDAG(BB);
+ CodeGenAndEmitDAG();
+ ThisBB = FuncInfo->MBB;
// Handle any PHI nodes in successors of this chunk, as if we were coming
// from the original BB before switch expansion. Note that PHI nodes can
// occur multiple times in PHINodesToUpdate. We have to be very careful to
// handle them the right number of times.
for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
- BB = Succs[i];
- // BB may have been removed from the CFG if a branch was constant folded.
- if (ThisBB->isSuccessor(BB)) {
- for (MachineBasicBlock::iterator Phi = BB->begin();
- Phi != BB->end() && Phi->isPHI();
+ FuncInfo->MBB = Succs[i];
+ FuncInfo->InsertPt = FuncInfo->MBB->end();
+ // FuncInfo->MBB may have been removed from the CFG if a branch was
+ // constant folded.
+ if (ThisBB->isSuccessor(FuncInfo->MBB)) {
+ for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin();
+ Phi != FuncInfo->MBB->end() && Phi->isPHI();
++Phi) {
// This value for this PHI node is recorded in PHINodesToUpdate.
for (unsigned pn = 0; ; ++pn) {
@@ -1205,6 +1129,7 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
+ Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3
unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
if (InOps[e-1].getValueType() == MVT::Flag)
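
With the new push at index 3, the fixed header of an ISD::INLINEASM node's operand list lines up with the InlineAsm::Op_* constants used here. Indices 0-3 follow directly from the pushes above; Op_FirstOperand == 4 is inferred from the loop that begins right after them:

  //   Op_InputChain   == 0   chain
  //   Op_AsmString    == 1   the asm text
  //   Op_MDNode       == 2   !srcloc metadata
  //   Op_IsAlignStack == 3   nonzero if the asm may realign the stack
  //   Op_FirstOperand == 4   first constraint/operand group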
@@ -1701,7 +1626,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
SDValue(Res, ResNumResults-1));
if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
- --ResNumResults;
+ --ResNumResults;
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3786bd1..6cae804 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -278,7 +278,7 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
FlaggedNodes.push_back(N);
while (!FlaggedNodes.empty()) {
O << DOTGraphTraits<SelectionDAG*>
- ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
+ ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
FlaggedNodes.pop_back();
if (!FlaggedNodes.empty())
O << "\n ";
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 44a80d3..4f38669 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -261,6 +262,38 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::MEMMOVE] = "memmove";
Names[RTLIB::MEMSET] = "memset";
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
+ Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
+ Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
+ Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
+ Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+ Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
+ Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
+ Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
+ Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+ Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
+ Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
+ Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
+ Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
+ Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
+ Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
}
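
The added entries all follow GCC's __sync libcall convention: __sync_<operation>_<size-in-bytes>, over 1-, 2-, 4-, and 8-byte integers, with the fetch-and-op family returning the old value. For example, a 32-bit atomic add lowered through RTLIB::SYNC_FETCH_AND_ADD_4 ends up calling a runtime function whose C-level shape is roughly (signature hedged from the naming convention, not taken from this patch):

  /* Provided by the target runtime (e.g. libgcc); returns the prior value. */
  unsigned int __sync_fetch_and_add_4(volatile void *ptr, unsigned int value);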
/// InitLibcallCallingConvs - Set default libcall CallingConvs.
@@ -546,9 +579,9 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
SchedPreferenceInfo = Sched::Latency;
JumpBufSize = 0;
JumpBufAlignment = 0;
- IfCvtBlockSizeLimit = 2;
- IfCvtDupBlockSizeLimit = 0;
PrefLoopAlignment = 0;
+ MinStackArgumentAlignment = 1;
+ ShouldFoldAtomicFences = false;
InitLibcallNames(LibcallRoutineNames);
InitCmpLibcallCCs(CmpLibcallCCs);
@@ -578,9 +611,9 @@ bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const {
static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
- unsigned &NumIntermediates,
- EVT &RegisterVT,
- TargetLowering* TLI) {
+ unsigned &NumIntermediates,
+ EVT &RegisterVT,
+ TargetLowering *TLI) {
// Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType();
@@ -610,16 +643,12 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
EVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
- if (EVT(DestVT).bitsLT(NewVT)) {
- // Value is expanded, e.g. i64 -> i16.
+ if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
- } else {
- // Otherwise, promotion or legal types use the same number of registers as
- // the vector decimated to the appropriate level.
- return NumVectorRegs;
- }
- return 1;
+ // Otherwise, promotion or legal types use the same number of registers as
+ // the vector decimated to the appropriate level.
+ return NumVectorRegs;
}
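The simplified tail above keeps one expanded-value path and one fall-through. Its register-count arithmetic can be modeled standalone; the bit widths below are hypothetical, not tied to any target:

    // Standalone model of the count returned by getVectorTypeBreakdownMVT
    // once the intermediate type NewVT and the register type DestVT are
    // known; all widths are in bits.
    #include <cassert>
    #include <cstdio>

    static unsigned numRegs(unsigned NumVectorRegs, unsigned NewVTBits,
                            unsigned DestVTBits) {
      if (DestVTBits < NewVTBits)  // value is expanded, e.g. i64 -> i16
        return NumVectorRegs * (NewVTBits / DestVTBits);
      return NumVectorRegs;        // promoted or legal: same register count
    }

    int main() {
      // A vector split into 4 x i64 parts on a 16-bit-register target:
      // each part expands to four i16 registers, 16 registers in total.
      assert(numRegs(4, 64, 16) == 16);
      assert(numRegs(4, 64, 64) == 4);
      std::printf("ok\n");
      return 0;
    }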
/// computeRegisterProperties - Once all of the register classes are added,
@@ -705,39 +734,39 @@ void TargetLowering::computeRegisterProperties() {
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
- if (!isTypeLegal(VT)) {
- MVT IntermediateVT;
- EVT RegisterVT;
- unsigned NumIntermediates;
- NumRegistersForVT[i] =
- getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
- RegisterVT, this);
- RegisterTypeForVT[i] = RegisterVT;
-
- // Determine if there is a legal wider type.
- bool IsLegalWiderType = false;
- EVT EltVT = VT.getVectorElementType();
- unsigned NElts = VT.getVectorNumElements();
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- EVT SVT = (MVT::SimpleValueType)nVT;
- if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts && NElts != 1) {
- TransformToType[i] = SVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- IsLegalWiderType = true;
- break;
- }
+ if (isTypeLegal(VT)) continue;
+
+ MVT IntermediateVT;
+ EVT RegisterVT;
+ unsigned NumIntermediates;
+ NumRegistersForVT[i] =
+ getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
+ RegisterVT, this);
+ RegisterTypeForVT[i] = RegisterVT;
+
+ // Determine if there is a legal wider type.
+ bool IsLegalWiderType = false;
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NElts = VT.getVectorNumElements();
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ EVT SVT = (MVT::SimpleValueType)nVT;
+ if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT &&
+ SVT.getVectorNumElements() > NElts && NElts != 1) {
+ TransformToType[i] = SVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
+ IsLegalWiderType = true;
+ break;
}
- if (!IsLegalWiderType) {
- EVT NVT = VT.getPow2VectorType();
- if (NVT == VT) {
- // Type is already a power of 2. The default action is to split.
- TransformToType[i] = MVT::Other;
- ValueTypeActions.setTypeAction(VT, Expand);
- } else {
- TransformToType[i] = NVT;
- ValueTypeActions.setTypeAction(VT, Promote);
- }
+ }
+ if (!IsLegalWiderType) {
+ EVT NVT = VT.getPow2VectorType();
+ if (NVT == VT) {
+ // Type is already a power of 2. The default action is to split.
+ TransformToType[i] = MVT::Other;
+ ValueTypeActions.setTypeAction(VT, Expand);
+ } else {
+ TransformToType[i] = NVT;
+ ValueTypeActions.setTypeAction(VT, Promote);
}
}
}
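The flattened loop above prefers promoting an illegal vector type to a strictly wider legal vector with the same element type, then falls back to splitting when the element count is already a power of two, or promoting to the next power-of-two vector otherwise. A toy model of that preference order, with a made-up set of legal element counts and element types ignored:

    #include <cstdio>
    #include <set>

    static unsigned pow2ceil(unsigned N) {
      unsigned P = 1;
      while (P < N) P <<= 1;
      return P;
    }

    // Returns the element count the illegal type transforms to; 0 stands
    // in for the split case (TransformToType = MVT::Other, Expand above).
    static unsigned transformTo(unsigned NElts,
                                const std::set<unsigned> &Legal) {
      if (NElts != 1) {  // single-element vectors are never widened
        std::set<unsigned>::const_iterator I = Legal.upper_bound(NElts);
        if (I != Legal.end())
          return *I;     // promote to the next wider legal vector
      }
      unsigned P = pow2ceil(NElts);
      return P == NElts ? 0 : P;  // power of two: split; else promote
    }

    int main() {
      std::set<unsigned> Legal;
      Legal.insert(4);
      Legal.insert(8);
      std::printf("%u\n", transformTo(3, Legal));   // 4: widen v3 to v4
      std::printf("%u\n", transformTo(16, Legal));  // 0: split
      return 0;
    }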
@@ -811,6 +840,65 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
return 1;
}
+/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// type of the given function. This does not require a DAG or a return value,
+/// and is suitable for use before any DAGs for the function are constructed.
+/// TODO: Move this out of TargetLowering.cpp.
+void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr,
+ SmallVectorImpl<ISD::OutputArg> &Outs,
+ const TargetLowering &TLI,
+ SmallVectorImpl<uint64_t> *Offsets) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+ unsigned Offset = 0;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr & Attribute::SExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr & Attribute::ZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ unsigned PartSize = TLI.getTargetData()->getTypeAllocSize(
+ PartVT.getTypeForEVT(ReturnType->getContext()));
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr & Attribute::InReg)
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr & Attribute::SExt)
+ Flags.setSExt();
+ else if (attr & Attribute::ZExt)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true));
+ if (Offsets) {
+ Offsets->push_back(Offset);
+ Offset += PartSize;
+ }
+ }
+ }
+}
+
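The promotion step inside GetReturnInfo only fires for integer returns carrying an explicit signext or zeroext attribute; those are widened to at least the i32 register type before being split into parts. A rough standalone model of that rule, with the 32-bit minimum taken as an assumption:

    #include <algorithm>
    #include <cstdio>

    enum ExtKind { AnyExt, SignExt, ZeroExt };

    // Models only the VT.bitsLT(MinVT) widening above; widths in bits.
    static unsigned promotedReturnBits(unsigned TypeBits, ExtKind Ext,
                                       unsigned MinBits = 32) {
      if (Ext != AnyExt)  // sext/zext requested: promote to >= MinBits
        return std::max(TypeBits, MinBits);
      return TypeBits;    // otherwise left alone (see the FIXME above)
    }

    int main() {
      std::printf("%u\n", promotedReturnBits(8, SignExt));  // 32
      std::printf("%u\n", promotedReturnBits(8, AnyExt));   // 8
      return 0;
    }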
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
/// alignment, not its logarithm.
@@ -1042,7 +1130,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-1 bits are only known if set in both the LHS & RHS.
@@ -1076,7 +1164,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1101,7 +1189,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((KnownZero2 & NewMask) == NewMask)
return TLO.CombineTo(Op, Op.getOperand(1));
// If the operation can be done in a smaller type, do so.
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
// If all of the unknown bits are known to be zero on one side or the other
@@ -1498,13 +1586,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::AssertZext: {
- EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- APInt InMask = APInt::getLowBitsSet(BitWidth,
- VT.getSizeInBits());
- if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
+ // Demand all the bits of the input that are demanded in the output.
+ // The low bits are obvious; the high bits are demanded because we're
+ // asserting that they're zero here.
+ if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ APInt InMask = APInt::getLowBitsSet(BitWidth,
+ VT.getSizeInBits());
KnownZero |= ~InMask & NewMask;
break;
}
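The reordering above simplifies the input under the full demanded mask first, because AssertZext asserts the high bits are zero and so those bits are demanded too; only afterwards are they folded into KnownZero. The same mask arithmetic, shrunk to 8 bits for illustration:

    #include <cstdio>

    int main() {
      unsigned AssertedBits = 4;                  // the VT in AssertZext
      unsigned InMask = (1u << AssertedBits) - 1; // low bits: 0x0F
      unsigned NewMask = 0xFF;                    // all 8 bits demanded
      unsigned KnownZero = 0;                     // say the operand gave none
      KnownZero |= ~InMask & NewMask;             // high bits asserted zero
      std::printf("KnownZero = 0x%02X\n", KnownZero);  // prints 0xF0
      return 0;
    }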
@@ -1544,7 +1636,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne2, TLO, Depth+1))
return true;
// See if the operation should be performed at a smaller bit width.
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
return true;
}
// FALL THROUGH
@@ -2346,7 +2438,6 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
/// vector. If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
switch (ConstraintLetter) {
@@ -2384,7 +2475,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (ConstraintLetter != 'n') {
int64_t Offs = GA->getOffset();
if (C) Offs += C->getZExtValue();
- Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ C->getDebugLoc(),
Op.getValueType(), Offs));
return;
}
@@ -2507,18 +2599,18 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
/// 'm' over 'r', for example.
///
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
- bool hasMemory, const TargetLowering &TLI,
+ const TargetLowering &TLI,
SDValue Op, SelectionDAG *DAG) {
assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
unsigned BestIdx = 0;
TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
int BestGenerality = -1;
-
+
// Loop over the options, keeping track of the most general one.
for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
-
+
// If this is an 'other' constraint, see if the operand is valid for it.
// For example, on X86 we might have an 'rI' constraint. If the operand
// is an integer in the range [0..31] we want to use I (saving a load
@@ -2527,7 +2619,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;
- TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory,
+ TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0],
ResultOps, *DAG);
if (!ResultOps.empty()) {
BestType = CType;
@@ -2536,6 +2628,11 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
}
}
+ // Things with matching constraints can only be registers, per gcc
+ // documentation. This mainly affects "g" constraints.
+ if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
+ continue;
+
// This constraint letter is more general than the previous one, use it.
int Generality = getConstraintGenerality(CType);
if (Generality > BestGenerality) {
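The new bail-out above skips memory alternatives for any operand with a matching constraint, since per the GCC documentation matched operands must be registers; this mainly bites the catch-all "g". A hypothetical example of the pattern it handles, GNU inline asm on x86 assumed:

    // Operand 1 uses "0", a matching constraint, so the "g" alternative
    // (register, memory, or immediate) must be resolved to a register.
    int incr(int X) {
      __asm__("addl $1, %0" : "=g"(X) : "0"(X));
      return X;
    }

    int main() { return incr(41) == 42 ? 0 : 1; }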
@@ -2554,7 +2651,6 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
SDValue Op,
- bool hasMemory,
SelectionDAG *DAG) const {
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
@@ -2563,7 +2659,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
OpInfo.ConstraintCode = OpInfo.Codes[0];
OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
} else {
- ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG);
+ ChooseConstraint(OpInfo, *this, Op, DAG);
}
// 'X' matches anything.
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 5240bef..6ab0cb0 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -31,6 +31,7 @@
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/IRBuilder.h"
using namespace llvm;
@@ -158,7 +159,8 @@ namespace {
// Create a new invoke instruction.
Args.clear();
- Args.append(CI->op_begin() + 1, CI->op_end());
+ CallSite CS(CI);
+ Args.append(CS.arg_begin(), CS.arg_end());
InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
NewBB, CleanupBB,
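The CallSite change above replaces a hand-computed operand offset (op_begin() + 1) with an abstraction that knows where each call-like instruction keeps its arguments. A minimal standalone analogue of the idea; the operand layouts below are invented, not LLVM's:

    #include <cstdio>
    #include <vector>

    struct Call   { std::vector<int> Ops; };  // Ops[0] = callee, rest args
    struct Invoke { std::vector<int> Ops; };  // Ops[0..2] non-args, say

    struct Site {  // one view over both layouts, like CallSite
      const std::vector<int> *Ops;
      unsigned FirstArg;
      explicit Site(const Call &C)   : Ops(&C.Ops), FirstArg(1) {}
      explicit Site(const Invoke &I) : Ops(&I.Ops), FirstArg(3) {}
      const int *arg_begin() const { return &(*Ops)[0] + FirstArg; }
      const int *arg_end()   const { return &(*Ops)[0] + Ops->size(); }
    };

    int main() {
      Call C;
      C.Ops.push_back(99);  // callee
      C.Ops.push_back(1);
      C.Ops.push_back(2);
      Site CS(C);
      for (const int *A = CS.arg_begin(); A != CS.arg_end(); ++A)
        std::printf("arg %d\n", *A);  // prints 1 then 2
      return 0;
    }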
@@ -194,7 +196,7 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
unsigned NumMeta = 0;
SmallVector<Constant*,16> Metadata;
for (unsigned I = 0; I != Roots.size(); ++I) {
- Constant *C = cast<Constant>(Roots[I].first->getOperand(2));
+ Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
if (!C->isNullValue())
NumMeta = I + 1;
Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
@@ -322,16 +324,16 @@ void ShadowStackGC::CollectRoots(Function &F) {
assert(Roots.empty() && "Not cleaned up?");
- SmallVector<std::pair<CallInst*,AllocaInst*>,16> MetaRoots;
+ SmallVector<std::pair<CallInst*, AllocaInst*>, 16> MetaRoots;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;)
if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++))
if (Function *F = CI->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::gcroot) {
- std::pair<CallInst*,AllocaInst*> Pair = std::make_pair(
- CI, cast<AllocaInst>(CI->getOperand(1)->stripPointerCasts()));
- if (IsNullValue(CI->getOperand(2)))
+ std::pair<CallInst*, AllocaInst*> Pair = std::make_pair(
+ CI, cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts()));
+ if (IsNullValue(CI->getArgOperand(1)))
Roots.push_back(Pair);
else
MetaRoots.push_back(Pair);
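Both gcroot operands shift down by one here: getArgOperand(i) indexes call arguments directly, while the old getOperand(i) counted non-argument operands as well. A toy model of the mapping, assuming the simple layout where operand 0 is the callee and arguments follow:

    #include <cassert>
    #include <vector>

    struct CallLike {  // invented stand-in, not llvm::CallInst
      std::vector<int> Operands;  // {callee, arg0, arg1, ...} assumed
      int getOperand(unsigned I) const { return Operands[I]; }
      int getArgOperand(unsigned I) const { return Operands[I + 1]; }
    };

    int main() {
      CallLike CI;
      CI.Operands.push_back(7);   // callee
      CI.Operands.push_back(10);  // the gcroot stack slot, say
      CI.Operands.push_back(20);  // the gcroot metadata, say
      assert(CI.getArgOperand(0) == CI.getOperand(1));
      assert(CI.getArgOperand(1) == CI.getOperand(2));
      return 0;
    }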
diff --git a/lib/CodeGen/SimpleHazardRecognizer.h b/lib/CodeGen/SimpleHazardRecognizer.h
deleted file mode 100644
index f69feaf..0000000
--- a/lib/CodeGen/SimpleHazardRecognizer.h
+++ /dev/null
@@ -1,89 +0,0 @@
-//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the SimpleHazardRecognizer class, which
-// implements hazard-avoidance heuristics for scheduling, based on the
-// scheduling itineraries specified for the target.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H
-#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H
-
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
-namespace llvm {
- /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses
- /// a coarse classification and tries to avoid grouping instructions of
- /// a given class too densely together.
- class SimpleHazardRecognizer : public ScheduleHazardRecognizer {
- /// Class - A simple classification for SUnits.
- enum Class {
- Other, Load, Store
- };
-
- /// Window - The Class values of the most recently issued
- /// instructions.
- Class Window[8];
-
- /// getClass - Classify the given SUnit.
- Class getClass(const SUnit *SU) {
- const MachineInstr *MI = SU->getInstr();
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.mayLoad())
- return Load;
- if (TID.mayStore())
- return Store;
- return Other;
- }
-
- /// Step - Rotate the existing entries in Window and insert the
- /// given class value in position as the most recent.
- void Step(Class C) {
- std::copy(Window+1, array_endof(Window), Window);
- Window[array_lengthof(Window)-1] = C;
- }
-
- public:
- SimpleHazardRecognizer() : Window() {
- Reset();
- }
-
- virtual HazardType getHazardType(SUnit *SU) {
- Class C = getClass(SU);
- if (C == Other)
- return NoHazard;
- unsigned Score = 0;
- for (unsigned i = 0; i != array_lengthof(Window); ++i)
- if (Window[i] == C)
- Score += i + 1;
- if (Score > array_lengthof(Window) * 2)
- return Hazard;
- return NoHazard;
- }
-
- virtual void Reset() {
- for (unsigned i = 0; i != array_lengthof(Window); ++i)
- Window[i] = Other;
- }
-
- virtual void EmitInstruction(SUnit *SU) {
- Step(getClass(SU));
- }
-
- virtual void AdvanceCycle() {
- Step(Other);
- }
- };
-}
-
-#endif
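For reference, the deleted heuristic is easy to reproduce outside LLVM: it keeps a fixed window of recently issued instruction classes, weights recent slots more heavily, and flags a hazard once the weighted count of same-class instructions exceeds twice the window size. A standalone re-creation of that scoring:

    #include <algorithm>
    #include <cstdio>

    enum Class { Other, Load, Store };

    struct SimpleHazard {
      static const unsigned N = 8;  // same window size as the deleted code
      Class Window[N];
      SimpleHazard() { std::fill(Window, Window + N, Other); }
      void step(Class C) {          // rotate, newest class at the end
        std::copy(Window + 1, Window + N, Window);
        Window[N - 1] = C;
      }
      bool hazard(Class C) const {
        if (C == Other) return false;
        unsigned Score = 0;
        for (unsigned i = 0; i != N; ++i)
          if (Window[i] == C) Score += i + 1;
        return Score > N * 2;
      }
    };

    int main() {
      SimpleHazard H;
      for (int i = 0; i < 4; ++i) H.step(Load);
      // Slots 4..7 hold loads and score 5+6+7+8 = 26 > 16: hazard.
      std::printf("%s\n", H.hazard(Load) ? "Hazard" : "NoHazard");
      return 0;
    }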
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index ed3c243..e69d3e4 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -99,15 +99,23 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
///
/// This returns true if an interval was modified.
///
-bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
- LiveInterval &IntB,
+bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
MachineInstr *CopyMI) {
+ // Bail if there is no dst interval - can happen when merging physical subreg
+ // operations.
+ if (!li_->hasInterval(CP.getDstReg()))
+ return false;
+
+ LiveInterval &IntA =
+ li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
- assert(BLR != IntB.end() && "Live range not found!");
+ if (BLR == IntB.end()) return false;
VNInfo *BValNo = BLR->valno;
// Get the location that B is defined at. Two options: either this value has
@@ -119,7 +127,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// AValNo is the value number in A that defines the copy, A3 in the example.
SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
- assert(ALR != IntA.end() && "Live range not found!");
+ // The live range might not exist after fun with physreg coalescing.
+ if (ALR == IntA.end()) return false;
VNInfo *AValNo = ALR->valno;
// If it's re-defined by an early clobber somewhere in the live range, then
// it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
@@ -145,26 +154,21 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// If AValNo is defined as a copy from IntB, we can potentially process this.
// Get the instruction that defines this value number.
- unsigned SrcReg = li_->getVNInfoSourceReg(AValNo);
- if (!SrcReg) return false; // Not defined by a copy.
-
- // If the value number is not defined by a copy instruction, ignore it.
-
- // If the source register comes from an interval other than IntB, we can't
- // handle this.
- if (SrcReg != IntB.reg) return false;
+ if (!CP.isCoalescable(AValNo->getCopy()))
+ return false;
// Get the LiveRange in IntB that this value number starts with.
LiveInterval::iterator ValLR =
IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
- assert(ValLR != IntB.end() && "Live range not found!");
+ if (ValLR == IntB.end())
+ return false;
// Make sure that the end of the live range is inside the same block as
// CopyMI.
MachineInstr *ValLREndInst =
li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
- if (!ValLREndInst ||
- ValLREndInst->getParent() != CopyMI->getParent()) return false;
+ if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+ return false;
// Okay, we now know that ValLR ends in the same block that the CopyMI
// live-range starts. If there are no intervening live ranges between them in
@@ -207,6 +211,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// physreg has sub-registers, update their live intervals as well.
if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
LiveInterval &SRLI = li_->getInterval(*SR);
SRLI.addRange(LiveRange(FillerStart, FillerEnd,
SRLI.getNextValue(FillerStart, 0, true,
@@ -216,7 +222,6 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// Okay, merge "B1" into the same value number as "B0".
if (BValNo != ValLR->valno) {
- IntB.addKills(ValLR->valno, BValNo->kills);
IntB.MergeValueNumberInto(BValNo, ValLR->valno);
}
DEBUG({
@@ -230,13 +235,12 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
if (UIdx != -1) {
ValLREndInst->getOperand(UIdx).setIsKill(false);
- ValLR->valno->removeKill(FillerStart);
}
// If the copy instruction was killing the destination register before the
// merge, find the last use and trim the live range. That will also add the
// isKill marker.
- if (ALR->valno->isKill(CopyIdx))
+ if (ALR->end == CopyIdx)
TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR);
++numExtends;
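AdjustCopiesBackFrom now derives both intervals from the CoalescerPair instead of taking them as parameters; when the pair was normalized with its registers swapped, isFlipped() tells the lookup to swap them back so IntA is always the copy source's interval. A tiny model of that selection, with hypothetical field names:

    #include <cassert>

    struct Pair { unsigned Src, Dst; bool Flipped; };  // made-up fields

    static unsigned regForIntA(const Pair &CP) {
      return CP.Flipped ? CP.Dst : CP.Src;
    }
    static unsigned regForIntB(const Pair &CP) {
      return CP.Flipped ? CP.Src : CP.Dst;
    }

    int main() {
      Pair CP = {1024, 1025, true};
      assert(regForIntA(CP) == 1025 && regForIntB(CP) == 1024);
      return 0;
    }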
@@ -304,23 +308,31 @@ TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
///
/// This returns true if an interval was modified.
///
-bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
- LiveInterval &IntB,
+bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
MachineInstr *CopyMI) {
- SlotIndex CopyIdx =
- li_->getInstructionIndex(CopyMI).getDefIndex();
-
// FIXME: For now, only eliminate the copy by commuting its def when the
// source register is a virtual register. We want to guard against cases
// where the copy is a back edge copy and commuting the def lengthen the
// live interval of the source register to the entire loop.
- if (TargetRegisterInfo::isPhysicalRegister(IntA.reg))
+ if (CP.isPhys() && CP.isFlipped())
+ return false;
+
+ // Bail if there is no dst interval.
+ if (!li_->hasInterval(CP.getDstReg()))
return false;
+ SlotIndex CopyIdx =
+ li_->getInstructionIndex(CopyMI).getDefIndex();
+
+ LiveInterval &IntA =
+ li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
- assert(BLR != IntB.end() && "Live range not found!");
+ if (BLR == IntB.end()) return false;
VNInfo *BValNo = BLR->valno;
// Get the location that B is defined at. Two options: either this value has
@@ -342,6 +354,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
AValNo->isUnused() || AValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+ if (!DefMI)
+ return false;
const TargetInstrDesc &TID = DefMI->getDesc();
if (!TID.isCommutable())
return false;
@@ -380,7 +394,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// clobbers from the superreg.
if (BHasSubRegs)
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR)
- if (HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0))
+ if (li_->hasInterval(*SR) &&
+ HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0))
return false;
// If some of the uses of IntA.reg is already coalesced away, return false.
@@ -413,7 +428,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
bool BHasPHIKill = BValNo->hasPHIKill();
SmallVector<VNInfo*, 4> BDeadValNos;
- VNInfo::KillSet BKills;
std::map<SlotIndex, SlotIndex> BExtend;
// If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
@@ -424,8 +438,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// C = A<kill>
// ...
// = B
- //
- // then do not add kills of A to the newly created B interval.
bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
if (Extended)
BExtend[ALR->end] = BLR->end;
@@ -448,34 +460,38 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
if (ULR == IntA.end() || ULR->valno != AValNo)
continue;
- UseMO.setReg(NewReg);
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ UseMO.substPhysReg(NewReg, *tri_);
+ else
+ UseMO.setReg(NewReg);
if (UseMI == CopyMI)
continue;
if (UseMO.isKill()) {
if (Extended)
UseMO.setIsKill(false);
- else
- BKills.push_back(UseIdx.getDefIndex());
}
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ if (UseMI->isCopy()) {
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+ } else if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
+ if (DstReg != IntB.reg || DstSubIdx)
+ continue;
+ } else
continue;
- if (DstReg == IntB.reg && DstSubIdx == 0) {
- // This copy will become a noop. If it's defining a new val#,
- // remove that val# as well. However this live range is being
- // extended to the end of the existing live range defined by the copy.
- SlotIndex DefIdx = UseIdx.getDefIndex();
- const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
- BHasPHIKill |= DLR->valno->hasPHIKill();
- assert(DLR->valno->def == DefIdx);
- BDeadValNos.push_back(DLR->valno);
- BExtend[DLR->start] = DLR->end;
- JoinedCopies.insert(UseMI);
- // If this is a kill but it's going to be removed, the last use
- // of the same val# is the new kill.
- if (UseMO.isKill())
- BKills.pop_back();
- }
+ // This copy will become a noop. If it's defining a new val#,
+ // remove that val# as well. However this live range is being
+ // extended to the end of the existing live range defined by the copy.
+ SlotIndex DefIdx = UseIdx.getDefIndex();
+ const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
+ if (!DLR)
+ continue;
+ BHasPHIKill |= DLR->valno->hasPHIKill();
+ assert(DLR->valno->def == DefIdx);
+ BDeadValNos.push_back(DLR->valno);
+ BExtend[DLR->start] = DLR->end;
+ JoinedCopies.insert(UseMI);
}
// We need to insert a new liverange: [ALR.start, LastUse). It may be we can
@@ -490,24 +506,21 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
VNInfo *DeadVNI = BDeadValNos[i];
if (BHasSubRegs) {
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
LiveInterval &SRLI = li_->getInterval(*SR);
- const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def);
- SRLI.removeValNo(SRLR->valno);
+ if (const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def))
+ SRLI.removeValNo(SRLR->valno);
}
}
IntB.removeValNo(BDeadValNos[i]);
}
// Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
- // is updated. Kills are also updated.
+ // is updated.
VNInfo *ValNo = BValNo;
ValNo->def = AValNo->def;
ValNo->setCopy(0);
- for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) {
- if (ValNo->kills[j] != BLR->end)
- BKills.push_back(ValNo->kills[j]);
- }
- ValNo->kills.clear();
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
@@ -517,18 +530,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
if (EI != BExtend.end())
End = EI->second;
IntB.addRange(LiveRange(AI->start, End, ValNo));
-
- // If the IntB live range is assigned to a physical register, and if that
- // physreg has sub-registers, update their live intervals as well.
- if (BHasSubRegs) {
- for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
- LiveInterval &SRLI = li_->getInterval(*SR);
- SRLI.MergeInClobberRange(*li_, AI->start, End,
- li_->getVNInfoAllocator());
- }
- }
}
- IntB.addKills(ValNo, BKills);
ValNo->setHasPHIKill(BHasPHIKill);
DEBUG({
@@ -621,7 +623,11 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
// of last use.
LastUse->setIsKill();
removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_);
- LR->valno->addKill(LastUseIdx.getDefIndex());
+ if (LastUseMI->isCopy()) {
+ MachineOperand &DefMO = LastUseMI->getOperand(0);
+ if (DefMO.getReg() == li.reg && !DefMO.getSubReg())
+ DefMO.setIsDead();
+ }
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
DstReg == li.reg && DstSubIdx == 0) {
@@ -663,6 +669,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
ValNo->isUnused() || ValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ assert(DefMI && "Defining instruction disappeared");
const TargetInstrDesc &TID = DefMI->getDesc();
if (!TID.isAsCheapAsAMove())
return false;
@@ -701,33 +708,20 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
return false;
}
- SlotIndex DefIdx = CopyIdx.getDefIndex();
- const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx);
- DLR->valno->setCopy(0);
- // Don't forget to update sub-register intervals.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) {
- if (!li_->hasInterval(*SR))
- continue;
- const LiveRange *DLR =
- li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
- if (DLR && DLR->valno->getCopy() == CopyMI)
- DLR->valno->setCopy(0);
- }
- }
+ RemoveCopyFlag(DstReg, CopyMI);
// If copy kills the source register, find the last use and propagate
// kill.
bool checkForDeadDef = false;
MachineBasicBlock *MBB = CopyMI->getParent();
- if (SrcLR->valno->isKill(DefIdx))
+ if (SrcLR->end == CopyIdx.getDefIndex())
if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
checkForDeadDef = true;
}
MachineBasicBlock::iterator MII =
llvm::next(MachineBasicBlock::iterator(CopyMI));
- tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_);
+ tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
MachineInstr *NewMI = prior(MII);
if (checkForDeadDef) {
@@ -747,24 +741,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
MachineOperand &MO = CopyMI->getOperand(i);
if (MO.isReg() && MO.isImplicit())
NewMI->addOperand(MO);
- if (MO.isDef() && li_->hasInterval(MO.getReg())) {
- unsigned Reg = MO.getReg();
- const LiveRange *DLR =
- li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
- if (DLR && DLR->valno->getCopy() == CopyMI)
- DLR->valno->setCopy(0);
- // Handle subregs as well
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (const unsigned* SR = tri_->getSubRegisters(Reg); *SR; ++SR) {
- if (!li_->hasInterval(*SR))
- continue;
- const LiveRange *DLR =
- li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
- if (DLR && DLR->valno->getCopy() == CopyMI)
- DLR->valno->setCopy(0);
- }
- }
- }
+ if (MO.isDef())
+ RemoveCopyFlag(MO.getReg(), CopyMI);
}
TransferImplicitOps(CopyMI, NewMI);
@@ -783,84 +761,72 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
void
-SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
- unsigned SubIdx) {
- bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (DstIsPhys && SubIdx) {
- // Figure out the real physical register we are updating with.
- DstReg = tri_->getSubReg(DstReg, SubIdx);
- SubIdx = 0;
- }
-
- // Copy the register use-list before traversing it. We may be adding operands
- // and invalidating pointers.
- SmallVector<std::pair<MachineInstr*, unsigned>, 32> reglist;
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg),
- E = mri_->reg_end(); I != E; ++I)
- reglist.push_back(std::make_pair(&*I, I.getOperandNo()));
-
- for (unsigned N=0; N != reglist.size(); ++N) {
- MachineInstr *UseMI = reglist[N].first;
- MachineOperand &O = UseMI->getOperand(reglist[N].second);
- unsigned OldSubIdx = O.getSubReg();
+SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
+ bool DstIsPhys = CP.isPhys();
+ unsigned SrcReg = CP.getSrcReg();
+ unsigned DstReg = CP.getDstReg();
+ unsigned SubIdx = CP.getSubIdx();
+
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ // A PhysReg copy that won't be coalesced can perhaps be rematerialized
+ // instead.
if (DstIsPhys) {
- unsigned UseDstReg = DstReg;
- if (OldSubIdx)
- UseDstReg = tri_->getSubReg(DstReg, OldSubIdx);
-
unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
CopySrcSubIdx, CopyDstSubIdx) &&
- CopySrcSubIdx == 0 &&
- CopyDstSubIdx == 0 &&
- CopySrcReg != CopyDstReg &&
- CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
- // If the use is a copy and it won't be coalesced away, and its source
- // is defined by a trivial computation, try to rematerialize it instead.
- if (!JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,
- CopyDstSubIdx, UseMI))
- continue;
- }
+ CopySrcSubIdx == 0 && CopyDstSubIdx == 0 &&
+ CopySrcReg != CopyDstReg && CopySrcReg == SrcReg &&
+ CopyDstReg != DstReg && !JoinedCopies.count(UseMI) &&
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, 0,
+ UseMI))
+ continue;
- O.setReg(UseDstReg);
- O.setSubReg(0);
- if (OldSubIdx) {
- // Def and kill of subregister of a virtual register actually defs and
- // kills the whole register. Add imp-defs and imp-kills as needed.
- if (O.isDef()) {
- if(O.isDead())
- UseMI->addRegisterDead(DstReg, tri_, true);
- else
- UseMI->addRegisterDefined(DstReg, tri_);
- } else if (!O.isUndef() &&
- (O.isKill() ||
- UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0))))
- UseMI->addRegisterKilled(DstReg, tri_, true);
- }
+ if (UseMI->isCopy() &&
+ !UseMI->getOperand(1).getSubReg() &&
+ !UseMI->getOperand(0).getSubReg() &&
+ UseMI->getOperand(1).getReg() == SrcReg &&
+ UseMI->getOperand(0).getReg() != SrcReg &&
+ UseMI->getOperand(0).getReg() != DstReg &&
+ !JoinedCopies.count(UseMI) &&
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg),
+ UseMI->getOperand(0).getReg(), 0, UseMI))
+ continue;
+ }
- DEBUG({
- dbgs() << "\t\tupdated: ";
- if (!UseMI->isDebugValue())
- dbgs() << li_->getInstructionIndex(UseMI) << "\t";
- dbgs() << *UseMI;
- });
- continue;
+ SmallVector<unsigned,8> Ops;
+ bool Reads, Writes;
+ tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+ bool Kills = false, Deads = false;
+
+ // Replace SrcReg with DstReg in all UseMI operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(Ops[i]);
+ Kills |= MO.isKill();
+ Deads |= MO.isDead();
+
+ if (DstIsPhys)
+ MO.substPhysReg(DstReg, *tri_);
+ else
+ MO.substVirtReg(DstReg, SubIdx, *tri_);
}
- // Sub-register indexes go from small to large. e.g.
- // RAX: 1 -> AL, 2 -> AX, 3 -> EAX
- // EAX: 1 -> AL, 2 -> AX
- // So RAX's sub-register 2 is AX, RAX's sub-register 3 is EAX, whose
- // sub-register 2 is also AX.
- //
- // FIXME: Properly compose subreg indices for all targets.
- //
- if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
- ;
- else if (SubIdx)
- O.setSubReg(SubIdx);
- O.setReg(DstReg);
+ // This instruction is a copy that will be removed.
+ if (JoinedCopies.count(UseMI))
+ continue;
+
+ if (SubIdx) {
+ // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
+ // read-modify-write of DstReg.
+ if (Deads)
+ UseMI->addRegisterDead(DstReg, tri_);
+ else if (!Reads && Writes)
+ UseMI->addRegisterDefined(DstReg, tri_);
+
+ // Kill flags apply to the whole physical register.
+ if (DstIsPhys && Kills)
+ UseMI->addRegisterKilled(DstReg, tri_);
+ }
DEBUG({
dbgs() << "\t\tupdated: ";
@@ -869,15 +835,15 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
dbgs() << *UseMI;
});
+
// After updating the operand, check if the machine instruction has
// become a copy. If so, update its val# information.
- if (JoinedCopies.count(UseMI))
+ const TargetInstrDesc &TID = UseMI->getDesc();
+ if (DstIsPhys || TID.getNumDefs() != 1 || TID.getNumOperands() <= 2)
continue;
- const TargetInstrDesc &TID = UseMI->getDesc();
unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
- if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 &&
- tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
+ if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
CopySrcSubIdx, CopyDstSubIdx) &&
CopySrcReg != CopyDstReg &&
(TargetRegisterInfo::isVirtualRegister(CopyDstReg) ||
@@ -945,6 +911,27 @@ bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
return removeIntervalIfEmpty(li, li_, tri_);
}
+void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
+ const MachineInstr *CopyMI) {
+ SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+ if (li_->hasInterval(DstReg)) {
+ LiveInterval &LI = li_->getInterval(DstReg);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->getCopy() == CopyMI)
+ LR->valno->setCopy(0);
+ }
+ if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return;
+ for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) {
+ if (!li_->hasInterval(*AS))
+ continue;
+ LiveInterval &LI = li_->getInterval(*AS);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->getCopy() == CopyMI)
+ LR->valno->setCopy(0);
+ }
+}
+
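RemoveCopyFlag centralizes the clear-the-copy-pointer bookkeeping that the deletions in ReMaterializeTrivialDef above used to do inline, and widens it from sub-registers to the full alias set. The alias walk relies on the convention that getAliasSet returns a 0-terminated register list; a sketch of that iteration pattern over made-up data:

    #include <cstdio>

    int main() {
      // A 0 sentinel ends the list, as with tri_->getAliasSet(DstReg).
      const unsigned Aliases[] = {17, 18, 42, 0};
      for (const unsigned *AS = Aliases; *AS; ++AS)
        std::printf("alias %u\n", *AS);
      return 0;
    }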
/// PropagateDeadness - Propagate the dead marker to the instruction which
/// defines the val#.
static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
@@ -978,8 +965,8 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
// Live-in to the function but dead. Remove it from entry live-in set.
if (mf_->begin()->isLiveIn(li.reg))
mf_->begin()->removeLiveIn(li.reg);
- const LiveRange *LR = li.getLiveRangeContaining(CopyIdx);
- removeRange(li, LR->start, LR->end, li_, tri_);
+ if (const LiveRange *LR = li.getLiveRangeContaining(CopyIdx))
+ removeRange(li, LR->start, LR->end, li_, tri_);
return removeIntervalIfEmpty(li, li_, tri_);
}
@@ -1017,147 +1004,12 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li,
// val#, then propagate the dead marker.
PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_);
++numDeadValNo;
-
- if (LR->valno->isKill(RemoveEnd))
- LR->valno->removeKill(RemoveEnd);
}
removeRange(li, RemoveStart, RemoveEnd, li_, tri_);
return removeIntervalIfEmpty(li, li_, tri_);
}
-/// CanCoalesceWithImpDef - Returns true if the specified copy instruction
-/// from an implicit def to another register can be coalesced away.
-bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
- LiveInterval &li,
- LiveInterval &ImpLi) const{
- if (!CopyMI->killsRegister(ImpLi.reg))
- return false;
- // Make sure this is the only use.
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(ImpLi.reg),
- UE = mri_->use_end(); UI != UE;) {
- MachineInstr *UseMI = &*UI;
- ++UI;
- if (CopyMI == UseMI || JoinedCopies.count(UseMI))
- continue;
- return false;
- }
- return true;
-}
-
-
-/// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join
-/// a virtual destination register with a physical source register.
-bool
-SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
- MachineBasicBlock *CopyMBB,
- LiveInterval &DstInt,
- LiveInterval &SrcInt) {
- // If the virtual register live interval is long but it has low use desity,
- // do not join them, instead mark the physical register as its allocation
- // preference.
- const TargetRegisterClass *RC = mri_->getRegClass(DstInt.reg);
- unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
- unsigned Length = li_->getApproximateInstructionCount(DstInt);
- if (Length > Threshold &&
- std::distance(mri_->use_nodbg_begin(DstInt.reg),
- mri_->use_nodbg_end()) * Threshold < Length)
- return false;
-
- // If the virtual register live interval extends into a loop, turn down
- // aggressiveness.
- SlotIndex CopyIdx =
- li_->getInstructionIndex(CopyMI).getDefIndex();
- const MachineLoop *L = loopInfo->getLoopFor(CopyMBB);
- if (!L) {
- // Let's see if the virtual register live interval extends into the loop.
- LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx);
- assert(DLR != DstInt.end() && "Live range not found!");
- DLR = DstInt.FindLiveRangeContaining(DLR->end.getNextSlot());
- if (DLR != DstInt.end()) {
- CopyMBB = li_->getMBBFromIndex(DLR->start);
- L = loopInfo->getLoopFor(CopyMBB);
- }
- }
-
- if (!L || Length <= Threshold)
- return true;
-
- SlotIndex UseIdx = CopyIdx.getUseIndex();
- LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
- MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
- if (loopInfo->getLoopFor(SMBB) != L) {
- if (!loopInfo->isLoopHeader(CopyMBB))
- return false;
- // If vr's live interval extends pass the loop header, do not join.
- for (MachineBasicBlock::succ_iterator SI = CopyMBB->succ_begin(),
- SE = CopyMBB->succ_end(); SI != SE; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
- if (SuccMBB == CopyMBB)
- continue;
- if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB),
- li_->getMBBEndIdx(SuccMBB)))
- return false;
- }
- }
- return true;
-}
-
-/// isWinToJoinVRWithDstPhysReg - Return true if it's worth while to join a
-/// copy from a virtual source register to a physical destination register.
-bool
-SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
- MachineBasicBlock *CopyMBB,
- LiveInterval &DstInt,
- LiveInterval &SrcInt) {
- // If the virtual register live interval is long but it has low use density,
- // do not join them, instead mark the physical register as its allocation
- // preference.
- const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg);
- unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
- unsigned Length = li_->getApproximateInstructionCount(SrcInt);
- if (Length > Threshold &&
- std::distance(mri_->use_nodbg_begin(SrcInt.reg),
- mri_->use_nodbg_end()) * Threshold < Length)
- return false;
-
- if (SrcInt.empty())
- // Must be implicit_def.
- return false;
-
- // If the virtual register live interval is defined or cross a loop, turn
- // down aggressiveness.
- SlotIndex CopyIdx =
- li_->getInstructionIndex(CopyMI).getDefIndex();
- SlotIndex UseIdx = CopyIdx.getUseIndex();
- LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx);
- assert(SLR != SrcInt.end() && "Live range not found!");
- SLR = SrcInt.FindLiveRangeContaining(SLR->start.getPrevSlot());
- if (SLR == SrcInt.end())
- return true;
- MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start);
- const MachineLoop *L = loopInfo->getLoopFor(SMBB);
-
- if (!L || Length <= Threshold)
- return true;
-
- if (loopInfo->getLoopFor(CopyMBB) != L) {
- if (SMBB != L->getLoopLatch())
- return false;
- // If vr's live interval is extended from before the loop latch, do not
- // join.
- for (MachineBasicBlock::pred_iterator PI = SMBB->pred_begin(),
- PE = SMBB->pred_end(); PI != PE; ++PI) {
- MachineBasicBlock *PredMBB = *PI;
- if (PredMBB == SMBB)
- continue;
- if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB),
- li_->getMBBEndIdx(PredMBB)))
- return false;
- }
- }
- return true;
-}
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
@@ -1203,157 +1055,6 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
return true;
}
-/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
-/// register with a physical register, check if any of the virtual register
-/// operand is a sub-register use or def. If so, make sure it won't result
-/// in an illegal extract_subreg or insert_subreg instruction. e.g.
-/// vr1024 = extract_subreg vr1025, 1
-/// ...
-/// vr1024 = mov8rr AH
-/// If vr1024 is coalesced with AH, the extract_subreg is now illegal since
-/// AH does not have a super-reg whose sub-register 1 is AH.
-bool
-SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
- unsigned VirtReg,
- unsigned PhysReg) {
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg),
- E = mri_->reg_end(); I != E; ++I) {
- MachineOperand &O = I.getOperand();
- if (O.isDebug())
- continue;
- MachineInstr *MI = &*I;
- if (MI == CopyMI || JoinedCopies.count(MI))
- continue;
- unsigned SubIdx = O.getSubReg();
- if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx))
- return true;
- if (MI->isExtractSubreg()) {
- SubIdx = MI->getOperand(2).getImm();
- if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx))
- return true;
- if (O.isDef()) {
- unsigned SrcReg = MI->getOperand(1).getReg();
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isPhysicalRegister(SrcReg)
- ? tri_->getPhysicalRegisterRegClass(SrcReg)
- : mri_->getRegClass(SrcReg);
- if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
- return true;
- }
- }
- if (MI->isInsertSubreg() || MI->isSubregToReg()) {
- SubIdx = MI->getOperand(3).getImm();
- if (VirtReg == MI->getOperand(0).getReg()) {
- if (!tri_->getSubReg(PhysReg, SubIdx))
- return true;
- } else {
- unsigned DstReg = MI->getOperand(0).getReg();
- const TargetRegisterClass *RC =
- TargetRegisterInfo::isPhysicalRegister(DstReg)
- ? tri_->getPhysicalRegisterRegClass(DstReg)
- : mri_->getRegClass(DstReg);
- if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC))
- return true;
- }
- }
- }
- return false;
-}
-
-
-/// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
-/// an extract_subreg where dst is a physical register, e.g.
-/// cl = EXTRACT_SUBREG reg1024, 1
-bool
-SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
- unsigned SrcReg, unsigned SubIdx,
- unsigned &RealDstReg) {
- const TargetRegisterClass *RC = mri_->getRegClass(SrcReg);
- RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
- if (!RealDstReg) {
- DEBUG(dbgs() << "\tIncompatible source regclass: "
- << "none of the super-registers of " << tri_->getName(DstReg)
- << " are in " << RC->getName() << ".\n");
- return false;
- }
-
- LiveInterval &RHS = li_->getInterval(SrcReg);
- // For this type of EXTRACT_SUBREG, conservatively
- // check if the live interval of the source register interfere with the
- // actual super physical register we are trying to coalesce with.
- if (li_->hasInterval(RealDstReg) &&
- RHS.overlaps(li_->getInterval(RealDstReg))) {
- DEBUG({
- dbgs() << "\t\tInterfere with register ";
- li_->getInterval(RealDstReg).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
- // Do not check DstReg or its sub-register. JoinIntervals() will take care
- // of that.
- if (*SR != DstReg &&
- !tri_->isSubRegister(DstReg, *SR) &&
- li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
- DEBUG({
- dbgs() << "\t\tInterfere with sub-register ";
- li_->getInterval(*SR).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- return true;
-}
-
-/// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
-/// an insert_subreg where src is a physical register, e.g.
-/// reg1024 = INSERT_SUBREG reg1024, c1, 0
-bool
-SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
- unsigned SrcReg, unsigned SubIdx,
- unsigned &RealSrcReg) {
- const TargetRegisterClass *RC = mri_->getRegClass(DstReg);
- RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
- if (!RealSrcReg) {
- DEBUG(dbgs() << "\tIncompatible destination regclass: "
- << "none of the super-registers of " << tri_->getName(SrcReg)
- << " are in " << RC->getName() << ".\n");
- return false;
- }
-
- LiveInterval &LHS = li_->getInterval(DstReg);
- if (li_->hasInterval(RealSrcReg) &&
- LHS.overlaps(li_->getInterval(RealSrcReg))) {
- DEBUG({
- dbgs() << "\t\tInterfere with register ";
- li_->getInterval(RealSrcReg).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
- // Do not check SrcReg or its sub-register. JoinIntervals() will take care
- // of that.
- if (*SR != SrcReg &&
- !tri_->isSubRegister(SrcReg, *SR) &&
- li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
- DEBUG({
- dbgs() << "\t\tInterfere with sub-register ";
- li_->getInterval(*SR).print(dbgs(), tri_);
- });
- return false; // Not coalescable
- }
- return true;
-}
-
-/// getRegAllocPreference - Return register allocation preference register.
-///
-static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF,
- MachineRegisterInfo *MRI,
- const TargetRegisterInfo *TRI) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return 0;
- std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
- return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF);
-}
/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
@@ -1369,354 +1070,97 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
- unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0;
- bool isExtSubReg = CopyMI->isExtractSubreg();
- bool isInsSubReg = CopyMI->isInsertSubreg();
- bool isSubRegToReg = CopyMI->isSubregToReg();
- unsigned SubIdx = 0;
- if (isExtSubReg) {
- DstReg = CopyMI->getOperand(0).getReg();
- DstSubIdx = CopyMI->getOperand(0).getSubReg();
- SrcReg = CopyMI->getOperand(1).getReg();
- SrcSubIdx = CopyMI->getOperand(2).getImm();
- } else if (isInsSubReg || isSubRegToReg) {
- DstReg = CopyMI->getOperand(0).getReg();
- DstSubIdx = CopyMI->getOperand(3).getImm();
- SrcReg = CopyMI->getOperand(2).getReg();
- SrcSubIdx = CopyMI->getOperand(2).getSubReg();
- if (SrcSubIdx && SrcSubIdx != DstSubIdx) {
- // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been
- // coalesced to a larger register so the subreg indices cancel out.
- DEBUG(dbgs() << "\tSource of insert_subreg or subreg_to_reg is already "
- "coalesced to another register.\n");
- return false; // Not coalescable.
- }
- } else if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- if (SrcSubIdx && DstSubIdx && SrcSubIdx != DstSubIdx) {
- // e.g. %reg16404:1<def> = MOV8rr %reg16412:2<kill>
- Again = true;
- return false; // Not coalescable.
- }
- } else {
- llvm_unreachable("Unrecognized copy instruction!");
+ CoalescerPair CP(*tii_, *tri_);
+ if (!CP.setRegisters(CopyMI)) {
+ DEBUG(dbgs() << "\tNot coalescable.\n");
+ return false;
}
// If they are already joined we continue.
- if (SrcReg == DstReg) {
+ if (CP.getSrcReg() == CP.getDstReg()) {
DEBUG(dbgs() << "\tCopy already coalesced.\n");
return false; // Not coalescable.
}
- bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
- bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
-
- // If they are both physical registers, we cannot join them.
- if (SrcIsPhys && DstIsPhys) {
- DEBUG(dbgs() << "\tCan not coalesce physregs.\n");
- return false; // Not coalescable.
- }
-
- // We only join virtual registers with allocatable physical registers.
- if (SrcIsPhys && !allocatableRegs_[SrcReg]) {
- DEBUG(dbgs() << "\tSrc reg is unallocatable physreg.\n");
- return false; // Not coalescable.
- }
- if (DstIsPhys && !allocatableRegs_[DstReg]) {
- DEBUG(dbgs() << "\tDst reg is unallocatable physreg.\n");
- return false; // Not coalescable.
- }
-
- // We cannot handle dual subreg indices and mismatched classes at the same
- // time.
- if (SrcSubIdx && DstSubIdx && differingRegisterClasses(SrcReg, DstReg)) {
- DEBUG(dbgs() << "\tCannot handle subreg indices and mismatched classes.\n");
- return false;
- }
+ DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg());
- // Check that a physical source register is compatible with dst regclass
- if (SrcIsPhys) {
- unsigned SrcSubReg = SrcSubIdx ?
- tri_->getSubReg(SrcReg, SrcSubIdx) : SrcReg;
- const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
- const TargetRegisterClass *DstSubRC = DstRC;
- if (DstSubIdx)
- DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx);
- assert(DstSubRC && "Illegal subregister index");
- if (!DstSubRC->contains(SrcSubReg)) {
- DEBUG(dbgs() << "\tIncompatible destination regclass: "
- << "none of the super-registers of "
- << tri_->getName(SrcSubReg) << " are in "
- << DstSubRC->getName() << ".\n");
- return false; // Not coalescable.
- }
- }
-
- // Check that a physical dst register is compatible with source regclass
- if (DstIsPhys) {
- unsigned DstSubReg = DstSubIdx ?
- tri_->getSubReg(DstReg, DstSubIdx) : DstReg;
- const TargetRegisterClass *SrcRC = mri_->getRegClass(SrcReg);
- const TargetRegisterClass *SrcSubRC = SrcRC;
- if (SrcSubIdx)
- SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx);
- assert(SrcSubRC && "Illegal subregister index");
- if (!SrcSubRC->contains(DstSubReg)) {
- DEBUG(dbgs() << "\tIncompatible source regclass: "
- << "none of the super-registers of "
- << tri_->getName(DstSubReg) << " are in "
- << SrcSubRC->getName() << ".\n");
- (void)DstSubReg;
- return false; // Not coalescable.
+ // Enforce policies.
+ if (CP.isPhys()) {
+ DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n");
+ // Only coalesce to allocatable physreg.
+ if (!allocatableRegs_[CP.getDstReg()]) {
+ DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
+ return false; // Not coalescable.
}
- }
-
- // Should be non-null only when coalescing to a sub-register class.
- bool CrossRC = false;
- const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
- const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
- const TargetRegisterClass *NewRC = NULL;
- unsigned RealDstReg = 0;
- unsigned RealSrcReg = 0;
- if (isExtSubReg || isInsSubReg || isSubRegToReg) {
- SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm();
- if (SrcIsPhys && isExtSubReg) {
- // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be
- // coalesced with AX.
- unsigned DstSubIdx = CopyMI->getOperand(0).getSubReg();
- if (DstSubIdx) {
- // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been
- // coalesced to a larger register so the subreg indices cancel out.
- if (DstSubIdx != SubIdx) {
- DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
- return false; // Not coalescable.
- }
- } else
- SrcReg = tri_->getSubReg(SrcReg, SubIdx);
- SubIdx = 0;
- } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) {
- // EAX = INSERT_SUBREG EAX, r1024, 0
- unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg();
- if (SrcSubIdx) {
- // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been
- // coalesced to a larger register so the subreg indices cancel out.
- if (SrcSubIdx != SubIdx) {
- DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
- return false; // Not coalescable.
- }
- } else
- DstReg = tri_->getSubReg(DstReg, SubIdx);
- SubIdx = 0;
- } else if ((DstIsPhys && isExtSubReg) ||
- (SrcIsPhys && (isInsSubReg || isSubRegToReg))) {
- if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) {
- DEBUG(dbgs() << "\tSrc of extract_subreg already coalesced with reg"
- << " of a super-class.\n");
- return false; // Not coalescable.
- }
-
- // FIXME: The following checks are somewhat conservative. Perhaps a better
- // way to implement this is to treat this as coalescing a vr with the
- // super physical register.
- if (isExtSubReg) {
- if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
- return false; // Not coalescable
- } else {
- if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
- return false; // Not coalescable
- }
- SubIdx = 0;
- } else {
- unsigned OldSubIdx = isExtSubReg ? CopyMI->getOperand(0).getSubReg()
- : CopyMI->getOperand(2).getSubReg();
- if (OldSubIdx) {
- if (OldSubIdx == SubIdx && !differingRegisterClasses(SrcReg, DstReg))
- // r1024<2> = EXTRACT_SUBREG r1025, 2. Then r1024 has already been
- // coalesced to a larger register so the subreg indices cancel out.
- // Also check if the other larger register is of the same register
- // class as the would be resulting register.
- SubIdx = 0;
- else {
- DEBUG(dbgs() << "\t Sub-register indices mismatch.\n");
- return false; // Not coalescable.
- }
- }
- if (SubIdx) {
- if (!DstIsPhys && !SrcIsPhys) {
- if (isInsSubReg || isSubRegToReg) {
- NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx);
- } else // extract_subreg {
- NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx);
- }
- if (!NewRC) {
- DEBUG(dbgs() << "\t Conflicting sub-register indices.\n");
- return false; // Not coalescable
- }
+ } else {
+ DEBUG({
+ dbgs() << " with reg%" << CP.getDstReg();
+ if (CP.getSubIdx())
+ dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx());
+ dbgs() << " to " << CP.getNewRC()->getName() << "\n";
+ });
- if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
- DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: "
- << SrcRC->getName() << "/"
- << DstRC->getName() << " -> "
- << NewRC->getName() << ".\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
- }
- }
- } else if (differingRegisterClasses(SrcReg, DstReg)) {
- if (DisableCrossClassJoin)
- return false;
- CrossRC = true;
-
- // FIXME: What if the result of a EXTRACT_SUBREG is then coalesced
- // with another? If it's the resulting destination register, then
- // the subidx must be propagated to uses (but only those defined
- // by the EXTRACT_SUBREG). If it's being coalesced into another
- // register, it should be safe because register is assumed to have
- // the register class of the super-register.
-
- // Process moves where one of the registers have a sub-register index.
- MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg);
- MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg);
- SubIdx = DstMO->getSubReg();
- if (SubIdx) {
- if (SrcMO->getSubReg())
- // FIXME: can we handle this?
+ // Avoid constraining virtual register regclass too much.
+ if (CP.isCrossClass()) {
+ if (DisableCrossClassJoin) {
+ DEBUG(dbgs() << "\tCross-class joins disabled.\n");
return false;
- // This is not an insert_subreg but it looks like one.
- // e.g. %reg1024:4 = MOV32rr %EAX
- isInsSubReg = true;
- if (SrcIsPhys) {
- if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg))
- return false; // Not coalescable
- SubIdx = 0;
- }
- } else {
- SubIdx = SrcMO->getSubReg();
- if (SubIdx) {
- // This is not a extract_subreg but it looks like one.
- // e.g. %cl = MOV16rr %reg1024:1
- isExtSubReg = true;
- if (DstIsPhys) {
- if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg))
- return false; // Not coalescable
- SubIdx = 0;
- }
- }
- }
-
- // Now determine the register class of the joined register.
- if (!SrcIsPhys && !DstIsPhys) {
- if (isExtSubReg) {
- NewRC =
- SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC;
- } else if (isInsSubReg) {
- NewRC =
- SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC;
- } else {
- NewRC = getCommonSubClass(SrcRC, DstRC);
- }
-
- if (!NewRC) {
- DEBUG(dbgs() << "\tDisjoint regclasses: "
- << SrcRC->getName() << ", "
- << DstRC->getName() << ".\n");
- return false; // Not coalescable.
}
-
- // If we are joining two virtual registers and the resulting register
- // class is more restrictive (fewer register, smaller size). Check if it's
- // worth doing the merge.
- if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
+ if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
+ mri_->getRegClass(CP.getSrcReg()),
+ mri_->getRegClass(CP.getDstReg()),
+ CP.getNewRC())) {
DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: "
- << SrcRC->getName() << "/"
- << DstRC->getName() << " -> "
- << NewRC->getName() << ".\n");
- // Allow the coalescer to try again in case either side gets coalesced to
- // a physical register that's compatible with the other side. e.g.
- // r1024 = MOV32to32_ r1025
- // But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
+ << CP.getNewRC()->getName() << ".\n");
Again = true; // May be possible to coalesce later.
return false;
}
}
- }
-
- // Will it create illegal extract_subreg / insert_subreg?
- if (SrcIsPhys && HasIncompatibleSubRegDefUse(CopyMI, DstReg, SrcReg))
- return false;
- if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg))
- return false;
-
- LiveInterval &SrcInt = li_->getInterval(SrcReg);
- LiveInterval &DstInt = li_->getInterval(DstReg);
- assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg &&
- "Register mapping is horribly broken!");
- DEBUG({
- dbgs() << "\t\tInspecting ";
- if (SrcRC) dbgs() << SrcRC->getName() << ": ";
- SrcInt.print(dbgs(), tri_);
- dbgs() << "\n\t\t and ";
- if (DstRC) dbgs() << DstRC->getName() << ": ";
- DstInt.print(dbgs(), tri_);
- dbgs() << "\n";
- });
+ // When possible, let DstReg be the larger interval.
+ if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() >
+ li_->getInterval(CP.getDstReg()).ranges.size())
+ CP.flip();
+ }
+
+ // We need to be careful about coalescing a source physical register with a
+ // virtual register. Once the coalescing is done, it cannot be broken and
+ // these are not spillable! If the destination interval uses are far away,
+ // think twice about coalescing them!
+ // FIXME: Why are we skipping this test for partial copies?
+ // CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
+ if (!CP.isPartial() && CP.isPhys()) {
+ LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg());
+
+ // Don't join with physregs that have a ridiculous number of live
+ // ranges. The data structure performance is really bad when that
+ // happens.
+ if (li_->hasInterval(CP.getDstReg()) &&
+ li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
+ mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg());
+ ++numAborts;
+ DEBUG(dbgs()
+ << "\tPhysical register live interval too complicated, abort!\n");
+ return false;
+ }
- // Save a copy of the virtual register live interval. We'll manually
- // merge this into the "real" physical register live interval this is
- // coalesced with.
- OwningPtr<LiveInterval> SavedLI;
- if (RealDstReg)
- SavedLI.reset(li_->dupInterval(&SrcInt));
- else if (RealSrcReg)
- SavedLI.reset(li_->dupInterval(&DstInt));
-
- if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
- // Check if it is necessary to propagate "isDead" property.
- MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false);
- bool isDead = mopd->isDead();
-
- // We need to be careful about coalescing a source physical register with a
- // virtual register. Once the coalescing is done, it cannot be broken and
- // these are not spillable! If the destination interval uses are far away,
- // think twice about coalescing them!
- if (!isDead && (SrcIsPhys || DstIsPhys)) {
- // If the virtual register live interval is long but it has low use
- // density, do not join them, instead mark the physical register as its
- // allocation preference.
- LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
- LiveInterval &JoinPInt = SrcIsPhys ? SrcInt : DstInt;
- unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
- unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
-
- // Don't join with physregs that have a ridiculous number of live
- // ranges. The data structure performance is really bad when that
- // happens.
- if (JoinPInt.ranges.size() > 1000) {
- mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
- ++numAborts;
- DEBUG(dbgs()
- << "\tPhysical register live interval too complicated, abort!\n");
- return false;
- }
+ const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+ if (Length > Threshold &&
+ std::distance(mri_->use_nodbg_begin(CP.getSrcReg()),
+ mri_->use_nodbg_end()) * Threshold < Length) {
+ // Before giving up coalescing, if definition of source is defined by
+ // trivial computation, try rematerializing it.
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI))
+ return true;
- const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
- unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
- unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
- if (Length > Threshold &&
- std::distance(mri_->use_nodbg_begin(JoinVReg),
- mri_->use_nodbg_end()) * Threshold < Length) {
- // Before giving up coalescing, if definition of source is defined by
- // trivial computation, try rematerializing it.
- if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI))
- return true;
-
- mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
- ++numAborts;
- DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
+ mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg());
+ ++numAborts;
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
}
}
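A note on the heuristic above: the join is rejected when the virtual interval
is long relative to how often it is actually used, since a physreg tied down
across a sparse range blocks allocation for everyone else. A standalone sketch
of the same proportionality test, with simplified types standing in for
LLVM's (only the threshold rule is taken from the code above):

    #include <cstddef>

    // Decide whether tying a physreg to this virtual interval looks safe.
    // Mirrors: abort when Length > Threshold && uses * Threshold < Length.
    bool physRegJoinLooksProfitable(std::size_t allocatableRegsInClass,
                                    std::size_t lengthInInstrs,
                                    std::size_t numNonDbgUses) {
      const std::size_t threshold = allocatableRegsInClass * 2;
      if (lengthInInstrs <= threshold)
        return true;                  // short interval: joining is cheap
      // Long interval: require enough uses per unit of length.
      return numNonDbgUses * threshold >= lengthInInstrs;
    }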
@@ -1724,32 +1168,24 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Otherwise, if one of the intervals being joined is a physreg, this method
// always canonicalizes DstInt to be it. The output "SrcInt" will not have
// been modified, so we can use this information below to update aliases.
- bool Swapped = false;
- // If SrcInt is implicitly defined, it's safe to coalesce.
- if (SrcInt.empty()) {
- if (!CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
- // Only coalesce an empty interval (defined by implicit_def) with
- // another interval which has a valno defined by the CopyMI and the CopyMI
- // is a kill of the implicit def.
- DEBUG(dbgs() << "\tNot profitable!\n");
- return false;
- }
- } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) {
+ if (!JoinIntervals(CP)) {
// Coalescing failed.
// If definition of source is defined by trivial computation, try
// rematerializing it.
- if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
- ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI))
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()),
+ CP.getDstReg(), 0, CopyMI))
return true;
// If we can eliminate the copy without merging the live ranges, do so now.
- if (!isExtSubReg && !isInsSubReg && !isSubRegToReg &&
- (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
- RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
- JoinedCopies.insert(CopyMI);
- DEBUG(dbgs() << "\tTrivial!\n");
- return true;
+ if (!CP.isPartial()) {
+ if (AdjustCopiesBackFrom(CP, CopyMI) ||
+ RemoveCopyByCommutingDef(CP, CopyMI)) {
+ JoinedCopies.insert(CopyMI);
+ DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
}
// Otherwise, we are unable to join the intervals.
@@ -1758,86 +1194,32 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false;
}
- LiveInterval *ResSrcInt = &SrcInt;
- LiveInterval *ResDstInt = &DstInt;
- if (Swapped) {
- std::swap(SrcReg, DstReg);
- std::swap(ResSrcInt, ResDstInt);
- }
- assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- "LiveInterval::join didn't work right!");
-
- // If we're about to merge live ranges into a physical register live interval,
- // we have to update any aliased register's live ranges to indicate that they
- // have clobbered values for this range.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- // If this is a extract_subreg where dst is a physical register, e.g.
- // cl = EXTRACT_SUBREG reg1024, 1
- // then create and update the actual physical register allocated to RHS.
- if (RealDstReg || RealSrcReg) {
- LiveInterval &RealInt =
- li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg);
- for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(),
- E = SavedLI->vni_end(); I != E; ++I) {
- const VNInfo *ValNo = *I;
- VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->getCopy(),
- false, // updated at *
- li_->getVNInfoAllocator());
- NewValNo->setFlags(ValNo->getFlags()); // * updated here.
- RealInt.addKills(NewValNo, ValNo->kills);
- RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
- }
- RealInt.weight += SavedLI->weight;
- DstReg = RealDstReg ? RealDstReg : RealSrcReg;
- }
-
- // Update the liveintervals of sub-registers.
- for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS)
- li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, *ResSrcInt,
- li_->getVNInfoAllocator());
- }
-
- // If this is a EXTRACT_SUBREG, make sure the result of coalescing is the
- // larger super-register.
- if ((isExtSubReg || isInsSubReg || isSubRegToReg) &&
- !SrcIsPhys && !DstIsPhys) {
- if ((isExtSubReg && !Swapped) ||
- ((isInsSubReg || isSubRegToReg) && Swapped)) {
- ResSrcInt->Copy(*ResDstInt, mri_, li_->getVNInfoAllocator());
- std::swap(SrcReg, DstReg);
- std::swap(ResSrcInt, ResDstInt);
- }
- }
-
// Coalescing to a virtual register that is of a sub-register class of the
// other. Make sure the resulting register is set to the right register class.
- if (CrossRC)
+ if (CP.isCrossClass()) {
++numCrossRCs;
-
- // This may happen even if it's cross-rc coalescing. e.g.
- // %reg1026<def> = SUBREG_TO_REG 0, %reg1037<kill>, 4
- // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to
- // be allocate a register from GR64_ABCD.
- if (NewRC)
- mri_->setRegClass(DstReg, NewRC);
+ mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }
// Remember to delete the copy instruction.
JoinedCopies.insert(CopyMI);
- UpdateRegDefsUses(SrcReg, DstReg, SubIdx);
+ UpdateRegDefsUses(CP);
// If we have extended the live range of a physical register, make sure we
// update live-in lists as well.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- const LiveInterval &VRegInterval = li_->getInterval(SrcReg);
+ if (CP.isPhys()) {
SmallVector<MachineBasicBlock*, 16> BlockSeq;
- for (LiveInterval::const_iterator I = VRegInterval.begin(),
- E = VRegInterval.end(); I != E; ++I ) {
+ // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
+ // ranges for this, and they are preserved.
+ LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
+ for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
+ I != E; ++I ) {
li_->findLiveInMBBs(I->start, I->end, BlockSeq);
for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
MachineBasicBlock &block = *BlockSeq[idx];
- if (!block.isLiveIn(DstReg))
- block.addLiveIn(DstReg);
+ if (!block.isLiveIn(CP.getDstReg()))
+ block.addLiveIn(CP.getDstReg());
}
BlockSeq.clear();
}
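The loop above keeps machine-level liveness consistent: once a physreg's range
is extended, every block whose entry falls inside one of the source's ranges
must list the register as live-in. A toy model of that walk, using plain
indices where the real code uses SlotIndexes and MachineBasicBlocks
(illustrative only):

    #include <cstddef>
    #include <vector>

    struct Range { unsigned start, end; };   // half-open [start, end)

    // Mark every block whose entry index lies inside a source range as
    // live-in: the analogue of findLiveInMBBs() plus block.addLiveIn().
    void updateLiveIns(const std::vector<Range> &srcRanges,
                       const std::vector<unsigned> &blockEntries,
                       std::vector<bool> &liveIn) {
      for (const Range &r : srcRanges)
        for (std::size_t b = 0; b != blockEntries.size(); ++b)
          if (r.start < blockEntries[b] && blockEntries[b] < r.end)
            liveIn[b] = true;
    }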
@@ -1845,32 +1227,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// SrcReg is guaranteed to be the register whose live interval is
// being merged.
- li_->removeInterval(SrcReg);
+ li_->removeInterval(CP.getSrcReg());
// Update regalloc hint.
- tri_->UpdateRegAllocHint(SrcReg, DstReg, *mf_);
-
- // Manually deleted the live interval copy.
- if (SavedLI) {
- SavedLI->clear();
- SavedLI.reset();
- }
-
- // If resulting interval has a preference that no longer fits because of subreg
- // coalescing, just clear the preference.
- unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_);
- if (Preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
- TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
- const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
- if (!RC->contains(Preference))
- mri_->setRegAllocationHint(ResDstInt->reg, 0, 0);
- }
+ tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
DEBUG({
- dbgs() << "\t\tJoined. Result = ";
- ResDstInt->print(dbgs(), tri_);
- dbgs() << "\n";
- });
+ LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
+ dbgs() << "\tJoined. Result = ";
+ DstInt.print(dbgs(), tri_);
+ dbgs() << "\n";
+ });
++numJoins;
return true;
@@ -1927,263 +1294,53 @@ static unsigned ComputeUltimateVN(VNInfo *VNI,
return ThisValNoAssignments[VN] = UltimateVN;
}
-static bool InVector(VNInfo *Val, const SmallVector<VNInfo*, 8> &V) {
- return std::find(V.begin(), V.end(), Val) != V.end();
-}
-
-static bool isValNoDefMove(const MachineInstr *MI, unsigned DR, unsigned SR,
- const TargetInstrInfo *TII,
- const TargetRegisterInfo *TRI) {
- unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
- ;
- else if (MI->isExtractSubreg()) {
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
- } else if (MI->isSubregToReg() ||
- MI->isInsertSubreg()) {
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(2).getReg();
- } else
- return false;
- return (SrcReg == SR || TRI->isSuperRegister(SR, SrcReg)) &&
- (DstReg == DR || TRI->isSuperRegister(DR, DstReg));
-}
-
-/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
-/// the specified live interval is defined by a copy from the specified
-/// register.
-bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
- LiveRange *LR,
- unsigned Reg) {
- unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno);
- if (SrcReg == Reg)
- return true;
- // FIXME: Do isPHIDef and isDefAccurate both need to be tested?
- if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) &&
- TargetRegisterInfo::isPhysicalRegister(li.reg) &&
- *tri_->getSuperRegisters(li.reg)) {
- // It's a sub-register live interval, we may not have precise information.
- // Re-compute it.
- MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start);
- if (DefMI && isValNoDefMove(DefMI, li.reg, Reg, tii_, tri_)) {
- // Cache computed info.
- LR->valno->def = LR->start;
- LR->valno->setCopy(DefMI);
- return true;
- }
- }
- return false;
-}
-
-
-/// ValueLiveAt - Return true if the LiveRange pointed to by the given
-/// iterator, or any subsequent range with the same value number,
-/// is live at the given point.
-bool SimpleRegisterCoalescing::ValueLiveAt(LiveInterval::iterator LRItr,
- LiveInterval::iterator LREnd,
- SlotIndex defPoint) const {
- for (const VNInfo *valno = LRItr->valno;
- (LRItr != LREnd) && (LRItr->valno == valno); ++LRItr) {
- if (LRItr->contains(defPoint))
- return true;
- }
-
- return false;
-}
-
-
-/// SimpleJoin - Attempt to joint the specified interval into this one. The
-/// caller of this method must guarantee that the RHS only contains a single
-/// value number and that the RHS is not defined by a copy from this
-/// interval. This returns false if the intervals are not joinable, or it
-/// joins them and returns true.
-bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
- assert(RHS.containsOneValue());
-
- // Some number (potentially more than one) value numbers in the current
- // interval may be defined as copies from the RHS. Scan the overlapping
- // portions of the LHS and RHS, keeping track of this and looking for
- // overlapping live ranges that are NOT defined as copies. If these exist, we
- // cannot coalesce.
-
- LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end();
- LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end();
-
- if (LHSIt->start < RHSIt->start) {
- LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start);
- if (LHSIt != LHS.begin()) --LHSIt;
- } else if (RHSIt->start < LHSIt->start) {
- RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start);
- if (RHSIt != RHS.begin()) --RHSIt;
- }
-
- SmallVector<VNInfo*, 8> EliminatedLHSVals;
-
- while (1) {
- // Determine if these live intervals overlap.
- bool Overlaps = false;
- if (LHSIt->start <= RHSIt->start)
- Overlaps = LHSIt->end > RHSIt->start;
- else
- Overlaps = RHSIt->end > LHSIt->start;
-
- // If the live intervals overlap, there are two interesting cases: if the
- // LHS interval is defined by a copy from the RHS, it's ok and we record
- // that the LHS value # is the same as the RHS. If it's not, then we cannot
- // coalesce these live ranges and we bail out.
- if (Overlaps) {
- // If we haven't already recorded that this value # is safe, check it.
- if (!InVector(LHSIt->valno, EliminatedLHSVals)) {
- // If it's re-defined by an early clobber somewhere in the live range,
- // then conservatively abort coalescing.
- if (LHSIt->valno->hasRedefByEC())
- return false;
- // Copy from the RHS?
- if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
- return false; // Nope, bail out.
-
- if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def))
- // Here is an interesting situation:
- // BB1:
- // vr1025 = copy vr1024
- // ..
- // BB2:
- // vr1024 = op
- // = vr1025
- // Even though vr1025 is copied from vr1024, it's not safe to
- // coalesce them since the live range of vr1025 intersects the
- // def of vr1024. This happens because vr1025 is assigned the
- // value of the previous iteration of vr1024.
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
+ LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
+ DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+ // If a live interval is a physical register, check for interference with any
+ // aliases. The interference check implemented here is a bit more conservative
+ // than the full interference check below. We allow overlapping live ranges
+ // only when one is a copy of the other.
+ if (CP.isPhys()) {
+ for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
+ if (!li_->hasInterval(*AS))
+ continue;
+ const LiveInterval &LHS = li_->getInterval(*AS);
+ LiveInterval::const_iterator LI = LHS.begin();
+ for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
+ RI != RE; ++RI) {
+ LI = std::lower_bound(LI, LHS.end(), RI->start);
+ // Does LHS have an overlapping live range starting before RI?
+ if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
+ (RI->start != RI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
+ });
return false;
- EliminatedLHSVals.push_back(LHSIt->valno);
- }
-
- // We know this entire LHS live range is okay, so skip it now.
- if (++LHSIt == LHSEnd) break;
- continue;
- }
+ }
- if (LHSIt->end < RHSIt->end) {
- if (++LHSIt == LHSEnd) break;
- } else {
- // One interesting case to check here. It's possible that we have
- // something like "X3 = Y" which defines a new value number in the LHS,
- // and is the last use of this liverange of the RHS. In this case, we
- // want to notice this copy (so that it gets coalesced away) even though
- // the live ranges don't actually overlap.
- if (LHSIt->start == RHSIt->end) {
- if (InVector(LHSIt->valno, EliminatedLHSVals)) {
- // We already know that this value number is going to be merged in
- // if coalescing succeeds. Just skip the liverange.
- if (++LHSIt == LHSEnd) break;
- } else {
- // If it's re-defined by an early clobber somewhere in the live range,
- // then conservatively abort coalescing.
- if (LHSIt->valno->hasRedefByEC())
+ // Check that LHS ranges beginning in this range are copies.
+ for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
+ if (LI->start != LI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
+ });
return false;
- // Otherwise, if this is a copy from the RHS, mark it as being merged
- // in.
- if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
- if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def))
- // Here is an interesting situation:
- // BB1:
- // vr1025 = copy vr1024
- // ..
- // BB2:
- // vr1024 = op
- // = vr1025
- // Even though vr1025 is copied from vr1024, it's not safe to
- // coalesced them since live range of vr1025 intersects the
- // def of vr1024. This happens because vr1025 is assigned the
- // value of the previous iteration of vr1024.
- return false;
- EliminatedLHSVals.push_back(LHSIt->valno);
-
- // We know this entire LHS live range is okay, so skip it now.
- if (++LHSIt == LHSEnd) break;
}
}
}
-
- if (++RHSIt == RHSEnd) break;
- }
- }
-
- // If we got here, we know that the coalescing will be successful and that
- // the value numbers in EliminatedLHSVals will all be merged together. Since
- // the most common case is that EliminatedLHSVals has a single number, we
- // optimize for it: if there is more than one value, we merge them all into
- // the lowest numbered one, then handle the interval as if we were merging
- // with one value number.
- VNInfo *LHSValNo = NULL;
- if (EliminatedLHSVals.size() > 1) {
- // Loop through all the equal value numbers merging them into the smallest
- // one.
- VNInfo *Smallest = EliminatedLHSVals[0];
- for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) {
- if (EliminatedLHSVals[i]->id < Smallest->id) {
- // Merge the current notion of the smallest into the smaller one.
- LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]);
- Smallest = EliminatedLHSVals[i];
- } else {
- // Merge into the smallest.
- LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest);
- }
}
- LHSValNo = Smallest;
- } else if (EliminatedLHSVals.empty()) {
- if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) &&
- *tri_->getSuperRegisters(LHS.reg))
- // Imprecise sub-register information. Can't handle it.
- return false;
- llvm_unreachable("No copies from the RHS?");
- } else {
- LHSValNo = EliminatedLHSVals[0];
- }
-
- // Okay, now that there is a single LHS value number that we're merging the
- // RHS into, update the value number info for the LHS to indicate that the
- // value number is defined where the RHS value number was.
- const VNInfo *VNI = RHS.getValNumInfo(0);
- LHSValNo->def = VNI->def;
- LHSValNo->setCopy(VNI->getCopy());
-
- // Okay, the final step is to loop over the RHS live intervals, adding them to
- // the LHS.
- if (VNI->hasPHIKill())
- LHSValNo->setHasPHIKill(true);
- LHS.addKills(LHSValNo, VNI->kills);
- LHS.MergeRangesInAsValue(RHS, LHSValNo);
-
- LHS.ComputeJoinedWeight(RHS);
-
- // Update regalloc hint if both are virtual registers.
- if (TargetRegisterInfo::isVirtualRegister(LHS.reg) &&
- TargetRegisterInfo::isVirtualRegister(RHS.reg)) {
- std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg);
- std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg);
- if (RHSPref != LHSPref)
- mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second);
}
- // Update the liveintervals of sub-registers.
- if (TargetRegisterInfo::isPhysicalRegister(LHS.reg))
- for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS)
- li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, LHS,
- li_->getVNInfoAllocator());
-
- return true;
-}
-
-/// JoinIntervals - Attempt to join these two intervals. On failure, this
-/// returns false. Otherwise, if one of the intervals being joined is a
-/// physreg, this method always canonicalizes LHS to be it. The output
-/// "RHS" will not have been modified, so we can use this information
-/// below to update aliases.
-bool
-SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
- bool &Swapped) {
// Compute the final value assignment, assuming that the live ranges can be
// coalesced.
SmallVector<int, 16> LHSValNoAssignments;
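The physreg path added at the top of JoinIntervals replaces the old
per-subregister overlap scan with a sharper test: overlap with an alias
interval is tolerated only where the overlapping range is defined by the very
copy being coalesced. A self-contained model of that test; Range and the
callback are stand-ins, not LLVM's types:

    #include <algorithm>
    #include <vector>

    struct Range { unsigned start, end, valnoDef; };  // [start, end)

    template <class Pred>
    bool aliasInterferes(const std::vector<Range> &lhs,   // alias (physreg)
                         const std::vector<Range> &rhs,   // virtual reg
                         Pred defIsCoalescableCopy) {
      auto li = lhs.begin();
      for (const Range &r : rhs) {
        li = std::lower_bound(li, lhs.end(), r.start,
                              [](const Range &a, unsigned s) {
                                return a.start < s;
                              });
        // An alias range starting earlier that runs into r is interference
        // unless r itself is defined by the copy being joined.
        if (li != lhs.begin() && (li - 1)->end > r.start &&
            (r.start != r.valnoDef || !defIsCoalescableCopy(r.start)))
          return true;
        // Alias ranges starting inside r must themselves be copy defs.
        for (auto it = li; it != lhs.end() && it->start < r.end; ++it)
          if (it->start != it->valnoDef || !defIsCoalescableCopy(it->start))
            return true;
      }
      return false;
    }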
@@ -2192,203 +1349,87 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
SmallVector<VNInfo*, 16> NewVNInfo;
- // If a live interval is a physical register, conservatively check if any
- // of its sub-registers is overlapping the live interval of the virtual
- // register. If so, do not coalesce.
- if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) &&
- *tri_->getSubRegisters(LHS.reg)) {
- // If it's coalescing a virtual register to a physical register, estimate
- // its live interval length. This is the *cost* of scanning an entire live
- // interval. If the cost is low, we'll do an exhaustive check instead.
-
- // If this is something like this:
- // BB1:
- // v1024 = op
- // ...
- // BB2:
- // ...
- // RAX = v1024
- //
- // That is, the live interval of v1024 crosses a bb. Then we can't rely on
- // less conservative check. It's possible a sub-register is defined before
- // v1024 (or live in) and live out of BB1.
- if (RHS.containsOneValue() &&
- li_->intervalIsInOneMBB(RHS) &&
- li_->getApproximateInstructionCount(RHS) <= 10) {
- // Perform a more exhaustive check for some common cases.
- if (li_->conflictsWithSubPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
- return false;
- } else {
- for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
- if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
- DEBUG({
- dbgs() << "\tInterfere with sub-register ";
- li_->getInterval(*SR).print(dbgs(), tri_);
- });
- return false;
- }
- }
- } else if (TargetRegisterInfo::isPhysicalRegister(RHS.reg) &&
- *tri_->getSubRegisters(RHS.reg)) {
- if (LHS.containsOneValue() &&
- li_->getApproximateInstructionCount(LHS) <= 10) {
- // Perform a more exhaustive check for some common cases.
- if (li_->conflictsWithSubPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
- return false;
- } else {
- for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)
- if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
- DEBUG({
- dbgs() << "\tInterfere with sub-register ";
- li_->getInterval(*SR).print(dbgs(), tri_);
- });
- return false;
- }
- }
- }
+ LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
+ DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
- // Compute ultimate value numbers for the LHS and RHS values.
- if (RHS.containsOneValue()) {
- // Copies from a liveinterval with a single value are simple to handle and
- // very common, handle the special case here. This is important, because
- // often RHS is small and LHS is large (e.g. a physreg).
-
- // Find out if the RHS is defined as a copy from some value in the LHS.
- int RHSVal0DefinedFromLHS = -1;
- int RHSValID = -1;
- VNInfo *RHSValNoInfo = NULL;
- VNInfo *RHSValNoInfo0 = RHS.getValNumInfo(0);
- unsigned RHSSrcReg = li_->getVNInfoSourceReg(RHSValNoInfo0);
- if (RHSSrcReg == 0 || RHSSrcReg != LHS.reg) {
- // If RHS is not defined as a copy from the LHS, we can use simpler and
- // faster checks to see if the live ranges are coalescable. This joiner
- // can't swap the LHS/RHS intervals though.
- if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
- return SimpleJoin(LHS, RHS);
- } else {
- RHSValNoInfo = RHSValNoInfo0;
- }
- } else {
- // It was defined as a copy from the LHS, find out what value # it is.
- RHSValNoInfo =
- LHS.getLiveRangeContaining(RHSValNoInfo0->def.getPrevSlot())->valno;
- RHSValID = RHSValNoInfo->id;
- RHSVal0DefinedFromLHS = RHSValID;
- }
+ // Loop over the value numbers of the LHS, seeing if any are defined from
+ // the RHS.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
+ continue;
- LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
- RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
- NewVNInfo.resize(LHS.getNumValNums(), NULL);
-
- // Okay, *all* of the values in LHS that are defined as a copy from RHS
- // should now get updated.
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (unsigned LHSSrcReg = li_->getVNInfoSourceReg(VNI)) {
- if (LHSSrcReg != RHS.reg) {
- // If this is not a copy from the RHS, its value number will be
- // unmodified by the coalescing.
- NewVNInfo[VN] = VNI;
- LHSValNoAssignments[VN] = VN;
- } else if (RHSValID == -1) {
- // Otherwise, it is a copy from the RHS, and we don't already have a
- // value# for it. Keep the current value number, but remember it.
- LHSValNoAssignments[VN] = RHSValID = VN;
- NewVNInfo[VN] = RHSValNoInfo;
- LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0;
- } else {
- // Otherwise, use the specified value #.
- LHSValNoAssignments[VN] = RHSValID;
- if (VN == (unsigned)RHSValID) { // Else this val# is dead.
- NewVNInfo[VN] = RHSValNoInfo;
- LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0;
- }
- }
- } else {
- NewVNInfo[VN] = VNI;
- LHSValNoAssignments[VN] = VN;
- }
- }
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
- assert(RHSValID != -1 && "Didn't find value #?");
- RHSValNoAssignments[0] = RHSValID;
- if (RHSVal0DefinedFromLHS != -1) {
- // This path doesn't go through ComputeUltimateVN so just set
- // it to anything.
- RHSValsDefinedFromLHS[RHSValNoInfo0] = (VNInfo*)1;
- }
- } else {
- // Loop over the value numbers of the LHS, seeing if any are defined from
- // the RHS.
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
- continue;
+ // DstReg is known to be a register in the LHS interval. If the src is
+ // from the RHS interval, we can use its value #.
+ if (!CP.isCoalescable(VNI->getCopy()))
+ continue;
- // DstReg is known to be a register in the LHS interval. If the src is
- // from the RHS interval, we can use its value #.
- if (li_->getVNInfoSourceReg(VNI) != RHS.reg)
- continue;
+ // Figure out the value # from the RHS.
+ LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+ LHSValsDefinedFromRHS[VNI] = lr->valno;
+ }
- // Figure out the value # from the RHS.
- LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
- assert(lr && "Cannot find live range");
- LHSValsDefinedFromRHS[VNI] = lr->valno;
- }
+ // Loop over the value numbers of the RHS, seeing if any are defined from
+ // the LHS.
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
+ continue;
- // Loop over the value numbers of the RHS, seeing if any are defined from
- // the LHS.
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
- continue;
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
- // DstReg is known to be a register in the RHS interval. If the src is
- // from the LHS interval, we can use its value #.
- if (li_->getVNInfoSourceReg(VNI) != LHS.reg)
- continue;
+ // DstReg is known to be a register in the RHS interval. If the src is
+ // from the LHS interval, we can use its value #.
+ if (!CP.isCoalescable(VNI->getCopy()))
+ continue;
- // Figure out the value # from the LHS.
- LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
- assert(lr && "Cannot find live range");
- RHSValsDefinedFromLHS[VNI] = lr->valno;
- }
+ // Figure out the value # from the LHS.
+ LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+ RHSValsDefinedFromLHS[VNI] = lr->valno;
+ }
- LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
- RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
- NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
- continue;
- ComputeUltimateVN(VNI, NewVNInfo,
- LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
- LHSValNoAssignments, RHSValNoAssignments);
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ ComputeUltimateVN(VNI, NewVNInfo,
+ LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+ LHSValNoAssignments, RHSValNoAssignments);
+ }
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ // If this value number isn't a copy from the LHS, it's a new number.
+ if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+ NewVNInfo.push_back(VNI);
+ RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+ continue;
}
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
- continue;
- // If this value number isn't a copy from the LHS, it's a new number.
- if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
- NewVNInfo.push_back(VNI);
- RHSValNoAssignments[VN] = NewVNInfo.size()-1;
- continue;
- }
- ComputeUltimateVN(VNI, NewVNInfo,
- RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
- RHSValNoAssignments, LHSValNoAssignments);
- }
+ ComputeUltimateVN(VNI, NewVNInfo,
+ RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+ RHSValNoAssignments, LHSValNoAssignments);
}
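Both mapping loops feed ComputeUltimateVN, which collapses chains of "defined
as a copy of a value on the other side" into a single final value number. A
reduction of that recursion over plain ints; the -2 marker and the cycle
assert mirror the real helper, while all VNInfo bookkeeping is left out:

    #include <cassert>
    #include <unordered_map>
    #include <vector>

    struct Side {
      std::vector<int> assign;                      // value -> final id, -1 unset
      std::unordered_map<int, int> copiedFromOther; // value -> other side's value
    };

    int ultimateVN(int vn, Side &self, Side &other, int &nextId) {
      if (self.assign[vn] >= 0)                 // already resolved
        return self.assign[vn];
      assert(self.assign[vn] != -2 && "cyclic value numbers");
      auto it = self.copiedFromOther.find(vn);
      if (it == self.copiedFromOther.end())
        return self.assign[vn] = nextId++;      // genuinely new value
      if (other.assign[it->second] >= 0)        // other side already resolved
        return self.assign[vn] = other.assign[it->second];
      self.assign[vn] = -2;                     // in progress: cycle guard
      return self.assign[vn] = ultimateVN(it->second, other, self, nextId);
    }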
// Armed with the mappings of LHS/RHS values to ultimate values, walk the
@@ -2399,15 +1440,17 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
LiveInterval::const_iterator JE = RHS.end();
// Skip ahead until the first place of potential sharing.
- if (I->start < J->start) {
- I = std::upper_bound(I, IE, J->start);
- if (I != LHS.begin()) --I;
- } else if (J->start < I->start) {
- J = std::upper_bound(J, JE, I->start);
- if (J != RHS.begin()) --J;
+ if (I != IE && J != JE) {
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
}
- while (1) {
+ while (I != IE && J != JE) {
// Determine if these two live ranges overlap.
bool Overlaps;
if (I->start < J->start) {
@@ -2429,13 +1472,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
return false;
}
- if (I->end < J->end) {
+ if (I->end < J->end)
++I;
- if (I == IE) break;
- } else {
+ else
++J;
- if (J == JE) break;
- }
}
// Update kill info. Some live ranges are extended due to copy coalescing.
@@ -2443,10 +1483,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
VNInfo *VNI = I->first;
unsigned LHSValID = LHSValNoAssignments[VNI->id];
- NewVNInfo[LHSValID]->removeKill(VNI->def);
if (VNI->hasPHIKill())
NewVNInfo[LHSValID]->setHasPHIKill(true);
- RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
}
// Update kill info. Some live ranges are extended due to copy coalescing.
@@ -2454,25 +1492,19 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
VNInfo *VNI = I->first;
unsigned RHSValID = RHSValNoAssignments[VNI->id];
- NewVNInfo[RHSValID]->removeKill(VNI->def);
if (VNI->hasPHIKill())
NewVNInfo[RHSValID]->setHasPHIKill(true);
- LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
}
+ if (LHSValNoAssignments.empty())
+ LHSValNoAssignments.push_back(-1);
+ if (RHSValNoAssignments.empty())
+ RHSValNoAssignments.push_back(-1);
+
// If we get here, we know that we can coalesce the live ranges. Ask the
// intervals to coalesce themselves now.
- if ((RHS.ranges.size() > LHS.ranges.size() &&
- TargetRegisterInfo::isVirtualRegister(LHS.reg)) ||
- TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
- RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo,
- mri_);
- Swapped = true;
- } else {
- LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
- mri_);
- Swapped = false;
- }
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ mri_);
return true;
}
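Once the assignments are computed, LHS.join() does the mechanical part: remap
each side's value numbers and merge the range lists. A toy version of that
final step; the real join also rewrites the VNInfo table in place and never
needs a full re-sort, but the shape is the same:

    #include <algorithm>
    #include <vector>

    struct Range { unsigned start, end; int vn; };  // [start, end)

    std::vector<Range> joinRanges(std::vector<Range> a,
                                  const std::vector<Range> &b,
                                  const std::vector<int> &remapA,
                                  const std::vector<int> &remapB) {
      for (Range &r : a) r.vn = remapA[r.vn];
      for (Range r : b) { r.vn = remapB[r.vn]; a.push_back(r); }
      std::sort(a.begin(), a.end(),
                [](const Range &x, const Range &y) { return x.start < y.start; });
      std::vector<Range> out;
      for (const Range &r : a) {
        // Coalesce abutting or overlapping ranges with the same value number.
        if (!out.empty() && out.back().end >= r.start && out.back().vn == r.vn)
          out.back().end = std::max(out.back().end, r.end);
        else
          out.push_back(r);
      }
      return out;
    }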
@@ -2513,15 +1545,10 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
// If this isn't a copy or an extract_subreg, we can't join intervals.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
bool isInsUndef = false;
- if (Inst->isExtractSubreg()) {
+ if (Inst->isCopy()) {
DstReg = Inst->getOperand(0).getReg();
SrcReg = Inst->getOperand(1).getReg();
- } else if (Inst->isInsertSubreg()) {
- DstReg = Inst->getOperand(0).getReg();
- SrcReg = Inst->getOperand(2).getReg();
- if (Inst->getOperand(1).isUndef())
- isInsUndef = true;
- } else if (Inst->isInsertSubreg() || Inst->isSubregToReg()) {
+ } else if (Inst->isSubregToReg()) {
DstReg = Inst->getOperand(0).getReg();
SrcReg = Inst->getOperand(2).getReg();
} else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
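One detail worth calling out in the dispatch above: the source register lives
at a different operand index per copy-like opcode. A plain COPY reads operand
1, while SUBREG_TO_REG's operand 1 is an immediate, so its source is operand
2. Distilled (illustrative enum; the real code queries MachineInstr directly):

    enum CopyKind { FullCopy, SubregToReg };

    // Operand index of the source register for each copy-like form.
    unsigned srcOperandIndex(CopyKind k) {
      return k == SubregToReg ? 2u : 1u;  // SUBREG_TO_REG: op 1 is an imm
    }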
@@ -2650,6 +1677,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
E = mri_->use_nodbg_end(); I != E; ++I) {
MachineOperand &Use = I.getOperand();
MachineInstr *UseMI = Use.getParent();
+ if (UseMI->isIdentityCopy())
+ continue;
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
SrcReg == DstReg && SrcSubIdx == DstSubIdx)
@@ -2680,7 +1709,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
// Ignore identity copies.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ if (!MI->isIdentityCopy() &&
+ !(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
SrcReg == DstReg && SrcSubIdx == DstSubIdx))
for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
MachineOperand &Use = MI->getOperand(i);
@@ -2750,10 +1780,9 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
// Delete all coalesced copies.
bool DoDelete = true;
if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- assert((MI->isExtractSubreg() || MI->isInsertSubreg() ||
- MI->isSubregToReg()) && "Unrecognized copy instruction");
- DstReg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ assert(MI->isCopyLike() && "Unrecognized copy instruction");
+ SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
// Do not delete extract_subreg, insert_subreg of physical
// registers unless the definition is dead. e.g.
// %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
@@ -2762,7 +1791,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
DoDelete = false;
}
if (MI->allDefsAreDead()) {
- LiveInterval &li = li_->getInterval(DstReg);
+ LiveInterval &li = li_->getInterval(SrcReg);
if (!ShortenDeadCopySrcLiveRange(li, MI))
ShortenDeadCopyLiveRange(li, MI);
DoDelete = true;
@@ -2812,12 +1841,13 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
// If the move will be an identity move delete it
bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
- if (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx) {
+ if (MI->isIdentityCopy() ||
+ (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) {
if (li_->hasInterval(SrcReg)) {
LiveInterval &RegInt = li_->getInterval(SrcReg);
// If def of this move instruction is dead, remove its live range
- // from the dstination register's live interval.
- if (MI->registerDefIsDead(DstReg)) {
+ // from the destination register's live interval.
+ if (MI->allDefsAreDead()) {
if (!ShortenDeadCopySrcLiveRange(RegInt, MI))
ShortenDeadCopyLiveRange(RegInt, MI);
}
@@ -2832,17 +1862,13 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
// Check for now unnecessary kill flags.
if (li_->isNotInMIMap(MI)) continue;
- SlotIndex UseIdx = li_->getInstructionIndex(MI).getUseIndex();
+ SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isKill()) continue;
unsigned reg = MO.getReg();
if (!reg || !li_->hasInterval(reg)) continue;
- LiveInterval &LI = li_->getInterval(reg);
- const LiveRange *LR = LI.getLiveRangeContaining(UseIdx);
- if (!LR ||
- (!LR->valno->isKill(UseIdx.getDefIndex()) &&
- LR->valno->def != UseIdx.getDefIndex()))
+ if (!li_->getInterval(reg).killedAt(DefIdx))
MO.setIsKill(false);
}
}
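The header diff below records the API shift: helpers that used to take raw
registers, subindices, and interval pairs now take a CoalescerPair that was
analyzed once up front. A minimal model of the surface the .cpp changes above
rely on; an illustrative reduction, not LLVM's actual CoalescerPair, which
also performs the setup analysis itself:

    #include <utility>

    struct CoalescerPairModel {
      unsigned srcReg = 0, dstReg = 0, subIdx = 0;
      bool phys = false;        // dst is a physical register
      bool partial = false;     // copy covers only a subregister of dst
      bool crossClass = false;  // joined reg needs a new register class
      bool flipped = false;     // src/dst were swapped after setup

      unsigned getSrcReg() const { return srcReg; }
      unsigned getDstReg() const { return dstReg; }
      bool isPhys() const { return phys; }
      bool isPartial() const { return partial; }
      bool isCrossClass() const { return crossClass; }
      bool isFlipped() const { return flipped; }
      void flip() { std::swap(srcReg, dstReg); flipped = !flipped; }
    };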
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index 1be04f3..e154da6 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -105,21 +105,12 @@ namespace llvm {
/// possible to coalesce this interval, but it may be possible if other
/// things get coalesced, then it returns true by reference in 'Again'.
bool JoinCopy(CopyRec &TheCopy, bool &Again);
-
+
/// JoinIntervals - Attempt to join these two intervals. On failure, this
- /// returns false. Otherwise, if one of the intervals being joined is a
- /// physreg, this method always canonicalizes DestInt to be it. The output
- /// "SrcInt" will not have been modified, so we can use this information
- /// below to update aliases.
- bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped);
-
- /// SimpleJoin - Attempt to join the specified interval into this one. The
- /// caller of this method must guarantee that the RHS only contains a single
- /// value number and that the RHS is not defined by a copy from this
- /// interval. This returns false if the intervals are not joinable, or it
- /// joins them and returns true.
- bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS);
-
+ /// returns false. The output "SrcInt" will not have been modified, so we can
+ /// use this information below to update aliases.
+ bool JoinIntervals(CoalescerPair &CP);
+
/// Return true if the two specified registers belong to different register
/// classes. The registers may be either phys or virt regs.
bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;
@@ -128,8 +119,7 @@ namespace llvm {
/// the source value number is defined by a copy from the destination reg
/// see if we can merge these two destination reg valno# into a single
/// value number, eliminating a copy.
- bool AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB,
- MachineInstr *CopyMI);
+ bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
/// HasOtherReachingDefs - Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
@@ -140,8 +130,7 @@ namespace llvm {
/// If the source value number is defined by a commutable instruction and
/// its other operand is coalesced to the copy dest register, see if we
/// can transform the copy into a noop by commuting the definition.
- bool RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB,
- MachineInstr *CopyMI);
+ bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
/// TrimLiveIntervalToLastUse - If there is a last use in the same basic
/// block as the copy instruction, trim the live interval to the last use
@@ -155,28 +144,6 @@ namespace llvm {
bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
unsigned DstSubIdx, MachineInstr *CopyMI);
- /// CanCoalesceWithImpDef - Returns true if the specified copy instruction
- /// from an implicit def to another register can be coalesced away.
- bool CanCoalesceWithImpDef(MachineInstr *CopyMI,
- LiveInterval &li, LiveInterval &ImpLi) const;
-
- /// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an
- /// implicit_def and it is being removed. Turn all copies from this value#
- /// into implicit_defs.
- void TurnCopiesFromValNoToImpDefs(LiveInterval &li, VNInfo *VNI);
-
- /// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a
- /// a virtual destination register with physical source register.
- bool isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI,
- MachineBasicBlock *CopyMBB,
- LiveInterval &DstInt, LiveInterval &SrcInt);
-
- /// isWinToJoinVRWithDstPhysReg - Return true if it's worth while to join a
- /// copy from a virtual source register to a physical destination register.
- bool isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
- MachineBasicBlock *CopyMBB,
- LiveInterval &DstInt, LiveInterval &SrcInt);
-
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
bool isWinToJoinCrossClass(unsigned SrcReg,
@@ -185,43 +152,12 @@ namespace llvm {
const TargetRegisterClass *DstRC,
const TargetRegisterClass *NewRC);
- /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
- /// register with a physical register, check if any of the virtual register
- /// operand is a sub-register use or def. If so, make sure it won't result
- /// in an illegal extract_subreg or insert_subreg instruction.
- bool HasIncompatibleSubRegDefUse(MachineInstr *CopyMI,
- unsigned VirtReg, unsigned PhysReg);
-
- /// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce
- /// an extract_subreg where dst is a physical register, e.g.
- /// cl = EXTRACT_SUBREG reg1024, 1
- bool CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
- unsigned SubIdx, unsigned &RealDstReg);
-
- /// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce
- /// an insert_subreg where src is a physical register, e.g.
- /// reg1024 = INSERT_SUBREG reg1024, c1, 0
- bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg,
- unsigned SubIdx, unsigned &RealDstReg);
-
- /// ValueLiveAt - Return true if the LiveRange pointed to by the given
- /// iterator, or any subsequent range with the same value number,
- /// is live at the given point.
- bool ValueLiveAt(LiveInterval::iterator LRItr, LiveInterval::iterator LREnd,
- SlotIndex defPoint) const;
-
- /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of
- /// the specified live interval is defined by a copy from the specified
- /// register.
- bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR,
- unsigned Reg);
-
/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
- void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+ void UpdateRegDefsUses(const CoalescerPair &CP);
/// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy.
/// Return true if live interval is removed.
@@ -238,6 +174,10 @@ namespace llvm {
/// it as well.
bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI);
+ /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the
+ /// VNInfo copy flag for DstReg and all aliases.
+ void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI);
+
/// lastRegisterUse - Returns the last use of the specific register between
/// cycles Start and End or NULL if there are no uses.
MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End,
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 059e8d6..e90869d 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -46,6 +46,8 @@ namespace {
Constant *UnregisterFn;
Constant *BuiltinSetjmpFn;
Constant *FrameAddrFn;
+ Constant *StackAddrFn;
+ Constant *StackRestoreFn;
Constant *LSDAAddrFn;
Value *PersonalityFn;
Constant *SelectorFn;
@@ -69,7 +71,7 @@ namespace {
void insertCallSiteStore(Instruction *I, int Number, Value *CallSite);
void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite,
SwitchInst *CatchSwitch);
- void splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
+ void splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes);
bool insertSjLjEHSupport(Function &F);
};
} // end anonymous namespace
@@ -107,6 +109,8 @@ bool SjLjEHPass::doInitialization(Module &M) {
PointerType::getUnqual(FunctionContextTy),
(Type *)0);
FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
+ StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp);
LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda);
SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
@@ -175,8 +179,10 @@ static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
/// we spill into a stack location, guaranteeing that there is nothing live
/// across the unwind edge. This process also splits all critical edges
/// coming out of invoke's.
+/// FIXME: Move this function to a common utility file (Local.cpp?) so
+/// both SjLj and LowerInvoke can use it.
void SjLjEHPass::
-splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
+splitLiveRangesAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
// First step, split all critical edges from invoke instructions.
for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
InvokeInst *II = Invokes[i];
@@ -198,16 +204,33 @@ splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
++AfterAllocaInsertPt;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
- // This is always a no-op cast because we're casting AI to AI->getType() so
- // src and destination types are identical. BitCast is the only possibility.
- CastInst *NC = new BitCastInst(
- AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
- AI->replaceAllUsesWith(NC);
- // Normally its is forbidden to replace a CastInst's operand because it
- // could cause the opcode to reflect an illegal conversion. However, we're
- // replacing it here with the same value it was constructed with to simply
- // make NC its user.
- NC->setOperand(0, AI);
+ const Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the AllocaInst.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the AllocaInst.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the
+ // operand, but we want this one to remain.
+ NC->setOperand(0, AI);
+ }
}
// Finally, scan the code looking for instructions with bad live ranges.
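The aggregate branch above is the interesting one: an extractvalue/insertvalue
pair on element 0 rebuilds a value equal to AI, giving replaceAllUsesWith() a
safe landing spot where a bitcast would be illegal. Isolated as a helper; this
assumes the declarations already included by SjLjEHPrepare.cpp and is a
sketch, not a drop-in utility:

    static Instruction *createAggregateNoopUser(Argument *AI,
                                                Instruction *InsertPt) {
      Instruction *EI = ExtractValueInst::Create(AI, 0, "", InsertPt);
      Instruction *NI = InsertValueInst::Create(AI, EI, 0);
      NI->insertAfter(EI);
      AI->replaceAllUsesWith(NI);
      // replaceAllUsesWith() also rewrote the pair's own operands; point
      // them back at AI so they still read the original argument.
      EI->setOperand(0, AI);
      NI->setOperand(0, AI);
      return NI;
    }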
@@ -266,6 +289,9 @@ splitLiveRangesLiveAcrossInvokes(SmallVector<InvokeInst*,16> &Invokes) {
}
// If we decided we need a spill, do it.
+ // FIXME: Spilling this way is overkill, as it forces all uses of
+ // the value to be reloaded from the stack slot, even those that aren't
+ // in the unwind blocks. We should be more selective.
if (NeedsSpill) {
++NumSpilled;
DemoteRegToStack(*Inst, true);
@@ -294,22 +320,34 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// If we don't have any invokes or unwinds, there's nothing to do.
if (Unwinds.empty() && Invokes.empty()) return false;
- // Find the eh.selector.* and eh.exception calls. We'll use the first
- // eh.selector to determine the right personality function to use. For
- // SJLJ, we always use the same personality for the whole function,
- // not on a per-selector basis.
+ // Find the eh.selector.*, eh.exception and alloca calls.
+ //
+ // Remember any allocas() that aren't in the entry block, as the
+ // jmpbuf saved SP will need to be updated for them.
+ //
+ // We'll use the first eh.selector to determine the right personality
+ // function to use. For SJLJ, we always use the same personality for the
+ // whole function, not on a per-selector basis.
// FIXME: That's a bit ugly. Better way?
SmallVector<CallInst*,16> EH_Selectors;
SmallVector<CallInst*,16> EH_Exceptions;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
+ SmallVector<Instruction*,16> JmpbufUpdatePoints;
+ // Note: Skip the entry block since there's nothing there that interests
+ // us. eh.selector and eh.exception shouldn't ever be there, and we
+ // want to disregard any allocas that are there.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
if (CallInst *CI = dyn_cast<CallInst>(I)) {
if (CI->getCalledFunction() == SelectorFn) {
- if (!PersonalityFn) PersonalityFn = CI->getOperand(2);
+ if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1);
EH_Selectors.push_back(CI);
} else if (CI->getCalledFunction() == ExceptionFn) {
EH_Exceptions.push_back(CI);
+ } else if (CI->getCalledFunction() == StackRestoreFn) {
+ JmpbufUpdatePoints.push_back(CI);
}
+ } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ JmpbufUpdatePoints.push_back(AI);
}
}
}
@@ -329,7 +367,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// we spill into a stack location, guaranteeing that there is nothing live
// across the unwind edge. This process also splits all critical edges
// coming out of invoke's.
- splitLiveRangesLiveAcrossInvokes(Invokes);
+ splitLiveRangesAcrossInvokes(Invokes);
BasicBlock *EntryBB = F.begin();
// Create an alloca for the incoming jump buffer ptr and the new jump buffer
@@ -419,7 +457,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
// Populate the Function Context
// 1. LSDA address
// 2. Personality function address
- // 3. jmpbuf (save FP and call eh.sjlj.setjmp)
+ // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
// LSDA address
Idxs[0] = Zero;
@@ -440,31 +478,41 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
EntryBB->getTerminator());
- // Save the frame pointer.
+ // Save the frame pointer.
Idxs[1] = ConstantInt::get(Int32Ty, 5);
- Value *FieldPtr
+ Value *JBufPtr
= GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
"jbuf_gep",
EntryBB->getTerminator());
Idxs[1] = ConstantInt::get(Int32Ty, 0);
- Value *ElemPtr =
- GetElementPtrInst::Create(FieldPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ Value *FramePtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
EntryBB->getTerminator());
Value *Val = CallInst::Create(FrameAddrFn,
ConstantInt::get(Int32Ty, 0),
"fp",
EntryBB->getTerminator());
- new StoreInst(Val, ElemPtr, true, EntryBB->getTerminator());
- // Call the setjmp instrinsic. It fills in the rest of the jmpbuf
+ new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+ // Save the stack pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *StackPtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+ EntryBB->getTerminator());
+
+ Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+ new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+ // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
Value *SetjmpArg =
- CastInst::Create(Instruction::BitCast, FieldPtr,
+ CastInst::Create(Instruction::BitCast, JBufPtr,
Type::getInt8PtrTy(F.getContext()), "",
EntryBB->getTerminator());
Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
"dispatch",
EntryBB->getTerminator());
- // check the return value of the setjmp. non-zero goes to dispatcher
+ // Check the return value of the setjmp. Non-zero goes to the dispatcher.
Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
ICmpInst::ICMP_EQ, DispatchVal, Zero,
"notunwind");
@@ -509,6 +557,16 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
Unwinds[i]->eraseFromParent();
}
+ // Following any allocas not in the entry block, update the saved SP
+ // in the jmpbuf to the new value.
+ for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
+ Instruction *AI = JmpbufUpdatePoints[i];
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(AI);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
+ }
+
// Finally, for any returns from this function, if this function contains an
// invoke, add a call to unregister the function context.
for (unsigned i = 0, e = Returns.size(); i != e; ++i)
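[Editor's sketch] Why the jmpbuf's saved SP has to chase allocas outside the entry block: a C-level illustration (purely illustrative; bar() is hypothetical and the pass of course operates on the IR equivalent):

#include <alloca.h>  // POSIX alloca, used here only for illustration

void bar(char *buf, int n);  // assumed able to unwind back into foo

void foo(int n) {
  // SjLj EH saves SP once, in the entry block. Without the update loop
  // above, the alloca below would leave that saved SP pointing into what
  // is now live stack memory, and an unwind landing back in foo would
  // resume on a clobbered stack.
  if (n > 0) {
    char *buf = (char *)alloca(n);  // moves SP after the initial save
    bar(buf, n);
  }
}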
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 6110ef5..7a227cf 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -213,9 +213,11 @@ void SlotIndexes::dump() const {
// Print a SlotIndex to a raw_ostream.
void SlotIndex::print(raw_ostream &os) const {
- os << getIndex();
+ os << entry().getIndex();
if (isPHI())
os << "*";
+ else
+ os << "LudS"[getSlot()];
}
// Dump a SlotIndex to stderr.
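[Editor's sketch] The "LudS" expression above indexes a string literal by slot number, printing one letter per sub-instruction slot. A self-contained sketch of the idiom (the slot order Load, use, def, Store is inferred from the string and should be treated as an assumption):

#include <cstdio>

enum Slot { LOAD, USE, DEF, STORE };  // assumed order, mirroring "LudS"

static char slotChar(unsigned S) {
  return "LudS"[S];  // a string literal is an array, so it can be indexed
}

int main() {
  std::printf("%c%c\n", slotChar(LOAD), slotChar(DEF));  // prints "Ld"
  return 0;
}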
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index a7b2efe..56bcb28 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -14,18 +14,20 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <set>
using namespace llvm;
namespace {
- enum SpillerName { trivial, standard, splitting };
+ enum SpillerName { trivial, standard, splitting, inline_ };
}
static cl::opt<SpillerName>
@@ -35,6 +37,7 @@ spillerOpt("spiller",
cl::values(clEnumVal(trivial, "trivial spiller"),
clEnumVal(standard, "default spiller"),
clEnumVal(splitting, "splitting spiller"),
+ clEnumValN(inline_, "inline", "inline spiller"),
clEnumValEnd),
cl::init(standard));
@@ -53,8 +56,8 @@ protected:
const TargetInstrInfo *tii;
const TargetRegisterInfo *tri;
VirtRegMap *vrm;
-
- /// Construct a spiller base.
+
+ /// Construct a spiller base.
SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
: mf(mf), lis(lis), vrm(vrm)
{
@@ -67,7 +70,8 @@ protected:
/// Add spill ranges for every use/def of the live interval, inserting loads
/// immediately before each use, and stores after each def. No folding or
/// remat is attempted.
- std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) {
+ void trivialSpillEverywhere(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals) {
DEBUG(dbgs() << "Spilling everywhere " << *li << "\n");
assert(li->weight != HUGE_VALF &&
@@ -78,8 +82,6 @@ protected:
DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
- std::vector<LiveInterval*> added;
-
const TargetRegisterClass *trc = mri->getRegClass(li->reg);
unsigned ss = vrm->assignVirt2StackSlot(li->reg);
@@ -96,7 +98,7 @@ protected:
do {
++regItr;
} while (regItr != mri->reg_end() && (&*regItr == mi));
-
+
// Collect uses & defs for this instr.
SmallVector<unsigned, 2> indices;
bool hasUse = false;
@@ -116,7 +118,7 @@ protected:
vrm->assignVirt2StackSlot(newVReg, ss);
LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
newLI->weight = HUGE_VALF;
-
+
// Update the reg operands & kill flags.
for (unsigned i = 0; i < indices.size(); ++i) {
unsigned mopIdx = indices[i];
@@ -136,10 +138,10 @@ protected:
MachineInstr *loadInstr(prior(miItr));
SlotIndex loadIndex =
lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
+ vrm->addSpillSlotUse(ss, loadInstr);
SlotIndex endIndex = loadIndex.getNextIndex();
VNInfo *loadVNI =
newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator());
- loadVNI->addKill(endIndex);
newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
}
@@ -150,17 +152,15 @@ protected:
MachineInstr *storeInstr(llvm::next(miItr));
SlotIndex storeIndex =
lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
+ vrm->addSpillSlotUse(ss, storeInstr);
SlotIndex beginIndex = storeIndex.getPrevIndex();
VNInfo *storeVNI =
newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator());
- storeVNI->addKill(storeIndex);
newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
}
- added.push_back(newLI);
+ newIntervals.push_back(newLI);
}
-
- return added;
}
};
@@ -176,11 +176,12 @@ public:
TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm)
: SpillerBase(mf, lis, vrm) {}
- std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex*) {
+ void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &,
+ SlotIndex*) {
// Ignore spillIs - we don't use it.
- return trivialSpillEverywhere(li);
+ trivialSpillEverywhere(li, newIntervals);
}
};
@@ -200,10 +201,13 @@ public:
: lis(lis), loopInfo(loopInfo), vrm(vrm) {}
/// Falls back on LiveIntervals::addIntervalsForSpills.
- std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex*) {
- return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
+ void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex*) {
+ std::vector<LiveInterval*> added =
+ lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
+ newIntervals.insert(newIntervals.end(), added.begin(), added.end());
}
};
@@ -214,7 +218,7 @@ namespace {
/// When a call to spill is placed this spiller will first try to break the
/// interval up into its component values (one new interval per value).
/// If this fails, or if a call is placed to spill a previously split interval
-/// then the spiller falls back on the standard spilling mechanism.
+/// then the spiller falls back on the standard spilling mechanism.
class SplittingSpiller : public StandardSpiller {
public:
SplittingSpiller(MachineFunction *mf, LiveIntervals *lis,
@@ -226,22 +230,21 @@ public:
tri = mf->getTarget().getRegisterInfo();
}
- std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestStart) {
-
- if (worthTryingToSplit(li)) {
- return tryVNISplit(li, earliestStart);
- }
- // else
- return StandardSpiller::spill(li, spillIs, earliestStart);
+ void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestStart) {
+ if (worthTryingToSplit(li))
+ tryVNISplit(li, earliestStart);
+ else
+ StandardSpiller::spill(li, newIntervals, spillIs, earliestStart);
}
private:
MachineRegisterInfo *mri;
const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
+ const TargetRegisterInfo *tri;
DenseSet<LiveInterval*> alreadySplit;
bool worthTryingToSplit(LiveInterval *li) const {
@@ -258,18 +261,18 @@ private:
SmallVector<VNInfo*, 4> vnis;
std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
-
+
for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
VNInfo *vni = *vniItr;
-
- // Skip unused VNIs, or VNIs with no kills.
- if (vni->isUnused() || vni->kills.empty())
+
+ // Skip unused VNIs.
+ if (vni->isUnused())
continue;
DEBUG(dbgs() << " Extracted Val #" << vni->id << " as ");
LiveInterval *splitInterval = extractVNI(li, vni);
-
+
if (splitInterval != 0) {
DEBUG(dbgs() << *splitInterval << "\n");
added.push_back(splitInterval);
@@ -281,12 +284,12 @@ private:
} else {
DEBUG(dbgs() << "0\n");
}
- }
+ }
DEBUG(dbgs() << "Original LI: " << *li << "\n");
// If the original interval still contains some live ranges
- // add it to added and alreadySplit.
+ // add it to added and alreadySplit.
if (!li->empty()) {
added.push_back(li);
alreadySplit.insert(li);
@@ -302,16 +305,15 @@ private:
/// Extract the given value number from the interval.
LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
assert(vni->isDefAccurate() || vni->isPHIDef());
- assert(!vni->kills.empty());
- // Create a new vreg and live interval, copy VNI kills & ranges over.
+ // Create a new vreg and live interval, copy VNI ranges over.
const TargetRegisterClass *trc = mri->getRegClass(li->reg);
unsigned newVReg = mri->createVirtualRegister(trc);
vrm->grow();
LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
- // Start by copying all live ranges in the VN to the new interval.
+ // Start by copying all live ranges in the VN to the new interval.
for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
rItr != rEnd; ++rItr) {
if (rItr->valno == vni) {
@@ -319,7 +321,7 @@ private:
}
}
- // Erase the old VNI & ranges.
+ // Erase the old VNI & ranges.
li->removeValNo(vni);
// Collect all current uses of the register belonging to the given VNI.
@@ -336,15 +338,13 @@ private:
// Insert a copy at the start of the MBB. The range preceding the
// copy will be attached to the original LiveInterval.
MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
- tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc,
- DebugLoc());
- MachineInstr *copyMI = defMBB->begin();
- copyMI->addRegisterKilled(li->reg, tri);
+ MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(),
+ tii->get(TargetOpcode::COPY), newVReg)
+ .addReg(li->reg, RegState::Kill);
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB),
0, false, lis->getVNInfoAllocator());
phiDefVNI->setIsPHIDef(true);
- phiDefVNI->addKill(copyIdx.getDefIndex());
li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI));
LiveRange *oldPHIDefRange =
newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB));
@@ -367,8 +367,8 @@ private:
newVNI->setIsPHIDef(false); // not a PHI def anymore.
newVNI->setIsDefAccurate(true);
} else {
- // non-PHI def. Rename the def. If it's two-addr that means renaming the use
- // and inserting a new copy too.
+ // non-PHI def. Rename the def. If it's two-addr that means renaming the
+ // use and inserting a new copy too.
MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def);
// We'll rename this now, so we can remove it from uses.
uses.erase(defInst);
@@ -384,38 +384,26 @@ private:
twoAddrUseIsUndef = true;
}
}
-
+
SlotIndex defIdx = lis->getInstructionIndex(defInst);
newVNI->def = defIdx.getDefIndex();
if (isTwoAddr && !twoAddrUseIsUndef) {
MachineBasicBlock *defMBB = defInst->getParent();
- tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc,
- DebugLoc());
- MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst));
+ MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(),
+ tii->get(TargetOpcode::COPY), newVReg)
+ .addReg(li->reg, RegState::Kill);
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- copyMI->addRegisterKilled(li->reg, tri);
LiveRange *origUseRange =
li->getLiveRangeContaining(newVNI->def.getUseIndex());
- VNInfo *origUseVNI = origUseRange->valno;
origUseRange->end = copyIdx.getDefIndex();
- bool updatedKills = false;
- for (unsigned k = 0; k < origUseVNI->kills.size(); ++k) {
- if (origUseVNI->kills[k] == defIdx.getDefIndex()) {
- origUseVNI->kills[k] = copyIdx.getDefIndex();
- updatedKills = true;
- break;
- }
- }
- assert(updatedKills && "Failed to update VNI kill list.");
VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI,
true, lis->getVNInfoAllocator());
- copyVNI->addKill(defIdx.getDefIndex());
LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI);
newLI->addRange(copyRange);
- }
+ }
}
-
+
for (std::set<MachineInstr*>::iterator
usesItr = uses.begin(), usesEnd = uses.end();
usesItr != usesEnd; ++usesItr) {
@@ -435,7 +423,7 @@ private:
// Check if this instr is two address.
unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg);
bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx);
-
+
// Rename uses (and defs for two-address instrs).
for (unsigned i = 0; i < useInst->getNumOperands(); ++i) {
MachineOperand &mo = useInst->getOperand(i);
@@ -451,10 +439,9 @@ private:
// reg.
MachineBasicBlock *useMBB = useInst->getParent();
MachineBasicBlock::iterator useItr(useInst);
- tii->copyRegToReg(*useMBB, llvm::next(useItr), li->reg, newVReg, trc, trc,
- DebugLoc());
- MachineInstr *copyMI = llvm::next(useItr);
- copyMI->addRegisterKilled(newVReg, tri);
+ MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(),
+ tii->get(TargetOpcode::COPY), newVReg)
+ .addReg(li->reg, RegState::Kill);
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
// Change the old two-address defined range & vni to start at
@@ -470,56 +457,44 @@ private:
VNInfo *copyVNI =
newLI->getNextValue(useIdx.getDefIndex(), 0, true,
lis->getVNInfoAllocator());
- copyVNI->addKill(copyIdx.getDefIndex());
LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
newLI->addRange(copyRange);
}
}
-
- // Iterate over any PHI kills - we'll need to insert new copies for them.
- for (VNInfo::KillSet::iterator
- killItr = newVNI->kills.begin(), killEnd = newVNI->kills.end();
- killItr != killEnd; ++killItr) {
- SlotIndex killIdx(*killItr);
- if (killItr->isPHI()) {
- MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
- LiveRange *oldKillRange =
- newLI->getLiveRangeContaining(killIdx);
-
- assert(oldKillRange != 0 && "No kill range?");
-
- tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(),
- li->reg, newVReg, trc, trc,
- DebugLoc());
- MachineInstr *copyMI = prior(killMBB->getFirstTerminator());
- copyMI->addRegisterKilled(newVReg, tri);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- // Save the current end. We may need it to add a new range if the
- // current range runs of the end of the MBB.
- SlotIndex newKillRangeEnd = oldKillRange->end;
- oldKillRange->end = copyIdx.getDefIndex();
+ // Iterate over any PHI kills - we'll need to insert new copies for them.
+ for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end();
+ LRI != LRE; ++LRI) {
+ if (LRI->valno != newVNI || LRI->end.isPHI())
+ continue;
+ SlotIndex killIdx = LRI->end;
+ MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
+ MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(),
+ DebugLoc(), tii->get(TargetOpcode::COPY),
+ li->reg)
+ .addReg(newVReg, RegState::Kill);
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
- assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
- "PHI kill range doesn't reach kill-block end. Not sane.");
- newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
- newKillRangeEnd, newVNI));
- }
+ // Save the current end. We may need it to add a new range if the
+ // current range runs off the end of the MBB.
+ SlotIndex newKillRangeEnd = LRI->end;
+ LRI->end = copyIdx.getDefIndex();
- *killItr = oldKillRange->end;
- VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
- copyMI, true,
- lis->getVNInfoAllocator());
- newKillVNI->addKill(lis->getMBBTerminatorGap(killMBB));
- newKillVNI->setHasPHIKill(true);
- li->addRange(LiveRange(copyIdx.getDefIndex(),
- lis->getMBBEndIdx(killMBB),
- newKillVNI));
+ if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
+ assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
+ "PHI kill range doesn't reach kill-block end. Not sane.");
+ newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
+ newKillRangeEnd, newVNI));
}
+ VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
+ copyMI, true,
+ lis->getVNInfoAllocator());
+ newKillVNI->setHasPHIKill(true);
+ li->addRange(LiveRange(copyIdx.getDefIndex(),
+ lis->getMBBEndIdx(killMBB),
+ newKillVNI));
}
-
newVNI->setHasPHIKill(false);
return newLI;
@@ -530,6 +505,13 @@ private:
} // end anonymous namespace
+namespace llvm {
+Spiller *createInlineSpiller(MachineFunction*,
+ LiveIntervals*,
+ const MachineLoopInfo*,
+ VirtRegMap*);
+}
+
llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
const MachineLoopInfo *loopInfo,
VirtRegMap *vrm) {
@@ -538,5 +520,6 @@ llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
case trivial: return new TrivialSpiller(mf, lis, vrm);
case standard: return new StandardSpiller(lis, loopInfo, vrm);
case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm);
+ case inline_: return createInlineSpiller(mf, lis, loopInfo, vrm);
}
}
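[Editor's sketch] One pattern recurs through the rest of this commit (above in Spiller.cpp, and below in StackSlotColoring, StrongPHIElimination, TailDuplication and TwoAddressInstructionPass): calls to the target hook copyRegToReg are replaced by the target-independent COPY pseudo-instruction, which the later pipeline coalesces away or lowers to a real move. A minimal sketch of the new idiom (the helper name and exact includes are assumptions):

#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;

// Emit DstReg = COPY SrcReg before I; no register-class plumbing needed.
static MachineInstr *emitCopy(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I,
                              const TargetInstrInfo *TII, DebugLoc DL,
                              unsigned DstReg, unsigned SrcReg) {
  return BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), DstReg)
           .addReg(SrcReg);
}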
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index dda52e8..450447b 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -33,11 +33,19 @@ namespace llvm {
public:
virtual ~Spiller() = 0;
- /// Spill the given live range. The method used will depend on the Spiller
- /// implementation selected.
- virtual std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs,
- SlotIndex *earliestIndex = 0) = 0;
+ /// spill - Spill the given live interval. The method used will depend on
+ /// the Spiller implementation selected.
+ ///
+ /// @param li The live interval to be spilled.
+ /// @param spillIs A list of intervals that are about to be spilled,
+ /// and so cannot be used for remat etc.
+ /// @param newIntervals The newly created intervals will be appended here.
+ /// @param earliestIndex The earliest point for splitting. (OK, it's another
+ /// pointer to the allocator guts.)
+ virtual void spill(LiveInterval *li,
+ std::vector<LiveInterval*> &newIntervals,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestIndex = 0) = 0;
};
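[Editor's sketch] Caller-side view of the new interface (everything except Spiller::spill is hypothetical, and the fragment assumes the surrounding lib/CodeGen headers): new intervals are appended to a caller-owned vector rather than returned by value, which lets implementations such as SplittingSpiller forward to a fallback without copying.

static void spillAndRequeue(Spiller &S, LiveInterval *LI,
                            SmallVectorImpl<LiveInterval*> &SpillIs,
                            std::vector<LiveInterval*> &WorkList) {
  std::vector<LiveInterval*> NewLIs;
  S.spill(LI, NewLIs, SpillIs);  // was: NewLIs = S.spill(LI, SpillIs);
  WorkList.insert(WorkList.end(), NewLIs.begin(), NewLIs.end());
}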
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 8a6a727..ca5c28c 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -136,7 +136,7 @@ bool StackProtector::RequiresStackProtector() const {
bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
AllocaInst *AI = 0; // Place on stack that stores the stack guard.
- Constant *StackGuardVar = 0; // The stack guard variable.
+ Value *StackGuardVar = 0; // The stack guard variable.
for (Function::iterator I = F->begin(), E = F->end(); I != E; ) {
BasicBlock *BB = I++;
@@ -153,9 +153,17 @@ bool StackProtector::InsertStackProtectors() {
// StackGuard = load __stack_chk_guard
// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot)
//
- PointerType *PtrTy = PointerType::getUnqual(
- Type::getInt8Ty(RI->getContext()));
- StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
+ unsigned AddressSpace, Offset;
+ if (TLI->getStackCookieLocation(AddressSpace, Offset)) {
+ Constant *OffsetVal =
+ ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset);
+
+ StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal,
+ PointerType::get(PtrTy, AddressSpace));
+ } else {
+ StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy);
+ }
BasicBlock &Entry = F->getEntryBlock();
Instruction *InsPt = &Entry.front();
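[Editor's sketch] A hypothetical target-side override that enables the new direct path above (the method name comes from the hunk; the constants are assumptions loosely modeled on 64-bit x86/Linux, where the guard traditionally sits at a fixed TLS offset):

bool MyTargetLowering::getStackCookieLocation(unsigned &AddressSpace,
                                              unsigned &Offset) const {
  AddressSpace = 257;  // assumed: LLVM's FS-segment address space on x86-64
  Offset = 0x28;       // assumed: TLS slot holding the stack guard
  return true;         // tells StackProtector to emit the inttoptr load
}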
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 7f3b452..eff3c33 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -508,8 +509,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
// Abort if the use is actually a sub-register def. We don't have enough
// information to figure out if it is really legal.
- if (MO.getSubReg() || MII->isExtractSubreg() ||
- MII->isInsertSubreg() || MII->isSubregToReg())
+ if (MO.getSubReg() || MII->isSubregToReg())
return false;
const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
@@ -571,7 +571,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
// Abort if the use is actually a sub-register use. We don't have enough
// information to figure out if it is really legal.
- if (MO.getSubReg() || MII->isExtractSubreg())
+ if (MO.getSubReg())
return false;
const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
@@ -610,8 +610,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
DEBUG(MI->dump());
++NumLoadElim;
} else {
- TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC,
- MI->getDebugLoc());
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ DstReg).addReg(Reg);
++NumRegRepl;
}
@@ -627,8 +627,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
DEBUG(MI->dump());
++NumStoreElim;
} else {
- TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC,
- MI->getDebugLoc());
+ BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg)
+ .addReg(SrcReg);
++NumRegRepl;
}
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 142398c..59315cf 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterCoalescer.h"
@@ -695,9 +696,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
// Insert copy from curr.second to a temporary at
// the Phi defining curr.second
MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
- TII->copyRegToReg(*PI->getParent(), PI, t,
- curr.second, RC, RC, DebugLoc());
-
+ BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ t).addReg(curr.second);
DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
<< "\n");
@@ -712,8 +712,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
}
// Insert copy from map[curr.first] to curr.second
- TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
- map[curr.first], RC, RC, DebugLoc());
+ BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), curr.second).addReg(map[curr.first]);
map[curr.first] = curr.second;
DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
<< curr.second << "\n");
@@ -761,8 +761,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
// Insert a copy from dest to a new temporary t at the end of b
unsigned t = MF->getRegInfo().createVirtualRegister(RC);
- TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t,
- curr.second, RC, RC, DebugLoc());
+ BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), t).addReg(curr.second);
map[curr.second] = t;
MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
@@ -830,9 +830,6 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
VNInfo* FirstVN = *Int.vni_begin();
FirstVN->setHasPHIKill(false);
- if (I->getOperand(i).isKill())
- FirstVN->addKill(LI.getInstructionIndex(I).getUseIndex());
-
LiveRange LR (LI.getMBBStartIdx(I->getParent()),
LI.getInstructionIndex(I).getUseIndex().getNextSlot(),
FirstVN);
@@ -959,9 +956,8 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
} else {
// Insert a last-minute copy if a conflict was detected.
const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
- const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first);
- TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(),
- I->first, SI->first, RC, RC, DebugLoc());
+ BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::COPY), I->first).addReg(SI->first);
LI.renumber();
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index f2e2a76..075db80 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -559,11 +560,9 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
}
MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
- const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
- TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first,
- CopyInfos[i].second, RC,RC, DebugLoc());
- MachineInstr *CopyMI = prior(Loc);
- Copies.push_back(CopyMI);
+ Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first).addReg(CopyInfos[i].second));
}
NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
@@ -618,11 +617,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
}
MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
- const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
- TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first,
- CopyInfos[i].second, RC, RC, DebugLoc());
- MachineInstr *CopyMI = prior(Loc);
- Copies.push_back(CopyMI);
+ Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyInfos[i].first)
+ .addReg(CopyInfos[i].second));
}
} else {
// No PHIs to worry about, just splice the instructions over.
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 0ad6619..cdacb98 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/SmallVector.h"
@@ -21,11 +22,34 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
+/// after it, replacing it with an unconditional branch to NewDest.
+void
+TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+
+ // Remove all the old successors of MBB from the CFG.
+ while (!MBB->succ_empty())
+ MBB->removeSuccessor(MBB->succ_begin());
+
+ // Remove all the dead instructions from the end of MBB.
+ MBB->erase(Tail, MBB->end());
+
+ // If NewDest isn't immediately after MBB, insert a branch to it.
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+ InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+ Tail->getDebugLoc());
+ MBB->addSuccessor(NewDest);
+}
+
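[Editor's sketch] The ++MachineFunction::iterator(MBB) test above is a compact way of asking whether NewDest is the next block in layout order, in which case a fall-through suffices and no branch is needed. Spelled out (the helper name is an assumption):

#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;

static bool isLayoutSuccessor(MachineBasicBlock *MBB,
                              MachineBasicBlock *Dest) {
  MachineFunction::iterator I(MBB);
  return ++I == MachineFunction::iterator(Dest);  // fall-through possible
}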
// commuteInstruction - The default implementation of this method just exchanges
// the two operands returned by findCommutedOpIndices.
MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
@@ -136,17 +160,9 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
unsigned DestReg,
unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo &TRI) const {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MachineOperand &MO = MI->getOperand(0);
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
- MO.setReg(DestReg);
- MO.setSubReg(SubIdx);
- } else if (SubIdx) {
- MO.setReg(TRI->getSubReg(DestReg, SubIdx));
- } else {
- MO.setReg(DestReg);
- }
+ MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
}
@@ -175,6 +191,47 @@ TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
return FnSize;
}
+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+ unsigned FoldIdx) {
+ assert(MI->isCopy() && "MI must be a COPY instruction");
+ if (MI->getNumOperands() != 2)
+ return 0;
+ assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand");
+
+ const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+ const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+ if (FoldOp.getSubReg() || LiveOp.getSubReg())
+ return 0;
+
+ unsigned FoldReg = FoldOp.getReg();
+ unsigned LiveReg = LiveOp.getReg();
+
+ assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+ "Cannot fold physregs");
+
+ const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+ if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+ const TargetRegisterClass *LiveRC = MRI.getRegClass(LiveReg);
+ if (RC == LiveRC || RC->hasSubClass(LiveRC))
+ return RC;
+
+ // FIXME: Allow folding when register classes are memory compatible.
+ return 0;
+}
+
+bool TargetInstrInfoImpl::
+canFoldMemoryOperand(const MachineInstr *MI,
+ const SmallVectorImpl<unsigned> &Ops) const {
+ return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
/// foldMemoryOperand - Attempt to fold a load or store of the specified stack
/// slot into the specified machine instruction for the specified operand(s).
/// If this is possible, a new instruction is returned with the specified
@@ -182,10 +239,9 @@ TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
/// removing the old instruction and adding the new one in the instruction
/// stream.
MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
- MachineInstr* MI,
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
+ int FI) const {
unsigned Flags = 0;
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (MI->getOperand(Ops[i]).isDef())
@@ -193,34 +249,56 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
else
Flags |= MachineMemOperand::MOLoad;
+ MachineBasicBlock *MBB = MI->getParent();
+ assert(MBB && "foldMemoryOperand needs an inserted instruction");
+ MachineFunction &MF = *MBB->getParent();
+
// Ask the target to do the actual folding.
- MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
- if (!NewMI) return 0;
+ if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+ // Add a memory operand, foldMemoryOperandImpl doesn't do that.
+ assert((!(Flags & MachineMemOperand::MOStore) ||
+ NewMI->getDesc().mayStore()) &&
+ "Folded a def to a non-store!");
+ assert((!(Flags & MachineMemOperand::MOLoad) ||
+ NewMI->getDesc().mayLoad()) &&
+ "Folded a use to a non-load!");
+ const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ assert(MFI.getObjectOffset(FI) != -1);
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+ Flags, /*Offset=*/0,
+ MFI.getObjectSize(FI),
+ MFI.getObjectAlignment(FI));
+ NewMI->addMemOperand(MF, MMO);
- assert((!(Flags & MachineMemOperand::MOStore) ||
- NewMI->getDesc().mayStore()) &&
- "Folded a def to a non-store!");
- assert((!(Flags & MachineMemOperand::MOLoad) ||
- NewMI->getDesc().mayLoad()) &&
- "Folded a use to a non-load!");
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
- assert(MFI.getObjectOffset(FrameIndex) != -1);
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex),
- Flags, /*Offset=*/0,
- MFI.getObjectSize(FrameIndex),
- MFI.getObjectAlignment(FrameIndex));
- NewMI->addMemOperand(MF, MMO);
+ // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
+ return MBB->insert(MI, NewMI);
+ }
- return NewMI;
+ // Straight COPY may fold as load/store.
+ if (!MI->isCopy() || Ops.size() != 1)
+ return 0;
+
+ const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+ if (!RC)
+ return 0;
+
+ const MachineOperand &MO = MI->getOperand(1-Ops[0]);
+ MachineBasicBlock::iterator Pos = MI;
+ const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+ if (Flags == MachineMemOperand::MOStore)
+ storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
+ else
+ loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
+ return --Pos;
}
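[Editor's sketch] To make the new COPY fallback concrete: folding the def operand of a COPY turns it into a store of the source to the stack slot, and folding the use operand turns it into a reload. A hedged caller-side sketch (names are assumptions; under the new semantics the folded instruction is already inserted, and retiring the original is assumed to be the caller's job):

static void spillCopyDef(const TargetInstrInfo *TII, MachineInstr *MI,
                         int StackSlot) {
  SmallVector<unsigned, 1> Ops;
  Ops.push_back(0);  // fold the defined operand => store the source
  if (TII->foldMemoryOperand(MI, Ops, StackSlot))
    MI->eraseFromParent();  // assumption: caller removes the original COPY
}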
/// foldMemoryOperand - Same as the previous version except it allows folding
/// of any load and store from / to any address, not just from a specific
/// stack slot.
MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
- MachineInstr* MI,
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const {
assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
@@ -228,11 +306,15 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
#endif
+ MachineBasicBlock &MBB = *MI->getParent();
+ MachineFunction &MF = *MBB.getParent();
// Ask the target to do the actual folding.
MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI);
if (!NewMI) return 0;
+ NewMI = MBB.insert(MI, NewMI);
+
// Copy the memoperands from the load to the folded instruction.
NewMI->setMemRefs(LoadMI->memoperands_begin(),
LoadMI->memoperands_end());
@@ -240,11 +322,9 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
return NewMI;
}
-bool
-TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr *
- MI,
- AliasAnalysis *
- AA) const {
+bool TargetInstrInfo::
+isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
+ AliasAnalysis *AA) const {
const MachineFunction &MF = *MI->getParent()->getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetMachine &TM = MF.getTarget();
@@ -324,3 +404,31 @@ TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr *
// Everything checked out.
return true;
}
+
+/// isSchedulingBoundary - Test if the given instruction should be
+/// considered a scheduling boundary. This primarily includes labels
+/// and terminators.
+bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const{
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
+ if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ return true;
+
+ return false;
+}
+
+// Default implementation of CreateTargetPostRAHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+ return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II);
+}
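[Editor's sketch] Hazard-prone targets can substitute their own recognizer by overriding the new hook; a hypothetical override (class name assumed; the body shown is just what the default already does):

ScheduleHazardRecognizer *MyTargetInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
  // A target with detailed itineraries could return a custom subclass here.
  return new PostRAHazardRecognizer(II);
}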
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 71ad3fb..a80cfc4 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -825,32 +825,32 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
TargetLoweringObjectFile::Initialize(Ctx, TM);
TextSection =
getContext().getCOFFSection(".text",
- MCSectionCOFF::IMAGE_SCN_CNT_CODE |
- MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_CNT_CODE |
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getText());
DataSection =
getContext().getCOFFSection(".data",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
ReadOnlySection =
getContext().getCOFFSection(".rdata",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
StaticCtorSection =
getContext().getCOFFSection(".ctors",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
StaticDtorSection =
getContext().getCOFFSection(".dtors",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
// FIXME: We're emitting LSDA info into a readonly section on COFF, even
@@ -859,76 +859,76 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
// adjusted or this should be a data section.
LSDASection =
getContext().getCOFFSection(".gcc_except_table",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
EHFrameSection =
getContext().getCOFFSection(".eh_frame",
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
SectionKind::getDataRel());
// Debug info.
DwarfAbbrevSection =
getContext().getCOFFSection(".debug_abbrev",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfInfoSection =
getContext().getCOFFSection(".debug_info",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfLineSection =
getContext().getCOFFSection(".debug_line",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfFrameSection =
getContext().getCOFFSection(".debug_frame",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfPubNamesSection =
getContext().getCOFFSection(".debug_pubnames",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfPubTypesSection =
getContext().getCOFFSection(".debug_pubtypes",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfStrSection =
getContext().getCOFFSection(".debug_str",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfLocSection =
getContext().getCOFFSection(".debug_loc",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfARangesSection =
getContext().getCOFFSection(".debug_aranges",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfRangesSection =
getContext().getCOFFSection(".debug_ranges",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DwarfMacroInfoSection =
getContext().getCOFFSection(".debug_macinfo",
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
- MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ COFF::IMAGE_SCN_MEM_DISCARDABLE |
+ COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata());
DrectveSection =
getContext().getCOFFSection(".drectve",
- MCSectionCOFF::IMAGE_SCN_LNK_INFO,
+ COFF::IMAGE_SCN_LNK_INFO,
SectionKind::getMetadata());
}
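[Editor's sketch] The change through this file is mechanical: section characteristics move from MCSectionCOFF's nested enums to the shared llvm::COFF namespace (assumed to live in llvm/Support/COFF.h), so object writers and section setup can share one set of constants. A minimal sketch of the new spelling:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/COFF.h"  // assumed home of the shared constants
using namespace llvm;

static const MCSection *makeTextSection(MCContext &Ctx) {
  return Ctx.getCOFFSection(".text",
                            COFF::IMAGE_SCN_CNT_CODE |
                            COFF::IMAGE_SCN_MEM_EXECUTE |
                            COFF::IMAGE_SCN_MEM_READ,
                            SectionKind::getText());
}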
@@ -936,27 +936,27 @@ static unsigned
getCOFFSectionFlags(SectionKind K) {
unsigned Flags = 0;
- if (!K.isMetadata())
+ if (K.isMetadata())
Flags |=
- MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE;
+ COFF::IMAGE_SCN_MEM_DISCARDABLE;
else if (K.isText())
Flags |=
- MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE |
- MCSectionCOFF::IMAGE_SCN_CNT_CODE;
+ COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_CNT_CODE;
else if (K.isBSS ())
Flags |=
- MCSectionCOFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE;
+ COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
else if (K.isReadOnly())
Flags |=
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ;
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ;
else if (K.isWriteable())
Flags |=
- MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- MCSectionCOFF::IMAGE_SCN_MEM_READ |
- MCSectionCOFF::IMAGE_SCN_MEM_WRITE;
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE;
return Flags;
}
@@ -995,10 +995,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
unsigned Characteristics = getCOFFSectionFlags(Kind);
- Characteristics |= MCSectionCOFF::IMAGE_SCN_LNK_COMDAT;
+ Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
return getContext().getCOFFSection(Name.str(), Characteristics,
- MCSectionCOFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind);
+ COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind);
}
if (Kind.isText())
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 3d10dc1..5649143 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -33,6 +33,7 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -381,7 +382,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
DstReg = 0;
unsigned SrcSubIdx, DstSubIdx;
if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
- if (MI.isExtractSubreg()) {
+ if (MI.isCopy()) {
DstReg = MI.getOperand(0).getReg();
SrcReg = MI.getOperand(1).getReg();
} else if (MI.isInsertSubreg()) {
@@ -897,6 +898,108 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
}
}
}
+
+ // If this is an instruction with a load folded into it, try unfolding
+ // the load, e.g. avoid this:
+ // movq %rdx, %rcx
+ // addq (%rax), %rcx
+ // in favor of this:
+ // movq (%rax), %rcx
+ // addq %rdx, %rcx
+ // because it's preferable to schedule a load rather than a register copy.
+ if (TID.mayLoad() && !regBKilled) {
+ // Determine if a load can be unfolded.
+ unsigned LoadRegIndex;
+ unsigned NewOpc =
+ TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(),
+ /*UnfoldLoad=*/true,
+ /*UnfoldStore=*/false,
+ &LoadRegIndex);
+ if (NewOpc != 0) {
+ const TargetInstrDesc &UnfoldTID = TII->get(NewOpc);
+ if (UnfoldTID.getNumDefs() == 1) {
+ MachineFunction &MF = *mbbi->getParent();
+
+ // Unfold the load.
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
+ const TargetRegisterClass *RC =
+ UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
+ unsigned Reg = MRI->createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 2> NewMIs;
+ if (!TII->unfoldMemoryOperand(MF, mi, Reg,
+ /*UnfoldLoad=*/true,/*UnfoldStore=*/false,
+ NewMIs)) {
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ return false;
+ }
+ assert(NewMIs.size() == 2 &&
+ "Unfolded a load into multiple instructions!");
+ // The load was previously folded, so this is the only use.
+ NewMIs[1]->addRegisterKilled(Reg, TRI);
+
+ // Tentatively insert the instructions into the block so that they
+ // look "normal" to the transformation logic.
+ mbbi->insert(mi, NewMIs[0]);
+ mbbi->insert(mi, NewMIs[1]);
+
+ DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0]
+ << "2addr: NEW INST: " << *NewMIs[1]);
+
+ // Transform the instruction, now that it no longer has a load.
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA);
+ unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB);
+ MachineBasicBlock::iterator NewMI = NewMIs[1];
+ bool TransformSuccess =
+ TryInstructionTransform(NewMI, mi, mbbi,
+ NewSrcIdx, NewDstIdx, Dist);
+ if (TransformSuccess ||
+ NewMIs[1]->getOperand(NewSrcIdx).isKill()) {
+ // Success, or at least we made an improvement. Keep the unfolded
+ // instructions and discard the original.
+ if (LV) {
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = mi->getOperand(i);
+ if (MO.isReg() && MO.getReg() != 0 &&
+ TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isUse()) {
+ if (MO.isKill()) {
+ if (NewMIs[0]->killsRegister(MO.getReg()))
+ LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]);
+ else {
+ assert(NewMIs[1]->killsRegister(MO.getReg()) &&
+ "Kill missing after load unfold!");
+ LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]);
+ }
+ }
+ } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) {
+ if (NewMIs[1]->registerDefIsDead(MO.getReg()))
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]);
+ else {
+ assert(NewMIs[0]->registerDefIsDead(MO.getReg()) &&
+ "Dead flag missing after load unfold!");
+ LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]);
+ }
+ }
+ }
+ }
+ LV->addVirtualRegisterKilled(Reg, NewMIs[1]);
+ }
+ mi->eraseFromParent();
+ mi = NewMIs[1];
+ if (TransformSuccess)
+ return true;
+ } else {
+ // Transforming didn't eliminate the tie and didn't lead to an
+ // improvement. Clean up the unfolded instructions and keep the
+ // original.
+ DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n");
+ NewMIs[0]->eraseFromParent();
+ NewMIs[1]->eraseFromParent();
+ }
+ }
+ }
+ }
+
return false;
}
@@ -1047,14 +1150,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
- TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
+ TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
ReMatRegs.set(regB);
++NumReMats;
} else {
- bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc,
- mi->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Unable to issue a copy instruction!\n");
+ BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
+ regA).addReg(regB);
}
MachineBasicBlock::iterator prevMI = prior(mi);
@@ -1104,12 +1205,30 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
}
}
}
-
+
+ // Schedule the source copy / remat inserted to form two-address
+ // instruction. FIXME: Does it matter the distance map may not be
+ // accurate after it's scheduled?
+ TII->scheduleTwoAddrSource(prior(mi), mi, *TRI);
+
MadeChange = true;
DEBUG(dbgs() << "\t\trewrite to:\t" << *mi);
}
+ // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form.
+ if (mi->isInsertSubreg()) {
+ // From %reg = INSERT_SUBREG %reg, %subreg, subidx
+ // To %reg:subidx = COPY %subreg
+ unsigned SubIdx = mi->getOperand(3).getImm();
+ mi->RemoveOperand(3);
+ assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx");
+ mi->getOperand(0).setSubReg(SubIdx);
+ mi->RemoveOperand(1);
+ mi->setDesc(TII->get(TargetOpcode::COPY));
+ DEBUG(dbgs() << "\t\tconvert to:\t" << *mi);
+ }
+
// Clear TiedOperands here instead of at the top of the loop
// since most instructions do not have tied operands.
TiedOperands.clear();
@@ -1136,14 +1255,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
static void UpdateRegSequenceSrcs(unsigned SrcReg,
unsigned DstReg, unsigned SubIdx,
- MachineRegisterInfo *MRI) {
+ MachineRegisterInfo *MRI,
+ const TargetRegisterInfo &TRI) {
for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
RE = MRI->reg_end(); RI != RE; ) {
MachineOperand &MO = RI.getOperand();
++RI;
- MO.setReg(DstReg);
- assert(MO.getSubReg() == 0);
- MO.setSubReg(SubIdx);
+ MO.substVirtReg(DstReg, SubIdx, TRI);
}
}
@@ -1165,55 +1283,102 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
if (!Seen.insert(SrcReg))
continue;
- // If there are no other uses than extract_subreg which feed into
+ // Check that the instructions are all in the same basic block.
+ MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg);
+ MachineInstr *DstDefMI = MRI->getVRegDef(DstReg);
+ if (SrcDefMI->getParent() != DstDefMI->getParent())
+ continue;
+
+ // If there are no other uses than copies which feed into
// the reg_sequence, then we might be able to coalesce them.
bool CanCoalesce = true;
- SmallVector<unsigned, 4> SubIndices;
+ SmallVector<unsigned, 4> SrcSubIndices, DstSubIndices;
for (MachineRegisterInfo::use_nodbg_iterator
UI = MRI->use_nodbg_begin(SrcReg),
UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
- if (!UseMI->isExtractSubreg() ||
- UseMI->getOperand(0).getReg() != DstReg) {
+ if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) {
CanCoalesce = false;
break;
}
- SubIndices.push_back(UseMI->getOperand(2).getImm());
+ SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg());
+ DstSubIndices.push_back(UseMI->getOperand(0).getSubReg());
}
- if (!CanCoalesce || SubIndices.size() < 2)
+ if (!CanCoalesce || SrcSubIndices.size() < 2)
continue;
- std::sort(SubIndices.begin(), SubIndices.end());
- unsigned NewSubIdx = 0;
- if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices,
- NewSubIdx)) {
- bool Proceed = true;
- if (NewSubIdx)
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ) {
- MachineOperand &MO = RI.getOperand();
- ++RI;
- // FIXME: If the sub-registers do not combine to the whole
- // super-register, i.e. NewSubIdx != 0, and any of the use has a
- // sub-register index, then abort the coalescing attempt.
- if (MO.getSubReg()) {
- Proceed = false;
- break;
- }
- MO.setReg(DstReg);
- MO.setSubReg(NewSubIdx);
- }
- if (Proceed)
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ) {
- MachineOperand &MO = RI.getOperand();
- ++RI;
- MO.setReg(DstReg);
- if (NewSubIdx)
- MO.setSubReg(NewSubIdx);
- }
+ // Check that the source subregisters can be combined.
+ std::sort(SrcSubIndices.begin(), SrcSubIndices.end());
+ unsigned NewSrcSubIdx = 0;
+ if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices,
+ NewSrcSubIdx))
+ continue;
+
+ // Check that the destination subregisters can also be combined.
+ std::sort(DstSubIndices.begin(), DstSubIndices.end());
+ unsigned NewDstSubIdx = 0;
+ if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices,
+ NewDstSubIdx))
+ continue;
+
+ // If neither source nor destination can be combined to the full register,
+ // just give up. This could be improved if it ever matters.
+ if (NewSrcSubIdx != 0 && NewDstSubIdx != 0)
+ continue;
+
+ // Now that we know that all the uses are subregister copies and that those
+ // subregs can somehow be combined, scan all the copies again to
+ // make sure the subregs are in the right order and can be composed.
+ MachineInstr *SomeMI = 0;
+ CanCoalesce = true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ assert(UseMI->isCopy());
+ unsigned DstSubIdx = UseMI->getOperand(0).getSubReg();
+ unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg();
+ assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination");
+ if ((NewDstSubIdx == 0 &&
+ TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) ||
+ (NewSrcSubIdx == 0 &&
+ TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) {
+ CanCoalesce = false;
+ break;
+ }
+ // Keep track of one of the uses.
+ SomeMI = UseMI;
+ }
+ if (!CanCoalesce)
+ continue;
+
+ // Insert a copy to replace the original.
+ MachineBasicBlock::iterator InsertLoc = SomeMI;
+ MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
+ SomeMI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, NewDstSubIdx)
+ .addReg(SrcReg, 0, NewSrcSubIdx);
+
+ // Remove all the old extract instructions.
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ) {
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI == CopyMI)
+ continue;
+ assert(UseMI->isCopy());
+ // Move any kills to the new copy or extract instruction.
+ if (UseMI->getOperand(1).isKill()) {
+ CopyMI->getOperand(1).setIsKill();
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI);
}
+ UseMI->eraseFromParent();
+ }
}
}
@@ -1268,15 +1433,13 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
}
IsImpDef = false;
- // Remember EXTRACT_SUBREG sources. These might be candidate for
- // coalescing.
- if (DefMI->isExtractSubreg())
+ // Remember COPY sources. These might be candidates for coalescing.
+ if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg())
RealSrcs.push_back(DefMI->getOperand(1).getReg());
- if (!Seen.insert(SrcReg) ||
- MI->getParent() != DefMI->getParent() ||
- !MI->getOperand(i).isKill() ||
- HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
+ bool isKill = MI->getOperand(i).isKill();
+ if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
+ !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
// REG_SEQUENCE cannot have duplicated operands, add a copy.
// Also add a copy if the source is live-in to the block. We don't want
// to end up with a partial-redef of a livein, e.g.
@@ -1292,30 +1455,23 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
// If the REG_SEQUENCE doesn't kill its source, keeping live variables
// correctly up to date becomes very difficult. Insert a copy.
//
- const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
- unsigned NewReg = MRI->createVirtualRegister(RC);
MachineBasicBlock::iterator InsertLoc = MI;
- bool Emitted =
- TII->copyRegToReg(*MI->getParent(), InsertLoc, NewReg, SrcReg, RC, RC,
- MI->getDebugLoc());
- (void)Emitted;
- assert(Emitted && "Unable to issue a copy instruction!\n");
- MI->getOperand(i).setReg(NewReg);
- if (MI->getOperand(i).isKill()) {
- MachineBasicBlock::iterator CopyMI = prior(InsertLoc);
- MachineOperand *KillMO = CopyMI->findRegisterUseOperand(SrcReg);
- KillMO->setIsKill();
- if (LV)
- // Update live variables
- LV->replaceKillInstruction(SrcReg, MI, &*CopyMI);
- }
+ MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
+ MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
+ .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm())
+ .addReg(SrcReg, getKillRegState(isKill));
+ MI->getOperand(i).setReg(0);
+ if (LV && isKill)
+ LV->replaceKillInstruction(SrcReg, MI, CopyMI);
+ DEBUG(dbgs() << "Inserted: " << *CopyMI);
}
}
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
+ if (!SrcReg) continue;
unsigned SubIdx = MI->getOperand(i+1).getImm();
- UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI);
+ UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI);
}
if (IsImpDef) {
@@ -1328,8 +1484,11 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
MI->eraseFromParent();
}
- // Try coalescing some EXTRACT_SUBREG instructions.
- CoalesceExtSubRegs(RealSrcs, DstReg);
+ // Try coalescing some EXTRACT_SUBREG instructions. This can create
+ // INSERT_SUBREG instructions that must have <undef> flags added by
+ // LiveIntervalAnalysis, so only run it when LiveVariables is available.
+ if (LV)
+ CoalesceExtSubRegs(RealSrcs, DstReg);
}
RegSequences.clear();
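The coalescing rewrite above hinges on one check: composing the combined sub-register index with each use's index must reproduce the index on the other side of the copy. A minimal standalone sketch of that check, with a made-up two-level index lattice standing in for a real TargetRegisterInfo (1/2 = lo/hi half, 3 = lo quarter):

#include <cassert>
#include <cstdio>

// Hypothetical composition table: 0 denotes the whole register, so it is
// the identity; composing lo-of-lo yields the lo quarter.
static unsigned composeSubRegIndices(unsigned A, unsigned B) {
  if (A == 0) return B;
  if (B == 0) return A;
  if (A == 1 && B == 1) return 3;
  return ~0u; // Undefined composition in this toy lattice.
}

int main() {
  // One use copies DstReg:1 from SrcReg:3, and the combined source index
  // is 1. Mirrors the NewDstSubIdx == 0 case in CoalesceExtSubRegs:
  // compose(NewSrcSubIdx, DstSubIdx) must equal SrcSubIdx.
  unsigned NewSrcSubIdx = 1, DstSubIdx = 1, SrcSubIdx = 3;
  bool CanCoalesce =
      composeSubRegIndices(NewSrcSubIdx, DstSubIdx) == SrcSubIdx;
  printf("coalescable: %s\n", CanCoalesce ? "yes" : "no"); // yes
  assert(CanCoalesce);
  return 0;
}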
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 871d836..57a1500 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -667,8 +667,7 @@ static void ReMaterialize(MachineBasicBlock &MBB,
assert(TID.getNumDefs() == 1 &&
"Don't know how to remat instructions that define > 1 values!");
#endif
- TII->reMaterialize(MBB, MII, DestReg,
- ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI);
+ TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
MachineInstr *NewMI = prior(MII);
for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
@@ -769,7 +768,7 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end();
I != E; ++I) {
unsigned Reg = I->first;
- const TargetRegisterClass* RC = TRI->getPhysicalRegisterRegClass(Reg);
+ const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg);
// FIXME: A temporary workaround. We can't reuse available value if it's
// not safe to move the def of the virtual register's class. e.g.
// X86::RFP* register classes. Do not add it as a live-in.
@@ -1022,7 +1021,7 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
unsigned Kill = Kills[i];
if (!Defs[Kill] && !Uses[Kill] &&
- TRI->getPhysicalRegisterRegClass(Kill) == RC)
+ RC->contains(Kill))
return Kill;
}
for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
@@ -1410,25 +1409,25 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII,
if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
assert(NewMIs.size() == 1);
MachineInstr *NewMI = NewMIs.back();
+ MBB->insert(MII, NewMI);
NewMIs.clear();
int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false);
assert(Idx != -1);
SmallVector<unsigned, 1> Ops;
Ops.push_back(Idx);
- MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS);
+ NewMI->eraseFromParent();
if (FoldedMI) {
VRM->addSpillSlotUse(SS, FoldedMI);
if (!VRM->hasPhys(UnfoldVR))
VRM->assignVirt2Phys(UnfoldVR, UnfoldPR);
VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
- MII = MBB->insert(MII, FoldedMI);
+ MII = FoldedMI;
InvalidateKills(MI, TRI, RegKills, KillOps);
VRM->RemoveMachineInstrFromMaps(&MI);
MBB->erase(&MI);
- MF.DeleteMachineInstr(NewMI);
return true;
}
- MF.DeleteMachineInstr(NewMI);
}
}
@@ -1480,7 +1479,6 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
if (MII == MBB->begin() || !MII->killsRegister(SrcReg))
return false;
- MachineFunction &MF = *MBB->getParent();
MachineInstr &MI = *MII;
MachineBasicBlock::iterator DefMII = prior(MII);
MachineInstr *DefMI = DefMII;
@@ -1511,11 +1509,12 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true);
if (!CommutedMI)
return false;
+ MBB->insert(MII, CommutedMI);
SmallVector<unsigned, 1> Ops;
Ops.push_back(NewDstIdx);
- MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS);
+ MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS);
// Not needed since foldMemoryOperand returns a new MI.
- MF.DeleteMachineInstr(CommutedMI);
+ CommutedMI->eraseFromParent();
if (!FoldedMI)
return false;
@@ -1528,7 +1527,7 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
MachineInstr *StoreMI = MII;
VRM->addSpillSlotUse(SS, StoreMI);
VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
- MII = MBB->insert(MII, FoldedMI); // Update MII to backtrack.
+ MII = FoldedMI; // Update MII to backtrack.
// Delete all 3 old instructions.
InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
@@ -1704,7 +1703,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
std::vector<unsigned> &EmSpills = VRM->getEmergencySpills(MI);
for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) {
unsigned PhysReg = EmSpills[i];
- const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(PhysReg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg);
assert(RC && "Unable to determine register class!");
int SS = VRM->getEmergencySpillSlot(RC);
if (UsedSS.count(SS))
@@ -1759,7 +1758,6 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
bool DoReMat = VRM->isReMaterialized(VirtReg);
int SSorRMId = DoReMat
? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
if (InReg == Phys) {
// If the value is already available in the expected register, save
@@ -1793,20 +1791,16 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
MachineBasicBlock::iterator InsertLoc =
ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
*MBB->getParent());
-
- TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC,
- MI->getDebugLoc());
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), Phys)
+ .addReg(InReg, RegState::Kill);
// This invalidates Phys.
Spills.ClobberPhysReg(Phys);
// Remember it's available.
Spills.addAvailable(SSorRMId, Phys);
- // Mark is killed.
- MachineInstr *CopyMI = prior(InsertLoc);
CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
- KillOpnd->setIsKill();
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
DEBUG(dbgs() << '\t' << *CopyMI);
@@ -2013,7 +2007,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// = EXTRACT_SUBREG fi#1
// fi#1 is available in EDI, but it cannot be reused because it's not in
// the right register file.
- if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) {
+ if (PhysReg && !AvoidReload && SubIdx) {
const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
if (!RC->contains(PhysReg))
PhysReg = 0;
@@ -2034,6 +2028,18 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
Spills.canClobberPhysReg(PhysReg);
}
+ // If this is an asm, and PhysReg is used elsewhere as an earlyclobber
+ // operand, we can't also use it as an input. (Outputs always come
+ // before inputs, so we can stop looking at i.)
+ if (MI.isInlineAsm()) {
+ for (unsigned k=0; k<i; ++k) {
+ MachineOperand &MOk = MI.getOperand(k);
+ if (MOk.isReg() && MOk.getReg()==PhysReg && MOk.isEarlyClobber()) {
+ CanReuse = false;
+ break;
+ }
+ }
+ }
if (CanReuse) {
// If this stack slot value is already available, reuse it!
@@ -2104,6 +2110,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// To avoid this problem, and to avoid doing a load right after a store,
// we emit a copy from PhysReg into the designated register for this
// operand.
+ //
+ // This case also applies to an earlyclobber'd PhysReg.
unsigned DesignatedReg = VRM->getPhys(VirtReg);
assert(DesignatedReg && "Must map virtreg to physreg!");
@@ -2136,7 +2144,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
continue;
}
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
MRI->setPhysRegUsed(DesignatedReg);
ReusedOperands.markClobbered(DesignatedReg);
@@ -2144,11 +2151,9 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
MachineBasicBlock::iterator InsertLoc =
ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
SSorRMId, TII, MF);
-
- TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC,
- MI.getDebugLoc());
-
- MachineInstr *CopyMI = prior(InsertLoc);
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DesignatedReg).addReg(PhysReg);
CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
@@ -2269,27 +2274,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) {
DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
if (DestReg != InReg) {
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
- TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC,
- MI.getDebugLoc());
MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
- unsigned SubIdx = DefMO->getSubReg();
+ MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY))
+ .addReg(DestReg, RegState::Define, DefMO->getSubReg())
+ .addReg(InReg, RegState::Kill);
// Revisit the copy so we make sure to notice the effects of the
// operation on the destreg (either needing to RA it if it's
// virtual or needing to clobber any values if it's physical).
- NextMII = &MI;
- --NextMII; // backtrack to the copy.
+ NextMII = CopyMI;
NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- // Propagate the sub-register index over.
- if (SubIdx) {
- DefMO = NextMII->findRegisterDefOperand(DestReg);
- DefMO->setSubReg(SubIdx);
- }
-
- // Mark is killed.
- MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg);
- KillOpnd->setIsKill();
-
BackTracked = true;
} else {
DEBUG(dbgs() << "Removing now-noop copy: " << MI);
@@ -2430,6 +2424,24 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// Also check if it's copying from an "undef"; if so, we can't
// eliminate this or else the undef marker is lost and it will
// confuse the scavenger. This is extremely rare.
+ if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() &&
+ MI.getNumOperands() == 2) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ SmallVector<unsigned, 2> KillRegs;
+ InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs);
+ if (MO.isDead() && !KillRegs.empty()) {
+ // Source register or an implicit super/sub-register use is killed.
+ assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg()));
+ // Last def is now dead.
+ TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
+ }
+ VRM->RemoveMachineInstrFromMaps(&MI);
+ MBB->erase(&MI);
+ Erased = true;
+ Spills.disallowClobberPhysReg(VirtReg);
+ goto ProcessNextInst;
+ }
unsigned Src, Dst, SrcSR, DstSR;
if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
Src == Dst && SrcSR == DstSR &&
@@ -2519,6 +2531,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// Check to see if this is a noop copy. If so, eliminate the
// instruction before considering the dest reg to be changed.
+ if (MI.isIdentityCopy()) {
+ ++NumDCE;
+ DEBUG(dbgs() << "Removing now-noop copy: " << MI);
+ InvalidateKills(MI, TRI, RegKills, KillOps);
+ VRM->RemoveMachineInstrFromMaps(&MI);
+ MBB->erase(&MI);
+ Erased = true;
+ UpdateKills(*LastStore, TRI, RegKills, KillOps);
+ goto ProcessNextInst;
+ }
{
unsigned Src, Dst, SrcSR, DstSR;
if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
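The two new isIdentityCopy() blocks above delete copies whose source and destination, including sub-register indices, coincide, while keeping copies from <undef> so the marker is not lost. A toy version of the same test over an invented instruction record:

#include <cstddef>
#include <cstdio>
#include <vector>

struct Copy { unsigned Dst, DstSub, Src, SrcSub; bool Undef; };

// A copy is a removable no-op when both sides name the same register and
// sub-index, and it does not read an <undef> value.
static bool isRemovableIdentityCopy(const Copy &C) {
  return C.Dst == C.Src && C.DstSub == C.SrcSub && !C.Undef;
}

int main() {
  std::vector<Copy> Block = {{5, 0, 5, 0, false},  // removable no-op
                             {5, 0, 5, 0, true},   // keeps <undef> marker
                             {5, 1, 5, 2, false}}; // real subreg shuffle
  for (std::size_t i = 0; i < Block.size(); ++i)
    printf("copy %zu: %s\n", i,
           isRemovableIdentityCopy(Block[i]) ? "erase" : "keep");
  return 0;
}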
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
index 5e558ca..c8488b2 100644
--- a/lib/CompilerDriver/Tool.cpp
+++ b/lib/CompilerDriver/Tool.cpp
@@ -85,7 +85,8 @@ StrVector Tool::SortArgs(ArgsVector& Args) const {
StrVector Out;
// HACK: this won't be needed when we migrate away from CommandLine.
- std::stable_sort(Args.begin(), Args.end(), &CompareFirst<unsigned, std::string>);
+ std::stable_sort(Args.begin(), Args.end(),
+ &CompareFirst<unsigned, std::string>);
for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) {
Out.push_back(B->second);
}
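The hunk above is only a line re-wrap, but std::stable_sort is load-bearing here: options with equal position keys must keep their command-line order. A quick self-contained demonstration with a stand-in CompareFirst:

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

static bool CompareFirst(const std::pair<unsigned, std::string> &A,
                         const std::pair<unsigned, std::string> &B) {
  return A.first < B.first;
}

int main() {
  std::vector<std::pair<unsigned, std::string> > Args;
  Args.push_back(std::make_pair(2u, "-o"));
  Args.push_back(std::make_pair(1u, "-c"));
  Args.push_back(std::make_pair(2u, "out.o"));
  std::stable_sort(Args.begin(), Args.end(), &CompareFirst);
  // "-o" stays ahead of "out.o" because stable_sort preserves the
  // relative order of equal keys.
  for (std::size_t i = 0; i < Args.size(); ++i)
    printf("%u %s\n", Args[i].first, Args[i].second.c_str());
  return 0;
}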
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 0748b54..59ebe6e 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -591,7 +591,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy,
ECStack.pop_back();
if (ECStack.empty()) { // Finished main. Put result into exit code...
- if (RetTy && RetTy->isIntegerTy()) { // Nonvoid return type?
+ if (RetTy && !RetTy->isVoidTy()) { // Nonvoid return type?
ExitValue = Result; // Capture the exit value of the program
} else {
memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped));
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 26a53b5..57d1260 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -266,7 +266,7 @@ GenericValue Interpreter::callExternalFunction(Function *F,
RawFn = (RawFunc)(intptr_t)
sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName());
if (!RawFn)
- RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F);
+ RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F);
if (RawFn != 0)
RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later
} else {
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 546d2b2..67bd3ed 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -626,10 +626,7 @@ void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!");
- // JIT the function
- isAlreadyCodeGenerating = true;
- jitstate->getPM(locked).run(*F);
- isAlreadyCodeGenerating = false;
+ jitTheFunction(F, locked);
// If the function referred to another function that had not yet been
// read from bitcode, and we are jitting non-lazily, emit it now.
@@ -640,10 +637,7 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
assert(!PF->hasAvailableExternallyLinkage() &&
"Externally-defined function should not be in pending list.");
- // JIT the function
- isAlreadyCodeGenerating = true;
- jitstate->getPM(locked).run(*PF);
- isAlreadyCodeGenerating = false;
+ jitTheFunction(PF, locked);
// Now that the function has been jitted, ask the JITEmitter to rewrite
// the stub with real address of the function.
@@ -651,6 +645,15 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
}
}
+void JIT::jitTheFunction(Function *F, const MutexGuard &locked) {
+ isAlreadyCodeGenerating = true;
+ jitstate->getPM(locked).run(*F);
+ isAlreadyCodeGenerating = false;
+
+ // Clear basic block addresses after this function is done.
+ getBasicBlockAddressMap(locked).clear();
+}
+
/// getPointerToFunction - This method is used to get the address of the
/// specified function, compiling it if necessary.
///
@@ -687,6 +690,41 @@ void *JIT::getPointerToFunction(Function *F) {
return Addr;
}
+void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) {
+ MutexGuard locked(lock);
+
+ BasicBlockAddressMapTy::iterator I =
+ getBasicBlockAddressMap(locked).find(BB);
+ if (I == getBasicBlockAddressMap(locked).end()) {
+ getBasicBlockAddressMap(locked)[BB] = Addr;
+ } else {
+ // Ignore repeats: some BBs can be split into a few MBBs?
+ }
+}
+
+void JIT::clearPointerToBasicBlock(const BasicBlock *BB) {
+ MutexGuard locked(lock);
+ getBasicBlockAddressMap(locked).erase(BB);
+}
+
+void *JIT::getPointerToBasicBlock(BasicBlock *BB) {
+ // Make sure its function is compiled by the JIT.
+ (void)getPointerToFunction(BB->getParent());
+
+ // Resolve the basic block address.
+ MutexGuard locked(lock);
+
+ BasicBlockAddressMapTy::iterator I =
+ getBasicBlockAddressMap(locked).find(BB);
+ if (I != getBasicBlockAddressMap(locked).end()) {
+ return I->second;
+ } else {
+ assert(0 && "JIT does not have BB address for address-of-label, was"
+ " it eliminated by optimizer?");
+ return 0;
+ }
+}
+
/// getOrEmitGlobalVariable - Return the address of the specified global
/// variable, possibly emitting it to memory if needed. This is used by the
/// Emitter.
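The new basic-block bookkeeping is a lock-guarded map from IR blocks to their emitted addresses, with add/clear/get operations mirroring addPointerToBasicBlock, clearPointerToBasicBlock, and getPointerToBasicBlock. A minimal standalone model; BlockAddrMap and the plain std::mutex are stand-ins for the JIT's ValueMap and MutexGuard:

#include <cassert>
#include <map>
#include <mutex>

class BlockAddrMap {
  std::mutex Lock;
  std::map<const void *, void *> Map; // BasicBlock* -> emitted address

public:
  void add(const void *BB, void *Addr) {
    std::lock_guard<std::mutex> G(Lock);
    Map.insert(std::make_pair(BB, Addr)); // repeats are silently ignored
  }
  void clear(const void *BB) {
    std::lock_guard<std::mutex> G(Lock);
    Map.erase(BB);
  }
  void *get(const void *BB) {
    std::lock_guard<std::mutex> G(Lock);
    std::map<const void *, void *>::iterator I = Map.find(BB);
    assert(I != Map.end() && "no address recorded for this block");
    return I->second;
  }
};

int main() {
  BlockAddrMap M;
  int Dummy = 0;
  M.add(&Dummy, &Dummy);
  assert(M.get(&Dummy) == &Dummy);
  M.clear(&Dummy);
  return 0;
}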
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index edae719..1d1763e 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -51,6 +51,10 @@ public:
class JIT : public ExecutionEngine {
+ /// types
+ typedef ValueMap<const BasicBlock *, void *>
+ BasicBlockAddressMapTy;
+ /// data
TargetMachine &TM; // The current target we are compiling to
TargetJITInfo &TJI; // The JITInfo for the target we are compiling to
JITCodeEmitter *JCE; // JCE object
@@ -67,6 +71,12 @@ class JIT : public ExecutionEngine {
JITState *jitstate;
+ /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their
+ /// actualized versions; only filled for basic blocks that have their
+ /// address taken.
+ BasicBlockAddressMapTy BasicBlockAddressMap;
+
+
JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
JITMemoryManager *JMM, CodeGenOpt::Level OptLevel,
bool AllocateGVsWithCode);
@@ -90,9 +100,9 @@ public:
CodeGenOpt::Level OptLevel =
CodeGenOpt::Default,
bool GVsWithCode = true,
- CodeModel::Model CMM = CodeModel::Default) {
+ CodeModel::Model CMM = CodeModel::Default) {
return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode,
- CMM);
+ CMM);
}
virtual void addModule(Module *M);
@@ -127,10 +137,15 @@ public:
///
void *getPointerToFunction(Function *F);
- void *getPointerToBasicBlock(BasicBlock *BB) {
- assert(0 && "JIT does not support address-of-label yet!");
- return 0;
- }
+ /// addPointerToBasicBlock - Adds the address of the specified basic block.
+ void addPointerToBasicBlock(const BasicBlock *BB, void *Addr);
+
+ /// clearPointerToBasicBlock - Removes the address of the specified basic block.
+ void clearPointerToBasicBlock(const BasicBlock *BB);
+
+ /// getPointerToBasicBlock - This returns the address of the specified basic
+ /// block, assuming the function has been compiled.
+ void *getPointerToBasicBlock(BasicBlock *BB);
/// getOrEmitGlobalVariable - Return the address of the specified global
/// variable, possibly emitting it to memory if needed. This is used by the
@@ -197,11 +212,18 @@ public:
const JITEvent_EmittedFunctionDetails &Details);
void NotifyFreeingMachineCode(void *OldPtr);
+ BasicBlockAddressMapTy &
+ getBasicBlockAddressMap(const MutexGuard &) {
+ return BasicBlockAddressMap;
+ }
+
+
private:
static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
TargetMachine &tm);
void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
void updateFunctionStub(Function *F);
+ void jitTheFunction(Function *F, const MutexGuard &locked);
protected:
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index e3855b2..28d79da 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -435,6 +435,9 @@ namespace {
if (MBBLocations.size() <= (unsigned)MBB->getNumber())
MBBLocations.resize((MBB->getNumber()+1)*2);
MBBLocations[MBB->getNumber()] = getCurrentPCValue();
+ if (MBB->hasAddressTaken())
+ TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(),
+ (void*)getCurrentPCValue());
DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
<< (void*) getCurrentPCValue() << "]\n");
}
@@ -442,7 +445,7 @@ namespace {
virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
- virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const{
assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
MBBLocations[MBB->getNumber()] && "MBB not emitted!");
return MBBLocations[MBB->getNumber()];
@@ -1310,6 +1313,11 @@ void JITEmitter::retryWithMoreMemory(MachineFunction &F) {
deallocateMemForFunction(F.getFunction());
// Try again with at least twice as much free space.
SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin));
+
+ for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){
+ if (MBB->hasAddressTaken())
+ TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock());
+ }
}
/// deallocateMemForFunction - Deallocate all memory for the specified
diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp
index 2c22550..1be2bec 100644
--- a/lib/Linker/LinkItems.cpp
+++ b/lib/Linker/LinkItems.cpp
@@ -160,27 +160,26 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
// Check for a file of name "-", which means "read standard input"
if (File.str() == "-") {
std::auto_ptr<Module> M;
- MemoryBuffer *Buffer = MemoryBuffer::getSTDIN();
- if (!Buffer->getBufferSize()) {
- delete Buffer;
- Error = "standard input is empty";
- } else {
- M.reset(ParseBitcodeFile(Buffer, Context, &Error));
- delete Buffer;
- if (M.get())
- if (!LinkInModule(M.get(), &Error))
- return false;
+ if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN(&Error)) {
+ if (!Buffer->getBufferSize()) {
+ delete Buffer;
+ Error = "standard input is empty";
+ } else {
+ M.reset(ParseBitcodeFile(Buffer, Context, &Error));
+ delete Buffer;
+ if (M.get())
+ if (!LinkInModule(M.get(), &Error))
+ return false;
+ }
}
return error("Cannot link stdin: " + Error);
}
- // Make sure we can at least read the file
- if (!File.canRead())
+ // Determine what variety of file it is.
+ std::string Magic;
+ if (!File.getMagicNumber(Magic, 64))
return error("Cannot find linker input '" + File.str() + "'");
- // If its an archive, try to link it in
- std::string Magic;
- File.getMagicNumber(Magic, 64);
switch (sys::IdentifyFileType(Magic.c_str(), 64)) {
default: llvm_unreachable("Bad file type identification");
case sys::Unknown_FileType:
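The stdin path above now parses only after getSTDIN succeeds and treats an empty buffer as its own error. The same guard-then-use shape in a standalone sketch, with plain iostreams standing in for MemoryBuffer and the bitcode parser:

#include <iostream>
#include <iterator>
#include <string>

// Read all of stdin; distinguish "unreadable" from "empty", mirroring the
// control flow in Linker::LinkInFile for File == "-".
static bool linkStdin(std::string &Error) {
  std::string Buffer((std::istreambuf_iterator<char>(std::cin)),
                     std::istreambuf_iterator<char>());
  if (std::cin.bad()) {
    Error = "cannot read standard input";
    return false;
  }
  if (Buffer.empty()) {
    Error = "standard input is empty";
    return false;
  }
  // A real linker would parse Buffer as bitcode and link the module here.
  return true;
}

int main() {
  std::string Error;
  if (!linkStdin(Error)) {
    std::cerr << "Cannot link stdin: " << Error << '\n';
    return 1;
  }
  return 0;
}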
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 5e8a3b6..fc4f3c6 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_library(LLVMMC
MCLoggingStreamer.cpp
MCMachOStreamer.cpp
MCNullStreamer.cpp
+ MCObjectStreamer.cpp
MCObjectWriter.cpp
MCSection.cpp
MCSectionCOFF.cpp
@@ -23,5 +24,7 @@ add_llvm_library(LLVMMC
MCSymbol.cpp
MCValue.cpp
MachObjectWriter.cpp
+ WinCOFFStreamer.cpp
+ WinCOFFObjectWriter.cpp
TargetAsmBackend.cpp
)
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 57b2bcc..e272b60 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -275,19 +275,20 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_Global: // .globl/.global
OS << MAI.getGlobalDirective();
break;
- case MCSA_Hidden: OS << ".hidden "; break;
- case MCSA_IndirectSymbol: OS << ".indirect_symbol "; break;
- case MCSA_Internal: OS << ".internal "; break;
- case MCSA_LazyReference: OS << ".lazy_reference "; break;
- case MCSA_Local: OS << ".local "; break;
- case MCSA_NoDeadStrip: OS << ".no_dead_strip "; break;
- case MCSA_PrivateExtern: OS << ".private_extern "; break;
- case MCSA_Protected: OS << ".protected "; break;
- case MCSA_Reference: OS << ".reference "; break;
- case MCSA_Weak: OS << ".weak "; break;
- case MCSA_WeakDefinition: OS << ".weak_definition "; break;
+ case MCSA_Hidden: OS << "\t.hidden\t"; break;
+ case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break;
+ case MCSA_Internal: OS << "\t.internal\t"; break;
+ case MCSA_LazyReference: OS << "\t.lazy_reference\t"; break;
+ case MCSA_Local: OS << "\t.local\t"; break;
+ case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break;
+ case MCSA_PrivateExtern: OS << "\t.private_extern\t"; break;
+ case MCSA_Protected: OS << "\t.protected\t"; break;
+ case MCSA_Reference: OS << "\t.reference\t"; break;
+ case MCSA_Weak: OS << "\t.weak\t"; break;
+ case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; break;
// .weak_reference
case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break;
+ case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break;
}
OS << *Symbol;
@@ -693,7 +694,6 @@ void MCAsmStreamer::EmitRawText(StringRef String) {
}
void MCAsmStreamer::Finish() {
- OS.flush();
}
MCStreamer *llvm::createAsmStreamer(MCContext &Context,
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 5936656..7d84554 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -308,24 +308,23 @@ static bool isScatteredFixupFullyResolved(const MCAssembler &Asm,
return !B_Base && BaseSymbol == A_Base;
}
-bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const {
+bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const {
// Non-temporary labels should always be visible to the linker.
- if (!SD->getSymbol().isTemporary())
+ if (!Symbol.isTemporary())
return true;
// Absolute temporary labels are never visible.
- if (!SD->getFragment())
+ if (!Symbol.isInSection())
return false;
// Otherwise, check if the section requires symbols even for temporary labels.
- return getBackend().doesSectionRequireSymbols(
- SD->getFragment()->getParent()->getSection());
+ return getBackend().doesSectionRequireSymbols(Symbol.getSection());
}
const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout,
const MCSymbolData *SD) const {
// Linker visible symbols define atoms.
- if (isSymbolLinkerVisible(SD))
+ if (isSymbolLinkerVisible(SD->getSymbol()))
return SD;
// Absolute and undefined symbols have no defining atom.
@@ -685,12 +684,8 @@ void MCAssembler::Finish() {
for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
// Create dummy fragments to eliminate any empty sections; this simplifies
// layout.
- if (it->getFragmentList().empty()) {
- unsigned ValueSize = 1;
- if (getBackend().isVirtualSection(it->getSection()))
- ValueSize = 1;
+ if (it->getFragmentList().empty())
new MCFillFragment(0, 1, 0, it);
- }
it->setOrdinal(SectionIndex++);
}
@@ -759,7 +754,6 @@ void MCAssembler::Finish() {
// Write the object file.
Writer->WriteObject(*this, Layout);
- OS.flush();
stats::ObjectBytes += OS.tell() - StartOffset;
}
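isSymbolLinkerVisible now takes the MCSymbol itself, and its three-step decision reads cleanly in isolation: named labels are always visible, absolute temporaries never are, and otherwise the section decides. A tiny model with an invented Symbol struct:

#include <cstdio>

struct Symbol {
  bool Temporary;              // assembler-local label, e.g. "Ltmp0"
  bool InSection;              // false for absolute symbols
  bool SectionRequiresSymbols; // backend property of the section
};

static bool isSymbolLinkerVisible(const Symbol &S) {
  if (!S.Temporary)
    return true;  // Non-temporary labels are always visible.
  if (!S.InSection)
    return false; // Absolute temporary labels never are.
  return S.SectionRequiresSymbols;
}

int main() {
  Symbol Global = {false, true, false};
  Symbol AbsTmp = {true, false, true};
  printf("%d %d\n", isSymbolLinkerVisible(Global),
         isSymbolLinkerVisible(AbsTmp)); // 1 0
  return 0;
}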
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 53ffc94..1137064 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -27,6 +27,10 @@ MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
MachOUniquingMap = 0;
ELFUniquingMap = 0;
COFFUniquingMap = 0;
+
+ SecureLogFile = getenv("AS_SECURE_LOG_FILE");
+ SecureLog = 0;
+ SecureLogUsed = false;
}
MCContext::~MCContext() {
@@ -37,6 +41,9 @@ MCContext::~MCContext() {
delete (MachOUniqueMapTy*)MachOUniquingMap;
delete (ELFUniqueMapTy*)ELFUniquingMap;
delete (COFFUniqueMapTy*)COFFUniquingMap;
+
+ // If the stream for the .secure_log_unique directive was created, free it.
+ delete (raw_ostream*)SecureLog;
}
//===----------------------------------------------------------------------===//
@@ -90,14 +97,14 @@ MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) {
return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
Twine(LocalLabelVal) +
"\2" +
- Twine(NextInstance(LocalLabelVal)));
+ Twine(NextInstance(LocalLabelVal)));
}
MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal,
int bORf) {
return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
Twine(LocalLabelVal) +
"\2" +
- Twine(GetInstance(LocalLabelVal) + bORf));
+ Twine(GetInstance(LocalLabelVal) + bORf));
}
MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index c000dd7..343f334 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -40,7 +40,7 @@ void MCExpr::print(raw_ostream &OS) const {
const MCSymbol &Sym = SRE.getSymbol();
if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 ||
- SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16)
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16)
OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
// Parenthesize names that start with $ so that they don't look like
@@ -51,8 +51,8 @@ void MCExpr::print(raw_ostream &OS) const {
OS << Sym;
if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
- SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 &&
- SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16)
+ SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 &&
+ SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16)
OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
return;
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 27e4e98..44bc267 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCMachOSymbolFlags.h"
@@ -25,21 +26,13 @@ using namespace llvm;
namespace {
-class MCMachOStreamer : public MCStreamer {
-
-private:
- MCAssembler Assembler;
- MCSectionData *CurSectionData;
-
- /// Track the current atom for each section.
- DenseMap<const MCSectionData*, MCSymbolData*> CurrentAtomMap;
-
+class MCMachOStreamer : public MCObjectStreamer {
private:
MCFragment *getCurrentFragment() const {
- assert(CurSectionData && "No current section!");
+ assert(getCurrentSectionData() && "No current section!");
- if (!CurSectionData->empty())
- return &CurSectionData->getFragmentList().back();
+ if (!getCurrentSectionData()->empty())
+ return &getCurrentSectionData()->getFragmentList().back();
return 0;
}
@@ -49,28 +42,17 @@ private:
MCDataFragment *getOrCreateDataFragment() const {
MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
if (!F)
- F = createDataFragment();
+ F = new MCDataFragment(getCurrentSectionData());
return F;
}
- /// Create a new data fragment in the current section.
- MCDataFragment *createDataFragment() const {
- MCDataFragment *DF = new MCDataFragment(CurSectionData);
- DF->setAtom(CurrentAtomMap.lookup(CurSectionData));
- return DF;
- }
-
void EmitInstToFragment(const MCInst &Inst);
void EmitInstToData(const MCInst &Inst);
public:
MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
- raw_ostream &_OS, MCCodeEmitter *_Emitter)
- : MCStreamer(Context), Assembler(Context, TAB, *_Emitter, _OS),
- CurSectionData(0) {}
- ~MCMachOStreamer() {}
-
- MCAssembler &getAssembler() { return Assembler; }
+ raw_ostream &OS, MCCodeEmitter *Emitter)
+ : MCObjectStreamer(Context, TAB, OS, Emitter) {}
const MCExpr *AddValueSymbols(const MCExpr *Value) {
switch (Value->getKind()) {
@@ -86,7 +68,7 @@ public:
}
case MCExpr::SymbolRef:
- Assembler.getOrCreateSymbolData(
+ getAssembler().getOrCreateSymbolData(
cast<MCSymbolRefExpr>(Value)->getSymbol());
break;
@@ -101,7 +83,6 @@ public:
/// @name MCStreamer Interface
/// @{
- virtual void SwitchSection(const MCSection *Section);
virtual void EmitLabel(MCSymbol *Symbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
@@ -152,6 +133,7 @@ public:
}
virtual void EmitInstruction(const MCInst &Inst);
+
virtual void Finish();
/// @}
@@ -159,38 +141,25 @@ public:
} // end anonymous namespace.
-void MCMachOStreamer::SwitchSection(const MCSection *Section) {
- assert(Section && "Cannot switch to a null section!");
-
- // If already in this section, then this is a noop.
- if (Section == CurSection) return;
-
- CurSection = Section;
- CurSectionData = &Assembler.getOrCreateSectionData(*Section);
-}
-
void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(CurSection && "Cannot emit before setting section!");
- MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
+ Symbol->setSection(*CurSection);
- // Update the current atom map, if necessary.
- bool MustCreateFragment = false;
- if (Assembler.isSymbolLinkerVisible(&SD)) {
- CurrentAtomMap[CurSectionData] = &SD;
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- // We have to create a new fragment, fragments cannot span atoms.
- MustCreateFragment = true;
- }
+ // We have to create a new fragment if this is an atom defining symbol,
+ // fragments cannot span atoms.
+ if (getAssembler().isSymbolLinkerVisible(SD.getSymbol()))
+ new MCDataFragment(getCurrentSectionData());
// FIXME: This is wasteful, we don't necessarily need to create a data
// fragment. Instead, we should mark the symbol as pointing into the data
// fragment if it exists; otherwise we should just queue the label and set its
// fragment pointer when we emit the next fragment.
- MCDataFragment *F =
- MustCreateFragment ? createDataFragment() : getOrCreateDataFragment();
+ MCDataFragment *F = getOrCreateDataFragment();
assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
SD.setFragment(F);
SD.setOffset(F->getContents().size());
@@ -203,14 +172,12 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
// FIXME: Clean up this code; these bits should be emitted based on semantic
// properties, not on the order of definition, etc.
SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
-
- Symbol->setSection(*CurSection);
}
void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
switch (Flag) {
case MCAF_SubsectionsViaSymbols:
- Assembler.setSubsectionsViaSymbols(true);
+ getAssembler().setSubsectionsViaSymbols(true);
return;
}
@@ -219,7 +186,7 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
// FIXME: Lift context changes into super class.
- Assembler.getOrCreateSymbolData(*Symbol);
+ getAssembler().getOrCreateSymbolData(*Symbol);
Symbol->setVariableValue(AddValueSymbols(Value));
}
@@ -232,15 +199,15 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
// important for matching the string table that 'as' generates.
IndirectSymbolData ISD;
ISD.Symbol = Symbol;
- ISD.SectionData = CurSectionData;
- Assembler.getIndirectSymbols().push_back(ISD);
+ ISD.SectionData = getCurrentSectionData();
+ getAssembler().getIndirectSymbols().push_back(ISD);
return;
}
// Adding a symbol attribute always introduces the symbol, note that an
// important side effect of calling getOrCreateSymbolData here is to register
// the symbol with the assembler.
- MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
// The implementation of symbol attributes is designed to match 'as', but it
// leaves much to be desired. It doesn't really make sense to arbitrarily add and
@@ -306,6 +273,10 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
// it has to be in a coalesced section, but this isn't enforced.
SD.setFlags(SD.getFlags() | SF_WeakDefinition);
break;
+
+ case MCSA_WeakDefAutoPrivate:
+ SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference);
+ break;
}
}
@@ -313,7 +284,8 @@ void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
// Encode the 'desc' value into the lowest implementation defined bits.
assert(DescValue == (DescValue & SF_DescFlagsMask) &&
"Invalid .desc value!");
- Assembler.getOrCreateSymbolData(*Symbol).setFlags(DescValue&SF_DescFlagsMask);
+ getAssembler().getOrCreateSymbolData(*Symbol).setFlags(
+ DescValue & SF_DescFlagsMask);
}
void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
@@ -321,14 +293,14 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
// FIXME: Darwin 'as' does appear to allow redef of a .comm by itself.
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
SD.setExternal(true);
SD.setCommon(Size, ByteAlignment);
}
void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
unsigned Size, unsigned ByteAlignment) {
- MCSectionData &SectData = Assembler.getOrCreateSectionData(*Section);
+ MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section);
// The symbol may not be present, which only creates the section.
if (!Symbol)
@@ -338,7 +310,7 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
+ MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
// Emit an align fragment if necessary.
if (ByteAlignment != 1)
@@ -346,8 +318,6 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
SD.setFragment(F);
- if (Assembler.isSymbolLinkerVisible(&SD))
- F->setAtom(&SD);
Symbol->setSection(*Section);
@@ -391,13 +361,12 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
- MCFragment *F = new MCAlignFragment(ByteAlignment, Value, ValueSize,
- MaxBytesToEmit, CurSectionData);
- F->setAtom(CurrentAtomMap.lookup(CurSectionData));
+ new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
+ getCurrentSectionData());
// Update the maximum alignment on the current section if necessary.
- if (ByteAlignment > CurSectionData->getAlignment())
- CurSectionData->setAlignment(ByteAlignment);
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
}
void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
@@ -405,24 +374,21 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
- CurSectionData);
+ getCurrentSectionData());
F->setEmitNops(true);
- F->setAtom(CurrentAtomMap.lookup(CurSectionData));
// Update the maximum alignment on the current section if necessary.
- if (ByteAlignment > CurSectionData->getAlignment())
- CurSectionData->setAlignment(ByteAlignment);
+ if (ByteAlignment > getCurrentSectionData()->getAlignment())
+ getCurrentSectionData()->setAlignment(ByteAlignment);
}
void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
- MCFragment *F = new MCOrgFragment(*Offset, Value, CurSectionData);
- F->setAtom(CurrentAtomMap.lookup(CurSectionData));
+ new MCOrgFragment(*Offset, Value, getCurrentSectionData());
}
void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) {
- MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData);
- IF->setAtom(CurrentAtomMap.lookup(CurSectionData));
+ MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData());
// Add the fixups and data.
//
@@ -431,7 +397,7 @@ void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) {
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
- Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
VecOS.flush();
IF->getCode() = Code;
@@ -444,7 +410,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
- Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
VecOS.flush();
// Add the fixups and data.
@@ -461,21 +427,21 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
if (Inst.getOperand(i).isExpr())
AddValueSymbols(Inst.getOperand(i).getExpr());
- CurSectionData->setHasInstructions(true);
+ getCurrentSectionData()->setHasInstructions(true);
// If this instruction doesn't need relaxation, just emit it as data.
- if (!Assembler.getBackend().MayNeedRelaxation(Inst)) {
+ if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) {
EmitInstToData(Inst);
return;
}
// Otherwise, if we are relaxing everything, relax the instruction as much as
// possible and emit it as data.
- if (Assembler.getRelaxAll()) {
+ if (getAssembler().getRelaxAll()) {
MCInst Relaxed;
- Assembler.getBackend().RelaxInstruction(Inst, Relaxed);
- while (Assembler.getBackend().MayNeedRelaxation(Relaxed))
- Assembler.getBackend().RelaxInstruction(Relaxed, Relaxed);
+ getAssembler().getBackend().RelaxInstruction(Inst, Relaxed);
+ while (getAssembler().getBackend().MayNeedRelaxation(Relaxed))
+ getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed);
EmitInstToData(Relaxed);
return;
}
@@ -485,7 +451,36 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
}
void MCMachOStreamer::Finish() {
- Assembler.Finish();
+ // We have to set the fragment atom associations so we can relax properly for
+ // Mach-O.
+
+ // First, scan the symbol table to build a lookup table from fragments to
+ // defining symbols.
+ DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap;
+ for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(),
+ ie = getAssembler().symbol_end(); it != ie; ++it) {
+ if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) &&
+ it->getFragment()) {
+ // An atom defining symbol should never be internal to a fragment.
+ assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!");
+ DefiningSymbolMap[it->getFragment()] = it;
+ }
+ }
+
+ // Set the fragment atom associations by tracking the last seen atom defining
+ // symbol.
+ for (MCAssembler::iterator it = getAssembler().begin(),
+ ie = getAssembler().end(); it != ie; ++it) {
+ MCSymbolData *CurrentAtom = 0;
+ for (MCSectionData::iterator it2 = it->begin(),
+ ie2 = it->end(); it2 != ie2; ++it2) {
+ if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2))
+ CurrentAtom = SD;
+ it2->setAtom(CurrentAtom);
+ }
+ }
+
+ this->MCObjectStreamer::Finish();
}
MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
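The rewritten Finish() assigns atoms in two passes: first build a lookup from fragments to the symbols that define them, then sweep the fragments in order, carrying the last atom-defining symbol forward. A compact standalone model; fragments are plain indices and Sym is invented for the sketch:

#include <cstddef>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Sym { std::string Name; int FragIdx; }; // symbol pinned to a fragment

int main() {
  // One section with five fragments; symbols "a" and "b" define atoms.
  std::vector<Sym> Symbols = {{"a", 0}, {"b", 3}};

  // Pass 1: fragment -> defining symbol.
  std::map<int, const Sym *> DefiningSymbolMap;
  for (std::size_t i = 0; i < Symbols.size(); ++i)
    DefiningSymbolMap[Symbols[i].FragIdx] = &Symbols[i];

  // Pass 2: walk the fragments, tracking the last atom-defining symbol.
  const Sym *CurrentAtom = 0;
  for (int Frag = 0; Frag < 5; ++Frag) {
    std::map<int, const Sym *>::iterator It = DefiningSymbolMap.find(Frag);
    if (It != DefiningSymbolMap.end())
      CurrentAtom = It->second;
    printf("fragment %d -> atom %s\n", Frag,
           CurrentAtom ? CurrentAtom->Name.c_str() : "<none>");
  }
  // Prints atoms: a a a b b
  return 0;
}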
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
new file mode 100644
index 0000000..d3f7f77
--- /dev/null
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -0,0 +1,39 @@
+//===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCObjectStreamer.h"
+
+#include "llvm/MC/MCAssembler.h"
+using namespace llvm;
+
+MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
+ raw_ostream &_OS, MCCodeEmitter *_Emitter)
+ : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB,
+ *_Emitter, _OS)),
+ CurSectionData(0)
+{
+}
+
+MCObjectStreamer::~MCObjectStreamer() {
+ delete Assembler;
+}
+
+void MCObjectStreamer::SwitchSection(const MCSection *Section) {
+ assert(Section && "Cannot switch to a null section!");
+
+ // If already in this section, then this is a noop.
+ if (Section == CurSection) return;
+
+ CurSection = Section;
+ CurSectionData = &getAssembler().getOrCreateSectionData(*Section);
+}
+
+void MCObjectStreamer::Finish() {
+ getAssembler().Finish();
+}
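The point of this new file is to pull the assembler-owning state out of MCMachOStreamer into a base that other object streamers (such as the WinCOFF streamer added to CMakeLists above) can share. The ownership pattern in miniature; all class names here are illustrative:

#include <cstdio>

struct Assembler {
  void Finish() { std::puts("laying out and writing the object"); }
};

// The base owns the assembler and exposes it through a getter, as
// MCObjectStreamer::getAssembler() does.
class ObjectStreamer {
  Assembler *Asm;

protected:
  ObjectStreamer() : Asm(new Assembler()) {}
  Assembler &getAssembler() { return *Asm; }

public:
  virtual ~ObjectStreamer() { delete Asm; }
  virtual void Finish() { getAssembler().Finish(); }
};

class MachOStreamer : public ObjectStreamer {
public:
  virtual void Finish() {
    // Do format-specific work first (e.g. atom assignment), then
    // delegate, as MCMachOStreamer::Finish() does above.
    std::puts("assigning fragment atoms");
    ObjectStreamer::Finish();
  }
};

int main() {
  MachOStreamer S;
  S.Finish();
  return 0;
}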
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 1cbe09a..465d983 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -23,7 +23,6 @@ using namespace llvm;
AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) {
CurBuf = NULL;
CurPtr = NULL;
- TokStart = 0;
}
AsmLexer::~AsmLexer() {
@@ -40,10 +39,6 @@ void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) {
TokStart = 0;
}
-SMLoc AsmLexer::getLoc() const {
- return SMLoc::getFromPointer(TokStart);
-}
-
/// ReturnError - Set the error to the specified string at the specified
/// location. This is defined to always return AsmToken::Error.
AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) {
@@ -229,7 +224,7 @@ StringRef AsmLexer::LexUntilEndOfStatement() {
TokStart = CurPtr;
while (!isAtStartOfComment(*CurPtr) && // Start of line comment.
- *CurPtr != ';' && // End of statement marker.
+ *CurPtr != ';' && // End of statement marker.
*CurPtr != '\n' &&
*CurPtr != '\r' &&
(*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) {
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 4523eab..793f3c7 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -18,34 +18,85 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
+namespace {
+
+/// \brief Generic implementations of directive handling, etc. which is shared
+/// (or the default, at least) for all assembler parsers.
+class GenericAsmParser : public MCAsmParserExtension {
+public:
+ GenericAsmParser() {}
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ // Debugging directives.
+ Parser.AddDirectiveHandler(this, ".file", MCAsmParser::DirectiveHandler(
+ &GenericAsmParser::ParseDirectiveFile));
+ Parser.AddDirectiveHandler(this, ".line", MCAsmParser::DirectiveHandler(
+ &GenericAsmParser::ParseDirectiveLine));
+ Parser.AddDirectiveHandler(this, ".loc", MCAsmParser::DirectiveHandler(
+ &GenericAsmParser::ParseDirectiveLoc));
+ }
+
+ bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file"
+ bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line"
+ bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc"
+};
+
+}
+
+namespace llvm {
+
+extern MCAsmParserExtension *createDarwinAsmParser();
+extern MCAsmParserExtension *createELFAsmParser();
+
+}
enum { DEFAULT_ADDRSPACE = 0 };
-AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out,
- const MCAsmInfo &_MAI)
- : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), TargetParser(0),
- CurBuffer(0) {
+AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
+ MCStreamer &_Out, const MCAsmInfo &_MAI)
+ : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
+ GenericParser(new GenericAsmParser), PlatformParser(0),
+ TargetParser(0), CurBuffer(0) {
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
-
- // Debugging directives.
- AddDirectiveHandler(".file", &AsmParser::ParseDirectiveFile);
- AddDirectiveHandler(".line", &AsmParser::ParseDirectiveLine);
- AddDirectiveHandler(".loc", &AsmParser::ParseDirectiveLoc);
-}
+ // Initialize the generic parser.
+ GenericParser->Initialize(*this);
+ // Initialize the platform / file format parser.
+ //
+ // FIXME: This is a hack, we need to (majorly) cleanup how these objects are
+ // created.
+ if (_MAI.hasSubsectionsViaSymbols()) {
+ PlatformParser = createDarwinAsmParser();
+ PlatformParser->Initialize(*this);
+ } else {
+ PlatformParser = createELFAsmParser();
+ PlatformParser->Initialize(*this);
+ }
+}
AsmParser::~AsmParser() {
+ delete PlatformParser;
+ delete GenericParser;
+}
+
+void AsmParser::setTargetParser(TargetAsmParser &P) {
+ assert(!TargetParser && "Target parser is already initialized!");
+ TargetParser = &P;
+ TargetParser->Initialize(*this);
}
void AsmParser::Warning(SMLoc L, const Twine &Msg) {
@@ -57,11 +108,6 @@ bool AsmParser::Error(SMLoc L, const Twine &Msg) {
return true;
}
-bool AsmParser::TokError(const char *Msg) {
- PrintMessage(Lexer.getLoc(), Msg, "error");
- return true;
-}
-
void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg,
const char *Type) const {
SrcMgr.PrintMessage(Loc, Msg, Type);
@@ -163,11 +209,6 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return false;
}
-MCSymbol *AsmParser::CreateSymbol(StringRef Name) {
- // FIXME: Inline into callers.
- return Ctx.GetOrCreateSymbol(Name);
-}
-
/// ParsePrimaryExpr - Parse a primary expression and return it.
/// primaryexpr ::= (parenexpr
/// primaryexpr ::= symbol
@@ -188,7 +229,7 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
case AsmToken::Identifier: {
// This is a symbol reference.
std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@');
- MCSymbol *Sym = CreateSymbol(Split.first);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
// Mark the symbol as used in an expression.
Sym->setUsedInExpr(true);
@@ -454,8 +495,8 @@ bool AsmParser::ParseStatement() {
IDVal = getTok().getString();
Lex(); // Consume the integer token to be used as an identifier token.
if (Lexer.getKind() != AsmToken::Colon) {
- if (!TheCondState.Ignore)
- return TokError("unexpected token at start of statement");
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
}
}
}
@@ -498,7 +539,7 @@ bool AsmParser::ParseStatement() {
// implicitly marked as external.
MCSymbol *Sym;
if (LocalLabelVal == -1)
- Sym = CreateSymbol(IDVal);
+ Sym = getContext().GetOrCreateSymbol(IDVal);
else
Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
if (!Sym->isUndefined() || Sym->isVariable())
@@ -530,158 +571,6 @@ bool AsmParser::ParseStatement() {
// Otherwise, we have a normal instruction or directive.
if (IDVal[0] == '.') {
- // FIXME: This should be driven based on a hash lookup and callback.
- if (IDVal == ".section")
- return ParseDirectiveDarwinSection();
- if (IDVal == ".text")
- // FIXME: This changes behavior based on the -static flag to the
- // assembler.
- return ParseDirectiveSectionSwitch("__TEXT", "__text",
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
- if (IDVal == ".const")
- return ParseDirectiveSectionSwitch("__TEXT", "__const");
- if (IDVal == ".static_const")
- return ParseDirectiveSectionSwitch("__TEXT", "__static_const");
- if (IDVal == ".cstring")
- return ParseDirectiveSectionSwitch("__TEXT","__cstring",
- MCSectionMachO::S_CSTRING_LITERALS);
- if (IDVal == ".literal4")
- return ParseDirectiveSectionSwitch("__TEXT", "__literal4",
- MCSectionMachO::S_4BYTE_LITERALS,
- 4);
- if (IDVal == ".literal8")
- return ParseDirectiveSectionSwitch("__TEXT", "__literal8",
- MCSectionMachO::S_8BYTE_LITERALS,
- 8);
- if (IDVal == ".literal16")
- return ParseDirectiveSectionSwitch("__TEXT","__literal16",
- MCSectionMachO::S_16BYTE_LITERALS,
- 16);
- if (IDVal == ".constructor")
- return ParseDirectiveSectionSwitch("__TEXT","__constructor");
- if (IDVal == ".destructor")
- return ParseDirectiveSectionSwitch("__TEXT","__destructor");
- if (IDVal == ".fvmlib_init0")
- return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init0");
- if (IDVal == ".fvmlib_init1")
- return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init1");
-
- // FIXME: The assembler manual claims that this has the self modify code
- // flag, at least on x86-32, but that does not appear to be correct.
- if (IDVal == ".symbol_stub")
- return ParseDirectiveSectionSwitch("__TEXT","__symbol_stub",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- // FIXME: Different on PPC and ARM.
- 0, 16);
- // FIXME: PowerPC only?
- if (IDVal == ".picsymbol_stub")
- return ParseDirectiveSectionSwitch("__TEXT","__picsymbol_stub",
- MCSectionMachO::S_SYMBOL_STUBS |
- MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 0, 26);
- if (IDVal == ".data")
- return ParseDirectiveSectionSwitch("__DATA", "__data");
- if (IDVal == ".static_data")
- return ParseDirectiveSectionSwitch("__DATA", "__static_data");
-
- // FIXME: The section names of these two are misspelled in the assembler
- // manual.
- if (IDVal == ".non_lazy_symbol_pointer")
- return ParseDirectiveSectionSwitch("__DATA", "__nl_symbol_ptr",
- MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS,
- 4);
- if (IDVal == ".lazy_symbol_pointer")
- return ParseDirectiveSectionSwitch("__DATA", "__la_symbol_ptr",
- MCSectionMachO::S_LAZY_SYMBOL_POINTERS,
- 4);
-
- if (IDVal == ".dyld")
- return ParseDirectiveSectionSwitch("__DATA", "__dyld");
- if (IDVal == ".mod_init_func")
- return ParseDirectiveSectionSwitch("__DATA", "__mod_init_func",
- MCSectionMachO::S_MOD_INIT_FUNC_POINTERS,
- 4);
- if (IDVal == ".mod_term_func")
- return ParseDirectiveSectionSwitch("__DATA", "__mod_term_func",
- MCSectionMachO::S_MOD_TERM_FUNC_POINTERS,
- 4);
- if (IDVal == ".const_data")
- return ParseDirectiveSectionSwitch("__DATA", "__const");
-
-
- if (IDVal == ".objc_class")
- return ParseDirectiveSectionSwitch("__OBJC", "__class",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_meta_class")
- return ParseDirectiveSectionSwitch("__OBJC", "__meta_class",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_cat_cls_meth")
- return ParseDirectiveSectionSwitch("__OBJC", "__cat_cls_meth",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_cat_inst_meth")
- return ParseDirectiveSectionSwitch("__OBJC", "__cat_inst_meth",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_protocol")
- return ParseDirectiveSectionSwitch("__OBJC", "__protocol",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_string_object")
- return ParseDirectiveSectionSwitch("__OBJC", "__string_object",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_cls_meth")
- return ParseDirectiveSectionSwitch("__OBJC", "__cls_meth",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_inst_meth")
- return ParseDirectiveSectionSwitch("__OBJC", "__inst_meth",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_cls_refs")
- return ParseDirectiveSectionSwitch("__OBJC", "__cls_refs",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
- MCSectionMachO::S_LITERAL_POINTERS,
- 4);
- if (IDVal == ".objc_message_refs")
- return ParseDirectiveSectionSwitch("__OBJC", "__message_refs",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
- MCSectionMachO::S_LITERAL_POINTERS,
- 4);
- if (IDVal == ".objc_symbols")
- return ParseDirectiveSectionSwitch("__OBJC", "__symbols",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_category")
- return ParseDirectiveSectionSwitch("__OBJC", "__category",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_class_vars")
- return ParseDirectiveSectionSwitch("__OBJC", "__class_vars",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_instance_vars")
- return ParseDirectiveSectionSwitch("__OBJC", "__instance_vars",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_module_info")
- return ParseDirectiveSectionSwitch("__OBJC", "__module_info",
- MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
- if (IDVal == ".objc_class_names")
- return ParseDirectiveSectionSwitch("__TEXT", "__cstring",
- MCSectionMachO::S_CSTRING_LITERALS);
- if (IDVal == ".objc_meth_var_types")
- return ParseDirectiveSectionSwitch("__TEXT", "__cstring",
- MCSectionMachO::S_CSTRING_LITERALS);
- if (IDVal == ".objc_meth_var_names")
- return ParseDirectiveSectionSwitch("__TEXT", "__cstring",
- MCSectionMachO::S_CSTRING_LITERALS);
- if (IDVal == ".objc_selector_strs")
- return ParseDirectiveSectionSwitch("__OBJC", "__selector_strs",
- MCSectionMachO::S_CSTRING_LITERALS);
-
- if (IDVal == ".tdata")
- return ParseDirectiveSectionSwitch("__DATA", "__thread_data",
- MCSectionMachO::S_THREAD_LOCAL_REGULAR);
- if (IDVal == ".tlv")
- return ParseDirectiveSectionSwitch("__DATA", "__thread_vars",
- MCSectionMachO::S_THREAD_LOCAL_VARIABLES);
- if (IDVal == ".thread_init_func")
- return ParseDirectiveSectionSwitch("__DATA", "__thread_init",
- MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS);
-
// Assembler features
if (IDVal == ".set")
return ParseDirectiveSet();
@@ -756,36 +645,25 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition);
if (IDVal == ".weak_reference")
return ParseDirectiveSymbolAttribute(MCSA_WeakReference);
+ if (IDVal == ".weak_def_can_be_hidden")
+ return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
if (IDVal == ".comm")
return ParseDirectiveComm(/*IsLocal=*/false);
if (IDVal == ".lcomm")
return ParseDirectiveComm(/*IsLocal=*/true);
- if (IDVal == ".zerofill")
- return ParseDirectiveDarwinZerofill();
- if (IDVal == ".desc")
- return ParseDirectiveDarwinSymbolDesc();
- if (IDVal == ".lsym")
- return ParseDirectiveDarwinLsym();
- if (IDVal == ".tbss")
- return ParseDirectiveDarwinTBSS();
-
- if (IDVal == ".subsections_via_symbols")
- return ParseDirectiveDarwinSubsectionsViaSymbols();
+
if (IDVal == ".abort")
return ParseDirectiveAbort();
if (IDVal == ".include")
return ParseDirectiveInclude();
- if (IDVal == ".dump")
- return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsDump=*/true);
- if (IDVal == ".load")
- return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsLoad=*/false);
-
- // Look up the handler in the handler table,
- bool(AsmParser::*Handler)(StringRef, SMLoc) = DirectiveMap[IDVal];
- if (Handler)
- return (this->*Handler)(IDVal, IDLoc);
-
+
+ // Look up the handler in the handler table.
+ std::pair<MCAsmParserExtension*, DirectiveHandler> Handler =
+ DirectiveMap.lookup(IDVal);
+ if (Handler.first)
+ return (Handler.first->*Handler.second)(IDVal, IDLoc);
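+    // Illustrative trace (directive name taken from the Darwin extension's
+    // registrations later in this patch): once DarwinAsmParser has installed
+    // itself, IDVal == ".desc" looks up to a (DarwinAsmParser*,
+    // &DarwinAsmParser::ParseDirectiveDesc) pair, and the call above
+    // dispatches through that pointer-to-member.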
+
// Target hook for parsing target specific directives.
if (!getTargetParser().ParseDirective(ID))
return false;
@@ -839,7 +717,6 @@ bool AsmParser::ParseAssignment(const StringRef &Name) {
SMLoc EqualLoc = Lexer.getLoc();
const MCExpr *Value;
- SMLoc StartLoc = Lexer.getLoc();
if (ParseExpression(Value))
return true;
@@ -867,7 +744,7 @@ bool AsmParser::ParseAssignment(const StringRef &Name) {
return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
Name + "'");
} else
- Sym = CreateSymbol(Name);
+ Sym = getContext().GetOrCreateSymbol(Name);
// FIXME: Handle '.'.
@@ -902,90 +779,15 @@ bool AsmParser::ParseDirectiveSet() {
if (ParseIdentifier(Name))
return TokError("expected identifier after '.set' directive");
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.set'");
Lex();
return ParseAssignment(Name);
}
-/// ParseDirectiveSection:
-/// ::= .section identifier (',' identifier)*
-/// FIXME: This should actually parse out the segment, section, attributes and
-/// sizeof_stub fields.
-bool AsmParser::ParseDirectiveDarwinSection() {
- SMLoc Loc = Lexer.getLoc();
-
- StringRef SectionName;
- if (ParseIdentifier(SectionName))
- return Error(Loc, "expected identifier after '.section' directive");
-
- // Verify there is a following comma.
- if (!Lexer.is(AsmToken::Comma))
- return TokError("unexpected token in '.section' directive");
-
- std::string SectionSpec = SectionName;
- SectionSpec += ",";
-
- // Add all the tokens until the end of the line, ParseSectionSpecifier will
- // handle this.
- StringRef EOL = Lexer.LexUntilEndOfStatement();
- SectionSpec.append(EOL.begin(), EOL.end());
-
- Lex();
- if (Lexer.isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.section' directive");
- Lex();
-
-
- StringRef Segment, Section;
- unsigned TAA, StubSize;
- std::string ErrorStr =
- MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section,
- TAA, StubSize);
-
- if (!ErrorStr.empty())
- return Error(Loc, ErrorStr.c_str());
-
- // FIXME: Arch specific.
- bool isText = Segment == "__TEXT"; // FIXME: Hack.
- Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize,
- isText ? SectionKind::getText()
- : SectionKind::getDataRel()));
- return false;
-}
-
-/// ParseDirectiveSectionSwitch -
-bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment,
- const char *Section,
- unsigned TAA, unsigned Align,
- unsigned StubSize) {
- if (Lexer.isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in section switching directive");
- Lex();
-
- // FIXME: Arch specific.
- bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack.
- Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize,
- isText ? SectionKind::getText()
- : SectionKind::getDataRel()));
-
- // Set the implicit alignment, if any.
- //
- // FIXME: This isn't really what 'as' does; I think it just uses the implicit
- // alignment on the section (e.g., if one manually inserts bytes into the
- // section, then just issueing the section switch directive will not realign
- // the section. However, this is arguably more reasonable behavior, and there
- // is no good reason for someone to intentionally emit incorrectly sized
- // values into the implicitly aligned sections.
- if (Align)
- Out.EmitValueToAlignment(Align, 0, 1, 0);
-
- return false;
-}
-
bool AsmParser::ParseEscapedString(std::string &Data) {
- assert(Lexer.is(AsmToken::String) && "Unexpected current token!");
+ assert(getLexer().is(AsmToken::String) && "Unexpected current token!");
Data = "";
StringRef Str = getTok().getStringContents();
@@ -1045,25 +847,25 @@ bool AsmParser::ParseEscapedString(std::string &Data) {
/// ParseDirectiveAscii:
/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
- if (Lexer.isNot(AsmToken::String))
+ if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '.ascii' or '.asciz' directive");
-
+
std::string Data;
if (ParseEscapedString(Data))
return true;
-
- Out.EmitBytes(Data, DEFAULT_ADDRSPACE);
+
+ getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE);
if (ZeroTerminated)
- Out.EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE);
-
+ getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE);
+
Lex();
-
- if (Lexer.is(AsmToken::EndOfStatement))
+
+ if (getLexer().is(AsmToken::EndOfStatement))
break;
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.ascii' or '.asciz' directive");
Lex();
}
@@ -1076,24 +878,24 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
/// ParseDirectiveValue
/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
bool AsmParser::ParseDirectiveValue(unsigned Size) {
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
const MCExpr *Value;
- SMLoc ATTRIBUTE_UNUSED StartLoc = Lexer.getLoc();
+ SMLoc ATTRIBUTE_UNUSED StartLoc = getLexer().getLoc();
if (ParseExpression(Value))
return true;
// Special case constant expressions to match code generator.
if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value))
- Out.EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE);
+ getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE);
else
- Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE);
+ getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE);
- if (Lexer.is(AsmToken::EndOfStatement))
+ if (getLexer().is(AsmToken::EndOfStatement))
break;
// FIXME: Improve diagnostic.
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
}
@@ -1111,18 +913,15 @@ bool AsmParser::ParseDirectiveSpace() {
return true;
int64_t FillExpr = 0;
- bool HasFillExpr = false;
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.space' directive");
Lex();
if (ParseAbsoluteExpression(FillExpr))
return true;
- HasFillExpr = true;
-
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.space' directive");
}
@@ -1132,7 +931,7 @@ bool AsmParser::ParseDirectiveSpace() {
return TokError("invalid number of bytes in '.space' directive");
  // FIXME: Sometimes the fill expr defaults to 'nop' instead of 0 when it
  // isn't supplied.
- Out.EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE);
+ getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE);
return false;
}
@@ -1144,7 +943,7 @@ bool AsmParser::ParseDirectiveFill() {
if (ParseAbsoluteExpression(NumValues))
return true;
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.fill' directive");
Lex();
@@ -1152,7 +951,7 @@ bool AsmParser::ParseDirectiveFill() {
if (ParseAbsoluteExpression(FillSize))
return true;
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.fill' directive");
Lex();
@@ -1160,7 +959,7 @@ bool AsmParser::ParseDirectiveFill() {
if (ParseAbsoluteExpression(FillExpr))
return true;
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.fill' directive");
Lex();
@@ -1169,7 +968,7 @@ bool AsmParser::ParseDirectiveFill() {
return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
for (uint64_t i = 0, e = NumValues; i != e; ++i)
- Out.EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE);
+ getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE);
return false;
}
@@ -1178,21 +977,20 @@ bool AsmParser::ParseDirectiveFill() {
/// ::= .org expression [ , expression ]
bool AsmParser::ParseDirectiveOrg() {
const MCExpr *Offset;
- SMLoc StartLoc = Lexer.getLoc();
if (ParseExpression(Offset))
return true;
// Parse optional fill expression.
int64_t FillExpr = 0;
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.org' directive");
Lex();
if (ParseAbsoluteExpression(FillExpr))
return true;
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.org' directive");
}
@@ -1200,7 +998,7 @@ bool AsmParser::ParseDirectiveOrg() {
  // FIXME: Only limited forms of relocatable expressions are accepted here;
  // the offset has to be relative to the current section.
- Out.EmitValueToOffset(Offset, FillExpr);
+ getStreamer().EmitValueToOffset(Offset, FillExpr);
return false;
}
@@ -1208,7 +1006,7 @@ bool AsmParser::ParseDirectiveOrg() {
/// ParseDirectiveAlign
/// ::= {.align, ...} expression [ , expression [ , expression ]]
bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
- SMLoc AlignmentLoc = Lexer.getLoc();
+ SMLoc AlignmentLoc = getLexer().getLoc();
int64_t Alignment;
if (ParseAbsoluteExpression(Alignment))
return true;
@@ -1217,30 +1015,30 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
bool HasFillExpr = false;
int64_t FillExpr = 0;
int64_t MaxBytesToFill = 0;
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
// The fill expression can be omitted while specifying a maximum number of
    // alignment bytes, e.g.:
// .align 3,,4
- if (Lexer.isNot(AsmToken::Comma)) {
+ if (getLexer().isNot(AsmToken::Comma)) {
HasFillExpr = true;
if (ParseAbsoluteExpression(FillExpr))
return true;
}
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
- MaxBytesLoc = Lexer.getLoc();
+ MaxBytesLoc = getLexer().getLoc();
if (ParseAbsoluteExpression(MaxBytesToFill))
return true;
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
}
}
@@ -1282,14 +1080,14 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
// FIXME: This should be using a target hook.
bool UseCodeAlign = false;
if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>(
- Out.getCurrentSection()))
+ getStreamer().getCurrentSection()))
UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
ValueSize == 1 && UseCodeAlign) {
- Out.EmitCodeAlignment(Alignment, MaxBytesToFill);
+ getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill);
} else {
// FIXME: Target specific behavior about how the "extra" bytes are filled.
- Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
+ getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
}
return false;
@@ -1298,21 +1096,21 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
/// ParseDirectiveSymbolAttribute
/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
for (;;) {
StringRef Name;
if (ParseIdentifier(Name))
return TokError("expected identifier in directive");
- MCSymbol *Sym = CreateSymbol(Name);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
- Out.EmitSymbolAttribute(Sym, Attr);
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
- if (Lexer.is(AsmToken::EndOfStatement))
+ if (getLexer().is(AsmToken::EndOfStatement))
break;
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
}
@@ -1330,20 +1128,20 @@ bool AsmParser::ParseDirectiveELFType() {
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
- MCSymbol *Sym = CreateSymbol(Name);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in '.type' directive");
Lex();
- if (Lexer.isNot(AsmToken::At))
+ if (getLexer().isNot(AsmToken::At))
return TokError("expected '@' before type");
Lex();
StringRef Type;
SMLoc TypeLoc;
- TypeLoc = Lexer.getLoc();
+ TypeLoc = getLexer().getLoc();
if (ParseIdentifier(Type))
return TokError("expected symbol type in directive");
@@ -1358,42 +1156,12 @@ bool AsmParser::ParseDirectiveELFType() {
if (Attr == MCSA_Invalid)
return Error(TypeLoc, "unsupported attribute in '.type' directive");
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.type' directive");
Lex();
- Out.EmitSymbolAttribute(Sym, Attr);
-
- return false;
-}
-
-/// ParseDirectiveDarwinSymbolDesc
-/// ::= .desc identifier , expression
-bool AsmParser::ParseDirectiveDarwinSymbolDesc() {
- StringRef Name;
- if (ParseIdentifier(Name))
- return TokError("expected identifier in directive");
-
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = CreateSymbol(Name);
-
- if (Lexer.isNot(AsmToken::Comma))
- return TokError("unexpected token in '.desc' directive");
- Lex();
-
- SMLoc DescLoc = Lexer.getLoc();
- int64_t DescValue;
- if (ParseAbsoluteExpression(DescValue))
- return true;
-
- if (Lexer.isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.desc' directive");
-
- Lex();
-
- // Set the n_desc field of this Symbol to this DescValue
- Out.EmitSymbolDesc(Sym, DescValue);
+ getStreamer().EmitSymbolAttribute(Sym, Attr);
return false;
}
@@ -1401,28 +1169,28 @@ bool AsmParser::ParseDirectiveDarwinSymbolDesc() {
/// ParseDirectiveComm
/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
bool AsmParser::ParseDirectiveComm(bool IsLocal) {
- SMLoc IDLoc = Lexer.getLoc();
+ SMLoc IDLoc = getLexer().getLoc();
StringRef Name;
if (ParseIdentifier(Name))
return TokError("expected identifier in directive");
// Handle the identifier as the key symbol.
- MCSymbol *Sym = CreateSymbol(Name);
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
- if (Lexer.isNot(AsmToken::Comma))
+ if (getLexer().isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
int64_t Size;
- SMLoc SizeLoc = Lexer.getLoc();
+ SMLoc SizeLoc = getLexer().getLoc();
if (ParseAbsoluteExpression(Size))
return true;
int64_t Pow2Alignment = 0;
SMLoc Pow2AlignmentLoc;
- if (Lexer.is(AsmToken::Comma)) {
+ if (getLexer().is(AsmToken::Comma)) {
Lex();
- Pow2AlignmentLoc = Lexer.getLoc();
+ Pow2AlignmentLoc = getLexer().getLoc();
if (ParseAbsoluteExpression(Pow2Alignment))
return true;
@@ -1434,7 +1202,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
}
}
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.comm' or '.lcomm' directive");
Lex();
@@ -1458,168 +1226,14 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
// '.lcomm' is equivalent to '.zerofill'.
// Create the Symbol as a common or local common with Size and Pow2Alignment
if (IsLocal) {
- Out.EmitZerofill(Ctx.getMachOSection("__DATA", "__bss",
- MCSectionMachO::S_ZEROFILL, 0,
- SectionKind::getBSS()),
- Sym, Size, 1 << Pow2Alignment);
+ getStreamer().EmitZerofill(Ctx.getMachOSection(
+ "__DATA", "__bss", MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()),
+ Sym, Size, 1 << Pow2Alignment);
return false;
}
- Out.EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
- return false;
-}
-
-/// ParseDirectiveDarwinZerofill
-/// ::= .zerofill segname , sectname [, identifier , size_expression [
-/// , align_expression ]]
-bool AsmParser::ParseDirectiveDarwinZerofill() {
- StringRef Segment;
- if (ParseIdentifier(Segment))
- return TokError("expected segment name after '.zerofill' directive");
-
- if (Lexer.isNot(AsmToken::Comma))
- return TokError("unexpected token in directive");
- Lex();
-
- StringRef Section;
- if (ParseIdentifier(Section))
- return TokError("expected section name after comma in '.zerofill' "
- "directive");
-
- // If this is the end of the line all that was wanted was to create the
- // the section but with no symbol.
- if (Lexer.is(AsmToken::EndOfStatement)) {
- // Create the zerofill section but no symbol
- Out.EmitZerofill(Ctx.getMachOSection(Segment, Section,
- MCSectionMachO::S_ZEROFILL, 0,
- SectionKind::getBSS()));
- return false;
- }
-
- if (Lexer.isNot(AsmToken::Comma))
- return TokError("unexpected token in directive");
- Lex();
-
- SMLoc IDLoc = Lexer.getLoc();
- StringRef IDStr;
- if (ParseIdentifier(IDStr))
- return TokError("expected identifier in directive");
-
- // handle the identifier as the key symbol.
- MCSymbol *Sym = CreateSymbol(IDStr);
-
- if (Lexer.isNot(AsmToken::Comma))
- return TokError("unexpected token in directive");
- Lex();
-
- int64_t Size;
- SMLoc SizeLoc = Lexer.getLoc();
- if (ParseAbsoluteExpression(Size))
- return true;
-
- int64_t Pow2Alignment = 0;
- SMLoc Pow2AlignmentLoc;
- if (Lexer.is(AsmToken::Comma)) {
- Lex();
- Pow2AlignmentLoc = Lexer.getLoc();
- if (ParseAbsoluteExpression(Pow2Alignment))
- return true;
- }
-
- if (Lexer.isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.zerofill' directive");
-
- Lex();
-
- if (Size < 0)
- return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less "
- "than zero");
-
- // NOTE: The alignment in the directive is a power of 2 value, the assembler
- // may internally end up wanting an alignment in bytes.
- // FIXME: Diagnose overflow.
- if (Pow2Alignment < 0)
- return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, "
- "can't be less than zero");
-
- if (!Sym->isUndefined())
- return Error(IDLoc, "invalid symbol redefinition");
-
- // Create the zerofill Symbol with Size and Pow2Alignment
- //
- // FIXME: Arch specific.
- Out.EmitZerofill(Ctx.getMachOSection(Segment, Section,
- MCSectionMachO::S_ZEROFILL, 0,
- SectionKind::getBSS()),
- Sym, Size, 1 << Pow2Alignment);
-
- return false;
-}
-
-/// ParseDirectiveDarwinTBSS
-/// ::= .tbss identifier, size, align
-bool AsmParser::ParseDirectiveDarwinTBSS() {
- SMLoc IDLoc = Lexer.getLoc();
- StringRef Name;
- if (ParseIdentifier(Name))
- return TokError("expected identifier in directive");
-
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = CreateSymbol(Name);
-
- if (Lexer.isNot(AsmToken::Comma))
- return TokError("unexpected token in directive");
- Lex();
-
- int64_t Size;
- SMLoc SizeLoc = Lexer.getLoc();
- if (ParseAbsoluteExpression(Size))
- return true;
-
- int64_t Pow2Alignment = 0;
- SMLoc Pow2AlignmentLoc;
- if (Lexer.is(AsmToken::Comma)) {
- Lex();
- Pow2AlignmentLoc = Lexer.getLoc();
- if (ParseAbsoluteExpression(Pow2Alignment))
- return true;
- }
-
- if (Lexer.isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.tbss' directive");
-
- Lex();
-
- if (Size < 0)
- return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than"
- "zero");
-
- // FIXME: Diagnose overflow.
- if (Pow2Alignment < 0)
- return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less"
- "than zero");
-
- if (!Sym->isUndefined())
- return Error(IDLoc, "invalid symbol redefinition");
-
- Out.EmitTBSSSymbol(Ctx.getMachOSection("__DATA", "__thread_bss",
- MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
- 0, SectionKind::getThreadBSS()),
- Sym, Size, 1 << Pow2Alignment);
-
- return false;
-}
-
-/// ParseDirectiveDarwinSubsectionsViaSymbols
-/// ::= .subsections_via_symbols
-bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() {
- if (Lexer.isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.subsections_via_symbols' directive");
-
- Lex();
-
- Out.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
-
+ getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment);
return false;
}
@@ -1627,11 +1241,11 @@ bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() {
/// ::= .abort [ "abort_string" ]
bool AsmParser::ParseDirectiveAbort() {
// FIXME: Use loc from directive.
- SMLoc Loc = Lexer.getLoc();
+ SMLoc Loc = getLexer().getLoc();
StringRef Str = "";
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Lexer.isNot(AsmToken::String))
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '.abort' directive");
Str = getTok().getString();
@@ -1639,7 +1253,7 @@ bool AsmParser::ParseDirectiveAbort() {
Lex();
}
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.abort' directive");
Lex();
@@ -1653,48 +1267,17 @@ bool AsmParser::ParseDirectiveAbort() {
return false;
}
-/// ParseDirectiveLsym
-/// ::= .lsym identifier , expression
-bool AsmParser::ParseDirectiveDarwinLsym() {
- StringRef Name;
- if (ParseIdentifier(Name))
- return TokError("expected identifier in directive");
-
- // Handle the identifier as the key symbol.
- MCSymbol *Sym = CreateSymbol(Name);
-
- if (Lexer.isNot(AsmToken::Comma))
- return TokError("unexpected token in '.lsym' directive");
- Lex();
-
- const MCExpr *Value;
- SMLoc StartLoc = Lexer.getLoc();
- if (ParseExpression(Value))
- return true;
-
- if (Lexer.isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.lsym' directive");
-
- Lex();
-
- // We don't currently support this directive.
- //
- // FIXME: Diagnostic location!
- (void) Sym;
- return TokError("directive '.lsym' is unsupported");
-}
-
/// ParseDirectiveInclude
/// ::= .include "filename"
bool AsmParser::ParseDirectiveInclude() {
- if (Lexer.isNot(AsmToken::String))
+ if (getLexer().isNot(AsmToken::String))
return TokError("expected string in '.include' directive");
std::string Filename = getTok().getString();
- SMLoc IncludeLoc = Lexer.getLoc();
+ SMLoc IncludeLoc = getLexer().getLoc();
Lex();
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.include' directive");
// Strip the quotes.
@@ -1712,29 +1295,6 @@ bool AsmParser::ParseDirectiveInclude() {
return false;
}
-/// ParseDirectiveDarwinDumpOrLoad
-/// ::= ( .dump | .load ) "filename"
-bool AsmParser::ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump) {
- if (Lexer.isNot(AsmToken::String))
- return TokError("expected string in '.dump' or '.load' directive");
-
- Lex();
-
- if (Lexer.isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.dump' or '.load' directive");
-
- Lex();
-
- // FIXME: If/when .dump and .load are implemented they will be done in the
- // the assembly parser and not have any need for an MCStreamer API.
- if (IsDump)
- Warning(IDLoc, "ignoring directive .dump for now");
- else
- Warning(IDLoc, "ignoring directive .load for now");
-
- return false;
-}
-
/// ParseDirectiveIf
/// ::= .if expression
bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
@@ -1748,7 +1308,7 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) {
if (ParseAbsoluteExpression(ExprValue))
return true;
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.if' directive");
Lex();
@@ -1781,7 +1341,7 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
if (ParseAbsoluteExpression(ExprValue))
return true;
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.elseif' directive");
Lex();
@@ -1795,7 +1355,7 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) {
/// ParseDirectiveElse
/// ::= .else
bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.else' directive");
Lex();
@@ -1819,7 +1379,7 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) {
/// ParseDirectiveEndIf
/// ::= .endif
bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.endif' directive");
Lex();
@@ -1838,40 +1398,40 @@ bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) {
/// ParseDirectiveFile
/// ::= .file [number] string
-bool AsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
+bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) {
// FIXME: I'm not sure what this is.
int64_t FileNumber = -1;
- if (Lexer.is(AsmToken::Integer)) {
+ if (getLexer().is(AsmToken::Integer)) {
FileNumber = getTok().getIntVal();
Lex();
-
+
if (FileNumber < 1)
return TokError("file number less than one");
}
- if (Lexer.isNot(AsmToken::String))
+ if (getLexer().isNot(AsmToken::String))
return TokError("unexpected token in '.file' directive");
-
+
StringRef Filename = getTok().getString();
Filename = Filename.substr(1, Filename.size()-2);
Lex();
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in '.file' directive");
if (FileNumber == -1)
- Out.EmitFileDirective(Filename);
+ getStreamer().EmitFileDirective(Filename);
else
- Out.EmitDwarfFileDirective(FileNumber, Filename);
-
+ getStreamer().EmitDwarfFileDirective(FileNumber, Filename);
+
return false;
}
/// ParseDirectiveLine
/// ::= .line [number]
-bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Lexer.isNot(AsmToken::Integer))
+bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.line' directive");
int64_t LineNumber = getTok().getIntVal();
@@ -1881,8 +1441,8 @@ bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
// FIXME: Do something with the .line.
}
- if (Lexer.isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.file' directive");
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.line' directive");
return false;
}
@@ -1890,8 +1450,8 @@ bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) {
/// ParseDirectiveLoc
/// ::= .loc number [number [number]]
-bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
- if (Lexer.isNot(AsmToken::Integer))
+bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
+ if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.loc' directive");
// FIXME: What are these fields?
@@ -1900,16 +1460,16 @@ bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
// FIXME: Validate file.
Lex();
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Lexer.isNot(AsmToken::Integer))
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.loc' directive");
int64_t Param2 = getTok().getIntVal();
(void) Param2;
Lex();
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Lexer.isNot(AsmToken::Integer))
+ if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().isNot(AsmToken::Integer))
return TokError("unexpected token in '.loc' directive");
int64_t Param3 = getTok().getIntVal();
@@ -1920,7 +1480,7 @@ bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) {
}
}
- if (Lexer.isNot(AsmToken::EndOfStatement))
+ if (getLexer().isNot(AsmToken::EndOfStatement))
    return TokError("unexpected token in '.loc' directive");
return false;
diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt
index a5c0818..25a7bf4 100644
--- a/lib/MC/MCParser/CMakeLists.txt
+++ b/lib/MC/MCParser/CMakeLists.txt
@@ -1,7 +1,10 @@
add_llvm_library(LLVMMCParser
AsmLexer.cpp
AsmParser.cpp
+ DarwinAsmParser.cpp
+ ELFAsmParser.cpp
MCAsmLexer.cpp
MCAsmParser.cpp
+ MCAsmParserExtension.cpp
TargetAsmParser.cpp
)
diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp
new file mode 100644
index 0000000..7d8639e
--- /dev/null
+++ b/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -0,0 +1,758 @@
+//===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SourceMgr.h"
+using namespace llvm;
+
+namespace {
+
+/// \brief Implementation of directive handling which is shared across all
+/// Darwin targets.
+class DarwinAsmParser : public MCAsmParserExtension {
+ bool ParseSectionSwitch(const char *Segment, const char *Section,
+ unsigned TAA = 0, unsigned ImplicitAlign = 0,
+ unsigned StubSize = 0);
+
+public:
+ DarwinAsmParser() {}
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ Parser.AddDirectiveHandler(this, ".desc", MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseDirectiveDesc));
+ Parser.AddDirectiveHandler(this, ".lsym", MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseDirectiveLsym));
+ Parser.AddDirectiveHandler(this, ".subsections_via_symbols",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols));
+ Parser.AddDirectiveHandler(this, ".dump", MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseDirectiveDumpOrLoad));
+ Parser.AddDirectiveHandler(this, ".load", MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseDirectiveDumpOrLoad));
+ Parser.AddDirectiveHandler(this, ".section", MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseDirectiveSection));
+ Parser.AddDirectiveHandler(this, ".secure_log_unique",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseDirectiveSecureLogUnique));
+ Parser.AddDirectiveHandler(this, ".secure_log_reset",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseDirectiveSecureLogReset));
+ Parser.AddDirectiveHandler(this, ".tbss",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseDirectiveTBSS));
+ Parser.AddDirectiveHandler(this, ".zerofill",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseDirectiveZerofill));
+
+ // Special section directives.
+ Parser.AddDirectiveHandler(this, ".const",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveConst));
+ Parser.AddDirectiveHandler(this, ".const_data",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveConstData));
+ Parser.AddDirectiveHandler(this, ".constructor",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveConstructor));
+ Parser.AddDirectiveHandler(this, ".cstring",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveCString));
+ Parser.AddDirectiveHandler(this, ".data",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveData));
+ Parser.AddDirectiveHandler(this, ".destructor",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveDestructor));
+ Parser.AddDirectiveHandler(this, ".dyld",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveDyld));
+ Parser.AddDirectiveHandler(this, ".fvmlib_init0",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveFVMLibInit0));
+ Parser.AddDirectiveHandler(this, ".fvmlib_init1",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveFVMLibInit1));
+ Parser.AddDirectiveHandler(this, ".lazy_symbol_pointer",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers));
+ Parser.AddDirectiveHandler(this, ".literal16",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveLiteral16));
+ Parser.AddDirectiveHandler(this, ".literal4",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveLiteral4));
+ Parser.AddDirectiveHandler(this, ".literal8",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveLiteral8));
+ Parser.AddDirectiveHandler(this, ".mod_init_func",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveModInitFunc));
+ Parser.AddDirectiveHandler(this, ".mod_term_func",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveModTermFunc));
+ Parser.AddDirectiveHandler(this, ".non_lazy_symbol_pointer",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers));
+ Parser.AddDirectiveHandler(this, ".objc_cat_cls_meth",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth));
+ Parser.AddDirectiveHandler(this, ".objc_cat_inst_meth",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth));
+ Parser.AddDirectiveHandler(this, ".objc_category",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCCategory));
+ Parser.AddDirectiveHandler(this, ".objc_class",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCClass));
+ Parser.AddDirectiveHandler(this, ".objc_class_names",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCClassNames));
+ Parser.AddDirectiveHandler(this, ".objc_class_vars",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCClassVars));
+ Parser.AddDirectiveHandler(this, ".objc_cls_meth",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCClsMeth));
+ Parser.AddDirectiveHandler(this, ".objc_cls_refs",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCClsRefs));
+ Parser.AddDirectiveHandler(this, ".objc_inst_meth",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCInstMeth));
+ Parser.AddDirectiveHandler(this, ".objc_instance_vars",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars));
+ Parser.AddDirectiveHandler(this, ".objc_message_refs",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs));
+ Parser.AddDirectiveHandler(this, ".objc_meta_class",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCMetaClass));
+ Parser.AddDirectiveHandler(this, ".objc_meth_var_names",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames));
+ Parser.AddDirectiveHandler(this, ".objc_meth_var_types",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes));
+ Parser.AddDirectiveHandler(this, ".objc_module_info",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo));
+ Parser.AddDirectiveHandler(this, ".objc_protocol",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCProtocol));
+ Parser.AddDirectiveHandler(this, ".objc_selector_strs",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs));
+ Parser.AddDirectiveHandler(this, ".objc_string_object",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCStringObject));
+ Parser.AddDirectiveHandler(this, ".objc_symbols",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveObjCSymbols));
+ Parser.AddDirectiveHandler(this, ".picsymbol_stub",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectivePICSymbolStub));
+ Parser.AddDirectiveHandler(this, ".static_const",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveStaticConst));
+ Parser.AddDirectiveHandler(this, ".static_data",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveStaticData));
+ Parser.AddDirectiveHandler(this, ".symbol_stub",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveSymbolStub));
+ Parser.AddDirectiveHandler(this, ".tdata",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveTData));
+ Parser.AddDirectiveHandler(this, ".text",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveText));
+ Parser.AddDirectiveHandler(this, ".thread_init_func",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveThreadInitFunc));
+ Parser.AddDirectiveHandler(this, ".tlv",
+ MCAsmParser::DirectiveHandler(
+ &DarwinAsmParser::ParseSectionDirectiveTLV));
+ }
+
+ bool ParseDirectiveDesc(StringRef, SMLoc);
+ bool ParseDirectiveDumpOrLoad(StringRef, SMLoc);
+ bool ParseDirectiveLsym(StringRef, SMLoc);
+ bool ParseDirectiveSection();
+ bool ParseDirectiveSecureLogReset(StringRef, SMLoc);
+ bool ParseDirectiveSecureLogUnique(StringRef, SMLoc);
+ bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc);
+ bool ParseDirectiveTBSS(StringRef, SMLoc);
+ bool ParseDirectiveZerofill(StringRef, SMLoc);
+
+ // Named Section Directive
+ bool ParseSectionDirectiveConst(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__const");
+ }
+ bool ParseSectionDirectiveStaticConst(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__static_const");
+ }
+ bool ParseSectionDirectiveCString(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveLiteral4(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__literal4",
+ MCSectionMachO::S_4BYTE_LITERALS, 4);
+ }
+ bool ParseSectionDirectiveLiteral8(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__literal8",
+ MCSectionMachO::S_8BYTE_LITERALS, 8);
+ }
+ bool ParseSectionDirectiveLiteral16(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__literal16",
+ MCSectionMachO::S_16BYTE_LITERALS, 16);
+ }
+ bool ParseSectionDirectiveConstructor(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__constructor");
+ }
+ bool ParseSectionDirectiveDestructor(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__destructor");
+ }
+ bool ParseSectionDirectiveFVMLibInit0(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__fvmlib_init0");
+ }
+ bool ParseSectionDirectiveFVMLibInit1(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__fvmlib_init1");
+ }
+ bool ParseSectionDirectiveSymbolStub(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__symbol_stub",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
+ // FIXME: Different on PPC and ARM.
+ 0, 16);
+ }
+ bool ParseSectionDirectivePICSymbolStub(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT","__picsymbol_stub",
+ MCSectionMachO::S_SYMBOL_STUBS |
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, 26);
+ }
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__data");
+ }
+ bool ParseSectionDirectiveStaticData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__static_data");
+ }
+ bool ParseSectionDirectiveNonLazySymbolPointers(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__nl_symbol_ptr",
+ MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveLazySymbolPointers(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__la_symbol_ptr",
+ MCSectionMachO::S_LAZY_SYMBOL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveDyld(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__dyld");
+ }
+ bool ParseSectionDirectiveModInitFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__mod_init_func",
+ MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveModTermFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__mod_term_func",
+ MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveConstData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__const");
+ }
+ bool ParseSectionDirectiveObjCClass(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__class",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCMetaClass(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__meta_class",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCatClsMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cat_cls_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCatInstMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cat_inst_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCProtocol(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__protocol",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCStringObject(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__string_object",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClsMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cls_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCInstMeth(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__inst_meth",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClsRefs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__cls_refs",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
+ MCSectionMachO::S_LITERAL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveObjCMessageRefs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__message_refs",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP |
+ MCSectionMachO::S_LITERAL_POINTERS, 4);
+ }
+ bool ParseSectionDirectiveObjCSymbols(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__symbols",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCCategory(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__category",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClassVars(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__class_vars",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCInstanceVars(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__instance_vars",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCModuleInfo(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__module_info",
+ MCSectionMachO::S_ATTR_NO_DEAD_STRIP);
+ }
+ bool ParseSectionDirectiveObjCClassNames(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCMethVarTypes(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCMethVarNames(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__cstring",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveObjCSelectorStrs(StringRef, SMLoc) {
+ return ParseSectionSwitch("__OBJC", "__selector_strs",
+ MCSectionMachO::S_CSTRING_LITERALS);
+ }
+ bool ParseSectionDirectiveTData(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_data",
+ MCSectionMachO::S_THREAD_LOCAL_REGULAR);
+ }
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch("__TEXT", "__text",
+ MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+ }
+ bool ParseSectionDirectiveTLV(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_vars",
+ MCSectionMachO::S_THREAD_LOCAL_VARIABLES);
+ }
+ bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) {
+ return ParseSectionSwitch("__DATA", "__thread_init",
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS);
+ }
+
+};
+
+}
+
+bool DarwinAsmParser::ParseSectionSwitch(const char *Segment,
+ const char *Section,
+ unsigned TAA, unsigned Align,
+ unsigned StubSize) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ // FIXME: Arch specific.
+ bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack.
+ getStreamer().SwitchSection(getContext().getMachOSection(
+ Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
+
+ // Set the implicit alignment, if any.
+ //
+  // FIXME: This isn't really what 'as' does; I think it just uses the implicit
+  // alignment on the section (e.g., if one manually inserts bytes into the
+  // section, then just issuing the section switch directive will not realign
+  // the section). However, this is arguably more reasonable behavior, and there
+  // is no good reason for someone to intentionally emit incorrectly sized
+  // values into the implicitly aligned sections.
+ if (Align)
+ getStreamer().EmitValueToAlignment(Align, 0, 1, 0);
+
+ return false;
+}
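+
+// Illustrative trace, assuming the handler registrations in Initialize above:
+// ".const_data" reaches this helper as ParseSectionSwitch("__DATA", "__const"),
+// which switches the streamer to __DATA,__const with no type/attributes, no
+// stub size, and no implicit alignment fixup.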
+
+/// ParseDirectiveDesc
+/// ::= .desc identifier , expression
+bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.desc' directive");
+ Lex();
+
+ int64_t DescValue;
+ if (getParser().ParseAbsoluteExpression(DescValue))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.desc' directive");
+
+ Lex();
+
+ // Set the n_desc field of this Symbol to this DescValue
+ getStreamer().EmitSymbolDesc(Sym, DescValue);
+
+ return false;
+}
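+
+// Example input, with a hypothetical symbol name:
+//   .desc _foo, 0x10
+// parses _foo as the key symbol and calls EmitSymbolDesc(_foo, 0x10), i.e. it
+// requests that the n_desc field of _foo's nlist entry be set to 16.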
+
+/// ParseDirectiveDumpOrLoad
+/// ::= ( .dump | .load ) "filename"
+bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive,
+ SMLoc IDLoc) {
+ bool IsDump = Directive == ".dump";
+ if (getLexer().isNot(AsmToken::String))
+ return TokError("expected string in '.dump' or '.load' directive");
+
+ Lex();
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.dump' or '.load' directive");
+
+ Lex();
+
+  // FIXME: If/when .dump and .load are implemented, they will be done in the
+  // assembly parser and will not have any need for an MCStreamer API.
+ if (IsDump)
+ Warning(IDLoc, "ignoring directive .dump for now");
+ else
+ Warning(IDLoc, "ignoring directive .load for now");
+
+ return false;
+}
+
+/// ParseDirectiveLsym
+/// ::= .lsym identifier , expression
+bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) {
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.lsym' directive");
+ Lex();
+
+ const MCExpr *Value;
+ if (getParser().ParseExpression(Value))
+ return true;
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.lsym' directive");
+
+ Lex();
+
+ // We don't currently support this directive.
+ //
+ // FIXME: Diagnostic location!
+ (void) Sym;
+ return TokError("directive '.lsym' is unsupported");
+}
+
+/// ParseDirectiveSection:
+/// ::= .section identifier (',' identifier)*
+bool DarwinAsmParser::ParseDirectiveSection() {
+ SMLoc Loc = getLexer().getLoc();
+
+ StringRef SectionName;
+ if (getParser().ParseIdentifier(SectionName))
+ return Error(Loc, "expected identifier after '.section' directive");
+
+ // Verify there is a following comma.
+ if (!getLexer().is(AsmToken::Comma))
+ return TokError("unexpected token in '.section' directive");
+
+ std::string SectionSpec = SectionName;
+ SectionSpec += ",";
+
+  // Add all the tokens until the end of the line; ParseSectionSpecifier will
+  // handle this.
+ StringRef EOL = getLexer().LexUntilEndOfStatement();
+ SectionSpec.append(EOL.begin(), EOL.end());
+
+ Lex();
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.section' directive");
+ Lex();
+
+ StringRef Segment, Section;
+ unsigned TAA, StubSize;
+ std::string ErrorStr =
+ MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section,
+ TAA, StubSize);
+
+ if (!ErrorStr.empty())
+ return Error(Loc, ErrorStr.c_str());
+
+ // FIXME: Arch specific.
+ bool isText = Segment == "__TEXT"; // FIXME: Hack.
+ getStreamer().SwitchSection(getContext().getMachOSection(
+ Segment, Section, TAA, StubSize,
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
+ return false;
+}
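+
+// Example input, using the standard Mach-O section specifier syntax:
+//   .section __TEXT,__text,regular,pure_instructions
+// Everything from the first comma onward is re-lexed into SectionSpec and
+// handed to MCSectionMachO::ParseSectionSpecifier, which splits out the
+// segment, section, type/attributes (TAA), and stub size.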
+
+/// ParseDirectiveSecureLogUnique
+/// ::= .secure_log_unique "log message"
+bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
+ std::string LogMessage;
+
+ if (getLexer().isNot(AsmToken::String))
+ LogMessage = "";
+  else {
+ LogMessage = getTok().getString();
+ Lex();
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.secure_log_unique' directive");
+
+  if (getContext().getSecureLogUsed())
+ return Error(IDLoc, ".secure_log_unique specified multiple times");
+
+ char *SecureLogFile = getContext().getSecureLogFile();
+ if (SecureLogFile == NULL)
+ return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE "
+ "environment variable unset.");
+
+ raw_ostream *OS = getContext().getSecureLog();
+ if (OS == NULL) {
+ std::string Err;
+ OS = new raw_fd_ostream(SecureLogFile, Err, raw_fd_ostream::F_Append);
+ if (!Err.empty()) {
+ delete OS;
+ return Error(IDLoc, Twine("can't open secure log file: ") +
+ SecureLogFile + " (" + Err + ")");
+ }
+ getContext().setSecureLog(OS);
+ }
+
+ int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc);
+ *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+ << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":"
+      << LogMessage << "\n";
+
+ getContext().setSecureLogUsed(true);
+
+ return false;
+}
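+
+// Usage sketch: with the AS_SECURE_LOG_FILE environment variable set, a
+// directive such as
+//   .secure_log_unique "some message"
+// appends "<buffer identifier>:<line number>:some message" to the log file;
+// a second use without an intervening .secure_log_reset is rejected via the
+// SecureLogUsed flag.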
+
+/// ParseDirectiveSecureLogReset
+/// ::= .secure_log_reset
+bool DarwinAsmParser::ParseDirectiveSecureLogReset(StringRef, SMLoc IDLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.secure_log_reset' directive");
+
+ Lex();
+
+ getContext().setSecureLogUsed(false);
+
+ return false;
+}
+
+/// ParseDirectiveSubsectionsViaSymbols
+/// ::= .subsections_via_symbols
+bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.subsections_via_symbols' directive");
+
+ Lex();
+
+ getStreamer().EmitAssemblerFlag(MCAF_SubsectionsViaSymbols);
+
+ return false;
+}
+
+/// ParseDirectiveTBSS
+/// ::= .tbss identifier, size, align
+bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) {
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef Name;
+ if (getParser().ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.tbss' directive");
+
+ Lex();
+
+ if (Size < 0)
+    return Error(SizeLoc, "invalid '.tbss' directive size, can't be less "
+                          "than zero");
+
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+    return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less "
+                                   "than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ getStreamer().EmitTBSSSymbol(getContext().getMachOSection(
+ "__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ 0, SectionKind::getThreadBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
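+
+// Example input, with a hypothetical symbol name:
+//   .tbss _tlv_buffer, 4, 2
+// defines _tlv_buffer in __DATA,__thread_bss with size 4 and alignment
+// 1 << 2 = 4 bytes; the alignment operand is a power-of-2 exponent.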
+
+/// ParseDirectiveZerofill
+/// ::= .zerofill segname , sectname [, identifier , size_expression [
+/// , align_expression ]]
+bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) {
+ StringRef Segment;
+ if (getParser().ParseIdentifier(Segment))
+ return TokError("expected segment name after '.zerofill' directive");
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ StringRef Section;
+ if (getParser().ParseIdentifier(Section))
+ return TokError("expected section name after comma in '.zerofill' "
+ "directive");
+
+  // If this is the end of the line, all that was wanted was to create the
+  // section, but with no symbol.
+ if (getLexer().is(AsmToken::EndOfStatement)) {
+ // Create the zerofill section but no symbol
+ getStreamer().EmitZerofill(getContext().getMachOSection(
+ Segment, Section, MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()));
+ return false;
+ }
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ SMLoc IDLoc = getLexer().getLoc();
+ StringRef IDStr;
+ if (getParser().ParseIdentifier(IDStr))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(IDStr);
+
+ if (getLexer().isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (getLexer().is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = getLexer().getLoc();
+ if (getParser().ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.zerofill' directive");
+
+ Lex();
+
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less "
+ "than zero");
+
+ // NOTE: The alignment in the directive is a power-of-2 exponent; the
+ // assembler may internally end up wanting an alignment in bytes.
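+ // For example, an align_expression of 4 requests a 1 << 4 == 16 byte
+ // alignment from the streamer.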
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, "
+ "can't be less than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ // Create the zerofill Symbol with Size and Pow2Alignment
+ //
+ // FIXME: Arch specific.
+ getStreamer().EmitZerofill(getContext().getMachOSection(
+ Segment, Section, MCSectionMachO::S_ZEROFILL,
+ 0, SectionKind::getBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createDarwinAsmParser() {
+ return new DarwinAsmParser;
+}
+
+}
diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp
new file mode 100644
index 0000000..7a54dd3
--- /dev/null
+++ b/lib/MC/MCParser/ELFAsmParser.cpp
@@ -0,0 +1,68 @@
+//===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+using namespace llvm;
+
+namespace {
+
+class ELFAsmParser : public MCAsmParserExtension {
+ bool ParseSectionSwitch(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind);
+
+public:
+ ELFAsmParser() {}
+
+ virtual void Initialize(MCAsmParser &Parser) {
+ // Call the base implementation.
+ this->MCAsmParserExtension::Initialize(Parser);
+
+ Parser.AddDirectiveHandler(this, ".data", MCAsmParser::DirectiveHandler(
+ &ELFAsmParser::ParseSectionDirectiveData));
+ Parser.AddDirectiveHandler(this, ".text", MCAsmParser::DirectiveHandler(
+ &ELFAsmParser::ParseSectionDirectiveText));
+ }
+
+ bool ParseSectionDirectiveData(StringRef, SMLoc) {
+ return ParseSectionSwitch(".data", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+ }
+ bool ParseSectionDirectiveText(StringRef, SMLoc) {
+ return ParseSectionSwitch(".text", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_EXECINSTR |
+ MCSectionELF::SHF_ALLOC, SectionKind::getText());
+ }
+};
+
+}
+
+bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type,
+ unsigned Flags, SectionKind Kind) {
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in section switching directive");
+ Lex();
+
+ getStreamer().SwitchSection(getContext().getELFSection(
+ Section, Type, Flags, Kind));
+
+ return false;
+}
+
+namespace llvm {
+
+MCAsmParserExtension *createELFAsmParser() {
+ return new ELFAsmParser;
+}
+
+}
diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp
index e5b2955..dceece7 100644
--- a/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/lib/MC/MCParser/MCAsmLexer.cpp
@@ -12,12 +12,16 @@
using namespace llvm;
-MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()) {
+MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) {
}
MCAsmLexer::~MCAsmLexer() {
}
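+/// getLoc - Get the source location of the start of the current token, as
+/// recorded in TokStart.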
+SMLoc MCAsmLexer::getLoc() const {
+ return SMLoc::getFromPointer(TokStart);
+}
+
SMLoc AsmToken::getLoc() const {
return SMLoc::getFromPointer(Str.data());
}
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index b8c2054..bee3064 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/Support/SourceMgr.h"
@@ -23,6 +24,11 @@ const AsmToken &MCAsmParser::getTok() {
return getLexer().getTok();
}
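+/// TokError - Emit an error at the current lexer location and return true, so
+/// parsers can report a token-level error with a single statement.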
+bool MCAsmParser::TokError(const char *Msg) {
+ Error(getLexer().getLoc(), Msg);
+ return true;
+}
+
bool MCAsmParser::ParseExpression(const MCExpr *&Res) {
SMLoc L;
return ParseExpression(Res, L);
diff --git a/lib/MC/MCParser/MCAsmParserExtension.cpp b/lib/MC/MCParser/MCAsmParserExtension.cpp
new file mode 100644
index 0000000..c30d306
--- /dev/null
+++ b/lib/MC/MCParser/MCAsmParserExtension.cpp
@@ -0,0 +1,21 @@
+//===-- MCAsmParserExtension.cpp - Asm Parser Hooks -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
+using namespace llvm;
+
+MCAsmParserExtension::MCAsmParserExtension() {
+}
+
+MCAsmParserExtension::~MCAsmParserExtension() {
+}
+
+void MCAsmParserExtension::Initialize(MCAsmParser &Parser) {
+ this->Parser = &Parser;
+}
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
index d57bb0c..eb53160 100644
--- a/lib/MC/MCSectionCOFF.cpp
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -44,28 +44,28 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << 'w';
else
OS << 'r';
- if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE)
+ if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE)
OS << 'n';
OS << "\"\n";
- if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_LNK_COMDAT) {
+ if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
switch (Selection) {
- case IMAGE_COMDAT_SELECT_NODUPLICATES:
+ case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES:
OS << "\t.linkonce one_only\n";
break;
- case IMAGE_COMDAT_SELECT_ANY:
+ case COFF::IMAGE_COMDAT_SELECT_ANY:
OS << "\t.linkonce discard\n";
break;
- case IMAGE_COMDAT_SELECT_SAME_SIZE:
+ case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE:
OS << "\t.linkonce same_size\n";
break;
- case IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH:
OS << "\t.linkonce same_contents\n";
break;
 //NOTE: as of binutils 2.20, there is no way to specify select largest
// with the .linkonce directive. For now, we treat it as an invalid
// comdat selection value.
- case IMAGE_COMDAT_SELECT_LARGEST:
+ case COFF::IMAGE_COMDAT_SELECT_LARGEST:
// OS << "\t.linkonce largest\n";
// break;
default:
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 3207e99..7ca0951 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -33,6 +33,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
default: llvm_unreachable("invalid fixup kind!");
case X86::reloc_pcrel_1byte:
case FK_Data_1: return 0;
+ case X86::reloc_pcrel_2byte:
case FK_Data_2: return 1;
case X86::reloc_pcrel_4byte:
case X86::reloc_riprel_4byte:
@@ -47,6 +48,7 @@ static bool isFixupKindPCRel(unsigned Kind) {
default:
return false;
case X86::reloc_pcrel_1byte:
+ case X86::reloc_pcrel_2byte:
case X86::reloc_pcrel_4byte:
case X86::reloc_riprel_4byte:
case X86::reloc_riprel_4byte_movq_load:
@@ -738,6 +740,51 @@ public:
Relocations[Fragment->getParent()].push_back(MRE);
}
+ void RecordTLVPRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
+ !Is64Bit &&
+ "Should only be called with a 32-bit TLVP relocation!");
+
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+ uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = 0;
+
+ // Get the symbol data.
+ MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ unsigned Index = SD_A->getIndex();
+
+ // We're only going to have a second symbol in pic mode and it'll be a
+ // subtraction from the picbase. For 32-bit pic the addend is the difference
+ // between the picbase and the next address. For 32-bit static the addend
+ // is zero.
+ if (Target.getSymB()) {
+ // If this is a subtraction then we're pcrel.
+ uint32_t FixupAddress =
+ Layout.getFragmentAddress(Fragment) + Fixup.getOffset();
+ MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
+ IsPCRel = 1;
+ FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) +
+ Target.getConstant());
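+ // The pcrel value is measured from the address just past the fixup, so
+ // bias it by the fixup width.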
+ FixedValue += 1 << Log2Size;
+ } else {
+ FixedValue = 0;
+ }
+
+ // struct relocation_info (8 bytes)
+ MachRelocationEntry MRE;
+ MRE.Word0 = Value;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (1 << 27) | // Extern
+ (RIT_TLV << 28)); // Type
+ Relocations[Fragment->getParent()].push_back(MRE);
+ }
+
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) {
@@ -749,6 +796,12 @@ public:
unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+ // If this is a 32-bit TLVP reloc it's handled a bit differently.
+ if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
+ RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
+ return;
+ }
+
// If this is a difference or a defined symbol plus an offset, then we need
// a scattered relocation entry.
// Differences always require scattered relocations.
@@ -772,7 +825,6 @@ public:
// See <reloc.h>.
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
- uint32_t Value = 0;
unsigned Index = 0;
unsigned IsExtern = 0;
unsigned Type = 0;
@@ -783,7 +835,6 @@ public:
// FIXME: Currently, these are never generated (see code below). I cannot
// find a case where they are actually emitted.
Type = RIT_Vanilla;
- Value = 0;
} else {
// Check whether we need an external or internal relocation.
if (doesSymbolRequireExternRelocation(SD)) {
@@ -794,11 +845,9 @@ public:
// undefined. This occurs with weak definitions, for example.
if (!SD->Symbol->isUndefined())
FixedValue -= Layout.getSymbolAddress(SD);
- Value = 0;
} else {
// The index is the section ordinal (1-based).
Index = SD->getFragment()->getParent()->getOrdinal() + 1;
- Value = Layout.getSymbolAddress(SD);
}
Type = RIT_Vanilla;
@@ -898,7 +947,7 @@ public:
const MCSymbol &Symbol = it->getSymbol();
// Ignore non-linker visible symbols.
- if (!Asm.isSymbolLinkerVisible(it))
+ if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
continue;
if (!it->isExternal() && !Symbol.isUndefined())
@@ -934,7 +983,7 @@ public:
const MCSymbol &Symbol = it->getSymbol();
// Ignore non-linker visible symbols.
- if (!Asm.isSymbolLinkerVisible(it))
+ if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
continue;
if (it->isExternal() || Symbol.isUndefined())
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
new file mode 100644
index 0000000..6804766
--- /dev/null
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -0,0 +1,71 @@
+//===-- llvm/MC/WinCOFFObjectWriter.cpp -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of a Win32 COFF object file writer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "WinCOFFObjectWriter"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+using namespace llvm;
+
+namespace {
+
+ class WinCOFFObjectWriter : public MCObjectWriter {
+ public:
+ WinCOFFObjectWriter(raw_ostream &OS);
+
+ // MCObjectWriter interface implementation.
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm);
+
+ void RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout);
+ };
+}
+
+WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS)
+ : MCObjectWriter(OS, true) {
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MCObjectWriter interface implementations
+
+void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
+}
+
+void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+}
+
+void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+}
+
+//------------------------------------------------------------------------------
+// WinCOFFObjectWriter factory function
+
+namespace llvm {
+ MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS) {
+ return new WinCOFFObjectWriter(OS);
+ }
+}
diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp
new file mode 100644
index 0000000..1030cdb
--- /dev/null
+++ b/lib/MC/WinCOFFStreamer.cpp
@@ -0,0 +1,198 @@
+//===-- llvm/MC/WinCOFFStreamer.cpp -----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an implementation of a Win32 COFF object file streamer.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "WinCOFFStreamer"
+
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/Target/TargetAsmBackend.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define dbg_notimpl(x) \
+ do { dbgs() << "not implemented, " << __FUNCTION__ << " (" << x << ")"; \
+ abort(); } while (false)
+
+namespace {
+class WinCOFFStreamer : public MCObjectStreamer {
+public:
+ WinCOFFStreamer(MCContext &Context,
+ TargetAsmBackend &TAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS);
+
+ // MCStreamer interface
+
+ virtual void EmitLabel(MCSymbol *Symbol);
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
+ virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment);
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment);
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment);
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+ virtual void EmitValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace);
+ virtual void EmitGPRel32Value(const MCExpr *Value);
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+ unsigned ValueSize, unsigned MaxBytesToEmit);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit);
+ virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value);
+ virtual void EmitFileDirective(StringRef Filename);
+ virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename);
+ virtual void EmitInstruction(const MCInst &Instruction);
+ virtual void Finish();
+};
+} // end anonymous namespace.
+
+WinCOFFStreamer::WinCOFFStreamer(MCContext &Context,
+ TargetAsmBackend &TAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS)
+ : MCObjectStreamer(Context, TAB, OS, &CE) {
+}
+
+// MCStreamer interface
+
+void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) {
+}
+
+void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ dbg_notimpl("Flag = " << Flag);
+}
+
+void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+}
+
+void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+ MCSymbolAttr Attribute) {
+}
+
+void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ dbg_notimpl("Symbol = " << Symbol->getName() << ", DescValue = "<< DescValue);
+}
+
+void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) {
+}
+
+void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {
+}
+
+void WinCOFFStreamer::EmitCOFFSymbolType(int Type) {
+}
+
+void WinCOFFStreamer::EndCOFFSymbolDef() {
+}
+
+void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ dbg_notimpl("Symbol = " << Symbol->getName() << ", Value = " << *Value);
+}
+
+void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+}
+
+void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+}
+
+void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
+ unsigned Size, unsigned ByteAlignment) {
+ MCSectionCOFF const *SectionCOFF =
+ static_cast<MCSectionCOFF const *>(Section);
+
+ dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " <<
+ Symbol->getName() << ", Size = " << Size << ", ByteAlignment = "
+ << ByteAlignment);
+}
+
+void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ MCSectionCOFF const *SectionCOFF =
+ static_cast<MCSectionCOFF const *>(Section);
+
+ dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " <<
+ Symbol->getName() << ", Size = " << Size << ", ByteAlignment = "
+ << ByteAlignment);
+}
+
+void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+}
+
+void WinCOFFStreamer::EmitValue(const MCExpr *Value, unsigned Size,
+ unsigned AddrSpace) {
+}
+
+void WinCOFFStreamer::EmitGPRel32Value(const MCExpr *Value) {
+ dbg_notimpl("Value = '" << *Value);
+}
+
+void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment,
+ int64_t Value,
+ unsigned ValueSize,
+ unsigned MaxBytesToEmit) {
+}
+
+void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0) {
+}
+
+void WinCOFFStreamer::EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0) {
+ dbg_notimpl("Offset = '" << *Offset << "', Value = " << Value);
+}
+
+void WinCOFFStreamer::EmitFileDirective(StringRef Filename) {
+ // Ignore for now; linkers don't care, and proper debug
+ // info will be a much larger effort.
+}
+
+void WinCOFFStreamer::EmitDwarfFileDirective(unsigned FileNo,
+ StringRef Filename) {
+ dbg_notimpl("FileNo = " << FileNo << ", Filename = '" << Filename << "'");
+}
+
+void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) {
+}
+
+void WinCOFFStreamer::Finish() {
+ MCObjectStreamer::Finish();
+}
+
+namespace llvm {
+ MCStreamer *createWinCOFFStreamer(MCContext &Context,
+ TargetAsmBackend &TAB,
+ MCCodeEmitter &CE,
+ raw_ostream &OS) {
+ return new WinCOFFStreamer(Context, TAB, CE, OS);
+ }
+}
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index f1347f9..366d2f7 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -8,6 +8,7 @@ add_llvm_library(LLVMSupport
ConstantRange.cpp
Debug.cpp
DeltaAlgorithm.cpp
+ DAGDeltaAlgorithm.cpp
Dwarf.cpp
ErrorHandling.cpp
FileUtilities.cpp
diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp
new file mode 100644
index 0000000..8145664
--- /dev/null
+++ b/lib/Support/DAGDeltaAlgorithm.cpp
@@ -0,0 +1,357 @@
+//===--- DAGDeltaAlgorithm.cpp - A DAG Minimization Algorithm --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// The algorithm we use attempts to exploit the dependency information by
+// minimizing top-down. We start by constructing an initial root set R, and
+// then iteratively:
+//
+// 1. Minimize the set R using the test predicate:
+// P'(S) = P(S union pred*(S))
+//
+// 2. Extend R to R' = R union pred(R).
+//
+// until a fixed point is reached.
+//
+// The idea is that we want to quickly prune entire portions of the graph, so we
+// try to find high-level nodes that can be eliminated with all of their
+// dependents.
+//
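+// For example, given changes {a, b, c} and dependency edges a -> b and
+// b -> c, the initial root set is {c} (the only change with no successors),
+// and testing {c} under P' actually runs P({a, b, c}) since pred*(c) is
+// {a, b}.
+//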
+// FIXME: The current algorithm doesn't actually provide a strong guarantee
+// about the minimality of the result. The problem is that after adding nodes to
+// the required set, we no longer consider them for elimination. For strictly
+// well formed predicates, this doesn't happen, but it commonly occurs in
+// practice when there are unmodelled dependencies. I believe we can resolve
+// this by allowing the required set to be minimized as well, but need more test
+// cases first.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DAGDeltaAlgorithm.h"
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <map>
+using namespace llvm;
+
+namespace {
+
+class DAGDeltaAlgorithmImpl {
+ friend class DeltaActiveSetHelper;
+
+public:
+ typedef DAGDeltaAlgorithm::change_ty change_ty;
+ typedef DAGDeltaAlgorithm::changeset_ty changeset_ty;
+ typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty;
+ typedef DAGDeltaAlgorithm::edge_ty edge_ty;
+
+private:
+ typedef std::vector<change_ty>::iterator pred_iterator_ty;
+ typedef std::vector<change_ty>::iterator succ_iterator_ty;
+ typedef std::set<change_ty>::iterator pred_closure_iterator_ty;
+ typedef std::set<change_ty>::iterator succ_closure_iterator_ty;
+
+ DAGDeltaAlgorithm &DDA;
+
+ const changeset_ty &Changes;
+ const std::vector<edge_ty> &Dependencies;
+
+ std::vector<change_ty> Roots;
+
+ /// Cache of failed test results. Successful test results are never cached
+ /// since we always reduce following a success. We maintain an independent
+ /// cache from that used by the individual delta passes because we may get
+ /// hits across multiple individual delta invocations.
+ mutable std::set<changeset_ty> FailedTestsCache;
+
+ // FIXME: Gross.
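+ // An edge (A, B) in Dependencies makes A a predecessor of B; any test of a
+ // change implicitly includes its full predecessor closure.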
+ std::map<change_ty, std::vector<change_ty> > Predecessors;
+ std::map<change_ty, std::vector<change_ty> > Successors;
+
+ std::map<change_ty, std::set<change_ty> > PredClosure;
+ std::map<change_ty, std::set<change_ty> > SuccClosure;
+
+private:
+ pred_iterator_ty pred_begin(change_ty Node) {
+ assert(Predecessors.count(Node) && "Invalid node!");
+ return Predecessors[Node].begin();
+ }
+ pred_iterator_ty pred_end(change_ty Node) {
+ assert(Predecessors.count(Node) && "Invalid node!");
+ return Predecessors[Node].end();
+ }
+
+ pred_closure_iterator_ty pred_closure_begin(change_ty Node) {
+ assert(PredClosure.count(Node) && "Invalid node!");
+ return PredClosure[Node].begin();
+ }
+ pred_closure_iterator_ty pred_closure_end(change_ty Node) {
+ assert(PredClosure.count(Node) && "Invalid node!");
+ return PredClosure[Node].end();
+ }
+
+ succ_iterator_ty succ_begin(change_ty Node) {
+ assert(Successors.count(Node) && "Invalid node!");
+ return Successors[Node].begin();
+ }
+ succ_iterator_ty succ_end(change_ty Node) {
+ assert(Successors.count(Node) && "Invalid node!");
+ return Successors[Node].end();
+ }
+
+ succ_closure_iterator_ty succ_closure_begin(change_ty Node) {
+ assert(SuccClosure.count(Node) && "Invalid node!");
+ return SuccClosure[Node].begin();
+ }
+ succ_closure_iterator_ty succ_closure_end(change_ty Node) {
+ assert(SuccClosure.count(Node) && "Invalid node!");
+ return SuccClosure[Node].end();
+ }
+
+ void UpdatedSearchState(const changeset_ty &Changes,
+ const changesetlist_ty &Sets,
+ const changeset_ty &Required) {
+ DDA.UpdatedSearchState(Changes, Sets, Required);
+ }
+
+ /// ExecuteOneTest - Execute a single test predicate on the change set \arg S.
+ bool ExecuteOneTest(const changeset_ty &S) {
+ // Check dependencies invariant.
+ DEBUG({
+ for (changeset_ty::const_iterator it = S.begin(),
+ ie = S.end(); it != ie; ++it)
+ for (succ_iterator_ty it2 = succ_begin(*it),
+ ie2 = succ_end(*it); it2 != ie2; ++it2)
+ assert(S.count(*it2) && "Attempt to run invalid changeset!");
+ });
+
+ return DDA.ExecuteOneTest(S);
+ }
+
+public:
+ DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
+ const changeset_ty &_Changes,
+ const std::vector<edge_ty> &_Dependencies);
+
+ changeset_ty Run();
+
+ /// GetTestResult - Get the test result for the active set \arg Changes with
+ /// \arg Required changes from the cache, executing the test if necessary.
+ ///
+ /// \param Changes - The set of active changes being minimized, which should
+ /// have their pred closure included in the test.
+ /// \param Required - The set of changes which have previously been
+ /// established to be required.
+ /// \return - The test result.
+ bool GetTestResult(const changeset_ty &Changes, const changeset_ty &Required);
+};
+
+/// Helper object for minimizing an active set of changes.
+class DeltaActiveSetHelper : public DeltaAlgorithm {
+ DAGDeltaAlgorithmImpl &DDAI;
+
+ const changeset_ty &Required;
+
+protected:
+ /// UpdatedSearchState - Callback used when the search state changes.
+ virtual void UpdatedSearchState(const changeset_ty &Changes,
+ const changesetlist_ty &Sets) {
+ DDAI.UpdatedSearchState(Changes, Sets, Required);
+ }
+
+ virtual bool ExecuteOneTest(const changeset_ty &S) {
+ return DDAI.GetTestResult(S, Required);
+ }
+
+public:
+ DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &_DDAI,
+ const changeset_ty &_Required)
+ : DDAI(_DDAI), Required(_Required) {}
+};
+
+}
+
+DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA,
+ const changeset_ty &_Changes,
+ const std::vector<edge_ty>
+ &_Dependencies)
+ : DDA(_DDA),
+ Changes(_Changes),
+ Dependencies(_Dependencies)
+{
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ Predecessors.insert(std::make_pair(*it, std::vector<change_ty>()));
+ Successors.insert(std::make_pair(*it, std::vector<change_ty>()));
+ }
+ for (std::vector<edge_ty>::const_iterator it = Dependencies.begin(),
+ ie = Dependencies.end(); it != ie; ++it) {
+ Predecessors[it->second].push_back(it->first);
+ Successors[it->first].push_back(it->second);
+ }
+
+ // Compute the roots.
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ if (succ_begin(*it) == succ_end(*it))
+ Roots.push_back(*it);
+
+ // Pre-compute the closure of the successor relation.
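+ // Walk from the roots toward their predecessors, folding each change's
+ // direct and transitive successors into its predecessors' closures.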
+ std::vector<change_ty> Worklist(Roots.begin(), Roots.end());
+ while (!Worklist.empty()) {
+ change_ty Change = Worklist.back();
+ Worklist.pop_back();
+
+ std::set<change_ty> &ChangeSuccs = SuccClosure[Change];
+ for (pred_iterator_ty it = pred_begin(Change),
+ ie = pred_end(Change); it != ie; ++it) {
+ SuccClosure[*it].insert(Change);
+ SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end());
+ Worklist.push_back(*it);
+ }
+ }
+
+ // Invert to form the predecessor closure map.
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ PredClosure.insert(std::make_pair(*it, std::set<change_ty>()));
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
+ ie2 = succ_closure_end(*it); it2 != ie2; ++it2)
+ PredClosure[*it2].insert(*it);
+
+ // Dump useful debug info.
+ DEBUG({
+ llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n";
+ llvm::errs() << "Changes: [";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ if (it != Changes.begin()) llvm::errs() << ", ";
+ llvm::errs() << *it;
+
+ if (succ_begin(*it) != succ_end(*it)) {
+ llvm::errs() << "(";
+ for (succ_iterator_ty it2 = succ_begin(*it),
+ ie2 = succ_end(*it); it2 != ie2; ++it2) {
+ if (it2 != succ_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << "->" << *it2;
+ }
+ llvm::errs() << ")";
+ }
+ }
+ llvm::errs() << "]\n";
+
+ llvm::errs() << "Roots: [";
+ for (std::vector<change_ty>::const_iterator it = Roots.begin(),
+ ie = Roots.end(); it != ie; ++it) {
+ if (it != Roots.begin()) llvm::errs() << ", ";
+ llvm::errs() << *it;
+ }
+ llvm::errs() << "]\n";
+
+ llvm::errs() << "Predecessor Closure:\n";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ llvm::errs() << format(" %-4d: [", *it);
+ for (pred_closure_iterator_ty it2 = pred_closure_begin(*it),
+ ie2 = pred_closure_end(*it); it2 != ie2; ++it2) {
+ if (it2 != pred_closure_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << *it2;
+ }
+ llvm::errs() << "]\n";
+ }
+
+ llvm::errs() << "Successor Closure:\n";
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it) {
+ llvm::errs() << format(" %-4d: [", *it);
+ for (succ_closure_iterator_ty it2 = succ_closure_begin(*it),
+ ie2 = succ_closure_end(*it); it2 != ie2; ++it2) {
+ if (it2 != succ_closure_begin(*it)) llvm::errs() << ", ";
+ llvm::errs() << *it2;
+ }
+ llvm::errs() << "]\n";
+ }
+
+ llvm::errs() << "\n\n";
+ });
+}
+
+bool DAGDeltaAlgorithmImpl::GetTestResult(const changeset_ty &Changes,
+ const changeset_ty &Required) {
+ changeset_ty Extended(Required);
+ Extended.insert(Changes.begin(), Changes.end());
+ for (changeset_ty::const_iterator it = Changes.begin(),
+ ie = Changes.end(); it != ie; ++it)
+ Extended.insert(pred_closure_begin(*it), pred_closure_end(*it));
+
+ if (FailedTestsCache.count(Extended))
+ return false;
+
+ bool Result = ExecuteOneTest(Extended);
+ if (!Result)
+ FailedTestsCache.insert(Extended);
+
+ return Result;
+}
+
+DAGDeltaAlgorithm::changeset_ty
+DAGDeltaAlgorithmImpl::Run() {
+ // The current set of changes we are minimizing, starting at the roots.
+ changeset_ty CurrentSet(Roots.begin(), Roots.end());
+
+ // The set of required changes.
+ changeset_ty Required;
+
+ // Iterate until the active set of changes is empty. Convergence is guaranteed
+ // assuming the input was a DAG.
+ //
+ // Invariant: CurrentSet intersect Required == {}
+ // Invariant: Required == (Required union succ*(Required))
+ while (!CurrentSet.empty()) {
+ DEBUG({
+ llvm::errs() << "DAG_DD - " << CurrentSet.size() << " active changes, "
+ << Required.size() << " required changes\n";
+ });
+
+ // Minimize the current set of changes.
+ DeltaActiveSetHelper Helper(*this, Required);
+ changeset_ty CurrentMinSet = Helper.Run(CurrentSet);
+
+ // Update the set of required changes. Since
+ // CurrentMinSet subset CurrentSet
+ // and after the last iteration,
+ // succ(CurrentSet) subset Required
+ // then
+ // succ(CurrentMinSet) subset Required
+ // and our invariant on Required is maintained.
+ Required.insert(CurrentMinSet.begin(), CurrentMinSet.end());
+
+ // Replace the current set with the predecessors of the minimized set of
+ // active changes.
+ CurrentSet.clear();
+ for (changeset_ty::const_iterator it = CurrentMinSet.begin(),
+ ie = CurrentMinSet.end(); it != ie; ++it)
+ CurrentSet.insert(pred_begin(*it), pred_end(*it));
+
+ // FIXME: We could enforce CurrentSet intersect Required == {} here if we
+ // wanted to protect against cyclic graphs.
+ }
+
+ return Required;
+}
+
+DAGDeltaAlgorithm::changeset_ty
+DAGDeltaAlgorithm::Run(const changeset_ty &Changes,
+ const std::vector<edge_ty> &Dependencies) {
+ return DAGDeltaAlgorithmImpl(*this, Changes, Dependencies).Run();
+}
diff --git a/lib/Support/DeltaAlgorithm.cpp b/lib/Support/DeltaAlgorithm.cpp
index d176548..9e52874 100644
--- a/lib/Support/DeltaAlgorithm.cpp
+++ b/lib/Support/DeltaAlgorithm.cpp
@@ -30,10 +30,10 @@ void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) {
// FIXME: This is really slow.
changeset_ty LHS, RHS;
- unsigned idx = 0;
+ unsigned idx = 0, N = S.size() / 2;
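+ // Split S into two contiguous halves (the first N elements and the rest)
+ // rather than alternating elements between the subsets.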
for (changeset_ty::const_iterator it = S.begin(),
ie = S.end(); it != ie; ++it, ++idx)
- ((idx & 1) ? LHS : RHS).insert(*it);
+ ((idx < N) ? LHS : RHS).insert(*it);
if (!LHS.empty())
Res.push_back(LHS);
if (!RHS.empty())
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp
index c19c2d6..96ce9d3 100644
--- a/lib/Support/Dwarf.cpp
+++ b/lib/Support/Dwarf.cpp
@@ -86,8 +86,8 @@ const char *llvm::dwarf::TagString(unsigned Tag) {
///
const char *llvm::dwarf::ChildrenString(unsigned Children) {
switch (Children) {
- case DW_CHILDREN_no: return "CHILDREN_no";
- case DW_CHILDREN_yes: return "CHILDREN_yes";
+ case DW_CHILDREN_no: return "DW_CHILDREN_no";
+ case DW_CHILDREN_yes: return "DW_CHILDREN_yes";
}
return 0;
}
@@ -207,27 +207,27 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) {
///
const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
switch (Encoding) {
- case DW_FORM_addr: return "FORM_addr";
- case DW_FORM_block2: return "FORM_block2";
- case DW_FORM_block4: return "FORM_block4";
- case DW_FORM_data2: return "FORM_data2";
- case DW_FORM_data4: return "FORM_data4";
- case DW_FORM_data8: return "FORM_data8";
- case DW_FORM_string: return "FORM_string";
- case DW_FORM_block: return "FORM_block";
- case DW_FORM_block1: return "FORM_block1";
- case DW_FORM_data1: return "FORM_data1";
- case DW_FORM_flag: return "FORM_flag";
- case DW_FORM_sdata: return "FORM_sdata";
- case DW_FORM_strp: return "FORM_strp";
- case DW_FORM_udata: return "FORM_udata";
- case DW_FORM_ref_addr: return "FORM_ref_addr";
- case DW_FORM_ref1: return "FORM_ref1";
- case DW_FORM_ref2: return "FORM_ref2";
- case DW_FORM_ref4: return "FORM_ref4";
- case DW_FORM_ref8: return "FORM_ref8";
- case DW_FORM_ref_udata: return "FORM_ref_udata";
- case DW_FORM_indirect: return "FORM_indirect";
+ case DW_FORM_addr: return "DW_FORM_addr";
+ case DW_FORM_block2: return "DW_FORM_block2";
+ case DW_FORM_block4: return "DW_FORM_block4";
+ case DW_FORM_data2: return "DW_FORM_data2";
+ case DW_FORM_data4: return "DW_FORM_data4";
+ case DW_FORM_data8: return "DW_FORM_data8";
+ case DW_FORM_string: return "DW_FORM_string";
+ case DW_FORM_block: return "DW_FORM_block";
+ case DW_FORM_block1: return "DW_FORM_block1";
+ case DW_FORM_data1: return "DW_FORM_data1";
+ case DW_FORM_flag: return "DW_FORM_flag";
+ case DW_FORM_sdata: return "DW_FORM_sdata";
+ case DW_FORM_strp: return "DW_FORM_strp";
+ case DW_FORM_udata: return "DW_FORM_udata";
+ case DW_FORM_ref_addr: return "DW_FORM_ref_addr";
+ case DW_FORM_ref1: return "DW_FORM_ref1";
+ case DW_FORM_ref2: return "DW_FORM_ref2";
+ case DW_FORM_ref4: return "DW_FORM_ref4";
+ case DW_FORM_ref8: return "DW_FORM_ref8";
+ case DW_FORM_ref_udata: return "DW_FORM_ref_udata";
+ case DW_FORM_indirect: return "DW_FORM_indirect";
}
return 0;
}
@@ -236,72 +236,159 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
/// encoding.
const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
switch (Encoding) {
- case DW_OP_addr: return "OP_addr";
- case DW_OP_deref: return "OP_deref";
- case DW_OP_const1u: return "OP_const1u";
- case DW_OP_const1s: return "OP_const1s";
- case DW_OP_const2u: return "OP_const2u";
- case DW_OP_const2s: return "OP_const2s";
- case DW_OP_const4u: return "OP_const4u";
- case DW_OP_const4s: return "OP_const4s";
- case DW_OP_const8u: return "OP_const8u";
- case DW_OP_const8s: return "OP_const8s";
- case DW_OP_constu: return "OP_constu";
- case DW_OP_consts: return "OP_consts";
- case DW_OP_dup: return "OP_dup";
- case DW_OP_drop: return "OP_drop";
- case DW_OP_over: return "OP_over";
- case DW_OP_pick: return "OP_pick";
- case DW_OP_swap: return "OP_swap";
- case DW_OP_rot: return "OP_rot";
- case DW_OP_xderef: return "OP_xderef";
- case DW_OP_abs: return "OP_abs";
- case DW_OP_and: return "OP_and";
- case DW_OP_div: return "OP_div";
- case DW_OP_minus: return "OP_minus";
- case DW_OP_mod: return "OP_mod";
- case DW_OP_mul: return "OP_mul";
- case DW_OP_neg: return "OP_neg";
- case DW_OP_not: return "OP_not";
- case DW_OP_or: return "OP_or";
- case DW_OP_plus: return "OP_plus";
- case DW_OP_plus_uconst: return "OP_plus_uconst";
- case DW_OP_shl: return "OP_shl";
- case DW_OP_shr: return "OP_shr";
- case DW_OP_shra: return "OP_shra";
- case DW_OP_xor: return "OP_xor";
- case DW_OP_skip: return "OP_skip";
- case DW_OP_bra: return "OP_bra";
- case DW_OP_eq: return "OP_eq";
- case DW_OP_ge: return "OP_ge";
- case DW_OP_gt: return "OP_gt";
- case DW_OP_le: return "OP_le";
- case DW_OP_lt: return "OP_lt";
- case DW_OP_ne: return "OP_ne";
- case DW_OP_lit0: return "OP_lit0";
- case DW_OP_lit1: return "OP_lit1";
- case DW_OP_lit31: return "OP_lit31";
- case DW_OP_reg0: return "OP_reg0";
- case DW_OP_reg1: return "OP_reg1";
- case DW_OP_reg31: return "OP_reg31";
- case DW_OP_breg0: return "OP_breg0";
- case DW_OP_breg1: return "OP_breg1";
- case DW_OP_breg31: return "OP_breg31";
- case DW_OP_regx: return "OP_regx";
- case DW_OP_fbreg: return "OP_fbreg";
- case DW_OP_bregx: return "OP_bregx";
- case DW_OP_piece: return "OP_piece";
- case DW_OP_deref_size: return "OP_deref_size";
- case DW_OP_xderef_size: return "OP_xderef_size";
- case DW_OP_nop: return "OP_nop";
- case DW_OP_push_object_address: return "OP_push_object_address";
- case DW_OP_call2: return "OP_call2";
- case DW_OP_call4: return "OP_call4";
- case DW_OP_call_ref: return "OP_call_ref";
- case DW_OP_form_tls_address: return "OP_form_tls_address";
- case DW_OP_call_frame_cfa: return "OP_call_frame_cfa";
- case DW_OP_lo_user: return "OP_lo_user";
- case DW_OP_hi_user: return "OP_hi_user";
+ case DW_OP_addr: return "DW_OP_addr";
+ case DW_OP_deref: return "DW_OP_deref";
+ case DW_OP_const1u: return "DW_OP_const1u";
+ case DW_OP_const1s: return "DW_OP_const1s";
+ case DW_OP_const2u: return "DW_OP_const2u";
+ case DW_OP_const2s: return "DW_OP_const2s";
+ case DW_OP_const4u: return "DW_OP_const4u";
+ case DW_OP_const4s: return "DW_OP_const4s";
+ case DW_OP_const8u: return "DW_OP_const8u";
+ case DW_OP_const8s: return "DW_OP_const8s";
+ case DW_OP_constu: return "DW_OP_constu";
+ case DW_OP_consts: return "DW_OP_consts";
+ case DW_OP_dup: return "DW_OP_dup";
+ case DW_OP_drop: return "DW_OP_drop";
+ case DW_OP_over: return "DW_OP_over";
+ case DW_OP_pick: return "DW_OP_pick";
+ case DW_OP_swap: return "DW_OP_swap";
+ case DW_OP_rot: return "DW_OP_rot";
+ case DW_OP_xderef: return "DW_OP_xderef";
+ case DW_OP_abs: return "DW_OP_abs";
+ case DW_OP_and: return "DW_OP_and";
+ case DW_OP_div: return "DW_OP_div";
+ case DW_OP_minus: return "DW_OP_minus";
+ case DW_OP_mod: return "DW_OP_mod";
+ case DW_OP_mul: return "DW_OP_mul";
+ case DW_OP_neg: return "DW_OP_neg";
+ case DW_OP_not: return "DW_OP_not";
+ case DW_OP_or: return "DW_OP_or";
+ case DW_OP_plus: return "DW_OP_plus";
+ case DW_OP_plus_uconst: return "DW_OP_plus_uconst";
+ case DW_OP_shl: return "DW_OP_shl";
+ case DW_OP_shr: return "DW_OP_shr";
+ case DW_OP_shra: return "DW_OP_shra";
+ case DW_OP_xor: return "DW_OP_xor";
+ case DW_OP_skip: return "DW_OP_skip";
+ case DW_OP_bra: return "DW_OP_bra";
+ case DW_OP_eq: return "DW_OP_eq";
+ case DW_OP_ge: return "DW_OP_ge";
+ case DW_OP_gt: return "DW_OP_gt";
+ case DW_OP_le: return "DW_OP_le";
+ case DW_OP_lt: return "DW_OP_lt";
+ case DW_OP_ne: return "DW_OP_ne";
+ case DW_OP_lit0: return "DW_OP_lit0";
+ case DW_OP_lit1: return "DW_OP_lit1";
+ case DW_OP_lit2: return "DW_OP_lit2";
+ case DW_OP_lit3: return "DW_OP_lit3";
+ case DW_OP_lit4: return "DW_OP_lit4";
+ case DW_OP_lit5: return "DW_OP_lit5";
+ case DW_OP_lit6: return "DW_OP_lit6";
+ case DW_OP_lit7: return "DW_OP_lit7";
+ case DW_OP_lit8: return "DW_OP_lit8";
+ case DW_OP_lit9: return "DW_OP_lit9";
+ case DW_OP_lit10: return "DW_OP_lit10";
+ case DW_OP_lit11: return "DW_OP_lit11";
+ case DW_OP_lit12: return "DW_OP_lit12";
+ case DW_OP_lit13: return "DW_OP_lit13";
+ case DW_OP_lit14: return "DW_OP_lit14";
+ case DW_OP_lit15: return "DW_OP_lit15";
+ case DW_OP_lit16: return "DW_OP_lit16";
+ case DW_OP_lit17: return "DW_OP_lit17";
+ case DW_OP_lit18: return "DW_OP_lit18";
+ case DW_OP_lit19: return "DW_OP_lit19";
+ case DW_OP_lit20: return "DW_OP_lit20";
+ case DW_OP_lit21: return "DW_OP_lit21";
+ case DW_OP_lit22: return "DW_OP_lit22";
+ case DW_OP_lit23: return "DW_OP_lit23";
+ case DW_OP_lit24: return "DW_OP_lit24";
+ case DW_OP_lit25: return "DW_OP_lit25";
+ case DW_OP_lit26: return "DW_OP_lit26";
+ case DW_OP_lit27: return "DW_OP_lit27";
+ case DW_OP_lit28: return "DW_OP_lit28";
+ case DW_OP_lit29: return "DW_OP_lit29";
+ case DW_OP_lit30: return "DW_OP_lit30";
+ case DW_OP_lit31: return "DW_OP_lit31";
+ case DW_OP_reg0: return "DW_OP_reg0";
+ case DW_OP_reg1: return "DW_OP_reg1";
+ case DW_OP_reg2: return "DW_OP_reg2";
+ case DW_OP_reg3: return "DW_OP_reg3";
+ case DW_OP_reg4: return "DW_OP_reg4";
+ case DW_OP_reg5: return "DW_OP_reg5";
+ case DW_OP_reg6: return "DW_OP_reg6";
+ case DW_OP_reg7: return "DW_OP_reg7";
+ case DW_OP_reg8: return "DW_OP_reg8";
+ case DW_OP_reg9: return "DW_OP_reg9";
+ case DW_OP_reg10: return "DW_OP_reg10";
+ case DW_OP_reg11: return "DW_OP_reg11";
+ case DW_OP_reg12: return "DW_OP_reg12";
+ case DW_OP_reg13: return "DW_OP_reg13";
+ case DW_OP_reg14: return "DW_OP_reg14";
+ case DW_OP_reg15: return "DW_OP_reg15";
+ case DW_OP_reg16: return "DW_OP_reg16";
+ case DW_OP_reg17: return "DW_OP_reg17";
+ case DW_OP_reg18: return "DW_OP_reg18";
+ case DW_OP_reg19: return "DW_OP_reg19";
+ case DW_OP_reg20: return "DW_OP_reg20";
+ case DW_OP_reg21: return "DW_OP_reg21";
+ case DW_OP_reg22: return "DW_OP_reg22";
+ case DW_OP_reg23: return "DW_OP_reg23";
+ case DW_OP_reg24: return "DW_OP_reg24";
+ case DW_OP_reg25: return "DW_OP_reg25";
+ case DW_OP_reg26: return "DW_OP_reg26";
+ case DW_OP_reg27: return "DW_OP_reg27";
+ case DW_OP_reg28: return "DW_OP_reg28";
+ case DW_OP_reg29: return "DW_OP_reg29";
+ case DW_OP_reg30: return "DW_OP_reg30";
+ case DW_OP_reg31: return "DW_OP_reg31";
+ case DW_OP_breg0: return "DW_OP_breg0";
+ case DW_OP_breg1: return "DW_OP_breg1";
+ case DW_OP_breg2: return "DW_OP_breg2";
+ case DW_OP_breg3: return "DW_OP_breg3";
+ case DW_OP_breg4: return "DW_OP_breg4";
+ case DW_OP_breg5: return "DW_OP_breg5";
+ case DW_OP_breg6: return "DW_OP_breg6";
+ case DW_OP_breg7: return "DW_OP_breg7";
+ case DW_OP_breg8: return "DW_OP_breg8";
+ case DW_OP_breg9: return "DW_OP_breg9";
+ case DW_OP_breg10: return "DW_OP_breg10";
+ case DW_OP_breg11: return "DW_OP_breg11";
+ case DW_OP_breg12: return "DW_OP_breg12";
+ case DW_OP_breg13: return "DW_OP_breg13";
+ case DW_OP_breg14: return "DW_OP_breg14";
+ case DW_OP_breg15: return "DW_OP_breg15";
+ case DW_OP_breg16: return "DW_OP_breg16";
+ case DW_OP_breg17: return "DW_OP_breg17";
+ case DW_OP_breg18: return "DW_OP_breg18";
+ case DW_OP_breg19: return "DW_OP_breg19";
+ case DW_OP_breg20: return "DW_OP_breg20";
+ case DW_OP_breg21: return "DW_OP_breg21";
+ case DW_OP_breg22: return "DW_OP_breg22";
+ case DW_OP_breg23: return "DW_OP_breg23";
+ case DW_OP_breg24: return "DW_OP_breg24";
+ case DW_OP_breg25: return "DW_OP_breg25";
+ case DW_OP_breg26: return "DW_OP_breg26";
+ case DW_OP_breg27: return "DW_OP_breg27";
+ case DW_OP_breg28: return "DW_OP_breg28";
+ case DW_OP_breg29: return "DW_OP_breg29";
+ case DW_OP_breg30: return "DW_OP_breg30";
+ case DW_OP_breg31: return "DW_OP_breg31";
+ case DW_OP_regx: return "DW_OP_regx";
+ case DW_OP_fbreg: return "DW_OP_fbreg";
+ case DW_OP_bregx: return "DW_OP_bregx";
+ case DW_OP_piece: return "DW_OP_piece";
+ case DW_OP_deref_size: return "DW_OP_deref_size";
+ case DW_OP_xderef_size: return "DW_OP_xderef_size";
+ case DW_OP_nop: return "DW_OP_nop";
+ case DW_OP_push_object_address: return "DW_OP_push_object_address";
+ case DW_OP_call2: return "DW_OP_call2";
+ case DW_OP_call4: return "DW_OP_call4";
+ case DW_OP_call_ref: return "DW_OP_call_ref";
+ case DW_OP_form_tls_address: return "DW_OP_form_tls_address";
+ case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa";
+ case DW_OP_lo_user: return "DW_OP_lo_user";
+ case DW_OP_hi_user: return "DW_OP_hi_user";
}
return 0;
}
@@ -310,23 +397,23 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
/// encoding.
const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
switch (Encoding) {
- case DW_ATE_address: return "ATE_address";
- case DW_ATE_boolean: return "ATE_boolean";
- case DW_ATE_complex_float: return "ATE_complex_float";
- case DW_ATE_float: return "ATE_float";
- case DW_ATE_signed: return "ATE_signed";
- case DW_ATE_signed_char: return "ATE_signed_char";
- case DW_ATE_unsigned: return "ATE_unsigned";
- case DW_ATE_unsigned_char: return "ATE_unsigned_char";
- case DW_ATE_imaginary_float: return "ATE_imaginary_float";
- case DW_ATE_packed_decimal: return "ATE_packed_decimal";
- case DW_ATE_numeric_string: return "ATE_numeric_string";
- case DW_ATE_edited: return "ATE_edited";
- case DW_ATE_signed_fixed: return "ATE_signed_fixed";
- case DW_ATE_unsigned_fixed: return "ATE_unsigned_fixed";
- case DW_ATE_decimal_float: return "ATE_decimal_float";
- case DW_ATE_lo_user: return "ATE_lo_user";
- case DW_ATE_hi_user: return "ATE_hi_user";
+ case DW_ATE_address: return "DW_ATE_address";
+ case DW_ATE_boolean: return "DW_ATE_boolean";
+ case DW_ATE_complex_float: return "DW_ATE_complex_float";
+ case DW_ATE_float: return "DW_ATE_float";
+ case DW_ATE_signed: return "DW_ATE_signed";
+ case DW_ATE_signed_char: return "DW_ATE_signed_char";
+ case DW_ATE_unsigned: return "DW_ATE_unsigned";
+ case DW_ATE_unsigned_char: return "DW_ATE_unsigned_char";
+ case DW_ATE_imaginary_float: return "DW_ATE_imaginary_float";
+ case DW_ATE_packed_decimal: return "DW_ATE_packed_decimal";
+ case DW_ATE_numeric_string: return "DW_ATE_numeric_string";
+ case DW_ATE_edited: return "DW_ATE_edited";
+ case DW_ATE_signed_fixed: return "DW_ATE_signed_fixed";
+ case DW_ATE_unsigned_fixed: return "DW_ATE_unsigned_fixed";
+ case DW_ATE_decimal_float: return "DW_ATE_decimal_float";
+ case DW_ATE_lo_user: return "DW_ATE_lo_user";
+ case DW_ATE_hi_user: return "DW_ATE_hi_user";
}
return 0;
}
@@ -335,11 +422,11 @@ const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
/// attribute.
const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
switch (Sign) {
- case DW_DS_unsigned: return "DS_unsigned";
- case DW_DS_leading_overpunch: return "DS_leading_overpunch";
- case DW_DS_trailing_overpunch: return "DS_trailing_overpunch";
- case DW_DS_leading_separate: return "DS_leading_separate";
- case DW_DS_trailing_separate: return "DS_trailing_separate";
+ case DW_DS_unsigned: return "DW_DS_unsigned";
+ case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch";
+ case DW_DS_trailing_overpunch: return "DW_DS_trailing_overpunch";
+ case DW_DS_leading_separate: return "DW_DS_leading_separate";
+ case DW_DS_trailing_separate: return "DW_DS_trailing_separate";
}
return 0;
}
@@ -348,11 +435,11 @@ const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
///
const char *llvm::dwarf::EndianityString(unsigned Endian) {
switch (Endian) {
- case DW_END_default: return "END_default";
- case DW_END_big: return "END_big";
- case DW_END_little: return "END_little";
- case DW_END_lo_user: return "END_lo_user";
- case DW_END_hi_user: return "END_hi_user";
+ case DW_END_default: return "DW_END_default";
+ case DW_END_big: return "DW_END_big";
+ case DW_END_little: return "DW_END_little";
+ case DW_END_lo_user: return "DW_END_lo_user";
+ case DW_END_hi_user: return "DW_END_hi_user";
}
return 0;
}
@@ -362,9 +449,9 @@ const char *llvm::dwarf::EndianityString(unsigned Endian) {
const char *llvm::dwarf::AccessibilityString(unsigned Access) {
switch (Access) {
// Accessibility codes
- case DW_ACCESS_public: return "ACCESS_public";
- case DW_ACCESS_protected: return "ACCESS_protected";
- case DW_ACCESS_private: return "ACCESS_private";
+ case DW_ACCESS_public: return "DW_ACCESS_public";
+ case DW_ACCESS_protected: return "DW_ACCESS_protected";
+ case DW_ACCESS_private: return "DW_ACCESS_private";
}
return 0;
}
@@ -373,9 +460,9 @@ const char *llvm::dwarf::AccessibilityString(unsigned Access) {
///
const char *llvm::dwarf::VisibilityString(unsigned Visibility) {
switch (Visibility) {
- case DW_VIS_local: return "VIS_local";
- case DW_VIS_exported: return "VIS_exported";
- case DW_VIS_qualified: return "VIS_qualified";
+ case DW_VIS_local: return "DW_VIS_local";
+ case DW_VIS_exported: return "DW_VIS_exported";
+ case DW_VIS_qualified: return "DW_VIS_qualified";
}
return 0;
}
@@ -384,9 +471,9 @@ const char *llvm::dwarf::VisibilityString(unsigned Visibility) {
///
const char *llvm::dwarf::VirtualityString(unsigned Virtuality) {
switch (Virtuality) {
- case DW_VIRTUALITY_none: return "VIRTUALITY_none";
- case DW_VIRTUALITY_virtual: return "VIRTUALITY_virtual";
- case DW_VIRTUALITY_pure_virtual: return "VIRTUALITY_pure_virtual";
+ case DW_VIRTUALITY_none: return "DW_VIRTUALITY_none";
+ case DW_VIRTUALITY_virtual: return "DW_VIRTUALITY_virtual";
+ case DW_VIRTUALITY_pure_virtual: return "DW_VIRTUALITY_pure_virtual";
}
return 0;
}
@@ -395,27 +482,27 @@ const char *llvm::dwarf::VirtualityString(unsigned Virtuality) {
///
const char *llvm::dwarf::LanguageString(unsigned Language) {
switch (Language) {
- case DW_LANG_C89: return "LANG_C89";
- case DW_LANG_C: return "LANG_C";
- case DW_LANG_Ada83: return "LANG_Ada83";
- case DW_LANG_C_plus_plus: return "LANG_C_plus_plus";
- case DW_LANG_Cobol74: return "LANG_Cobol74";
- case DW_LANG_Cobol85: return "LANG_Cobol85";
- case DW_LANG_Fortran77: return "LANG_Fortran77";
- case DW_LANG_Fortran90: return "LANG_Fortran90";
- case DW_LANG_Pascal83: return "LANG_Pascal83";
- case DW_LANG_Modula2: return "LANG_Modula2";
- case DW_LANG_Java: return "LANG_Java";
- case DW_LANG_C99: return "LANG_C99";
- case DW_LANG_Ada95: return "LANG_Ada95";
- case DW_LANG_Fortran95: return "LANG_Fortran95";
- case DW_LANG_PLI: return "LANG_PLI";
- case DW_LANG_ObjC: return "LANG_ObjC";
- case DW_LANG_ObjC_plus_plus: return "LANG_ObjC_plus_plus";
- case DW_LANG_UPC: return "LANG_UPC";
- case DW_LANG_D: return "LANG_D";
- case DW_LANG_lo_user: return "LANG_lo_user";
- case DW_LANG_hi_user: return "LANG_hi_user";
+ case DW_LANG_C89: return "DW_LANG_C89";
+ case DW_LANG_C: return "DW_LANG_C";
+ case DW_LANG_Ada83: return "DW_LANG_Ada83";
+ case DW_LANG_C_plus_plus: return "DW_LANG_C_plus_plus";
+ case DW_LANG_Cobol74: return "DW_LANG_Cobol74";
+ case DW_LANG_Cobol85: return "DW_LANG_Cobol85";
+ case DW_LANG_Fortran77: return "DW_LANG_Fortran77";
+ case DW_LANG_Fortran90: return "DW_LANG_Fortran90";
+ case DW_LANG_Pascal83: return "DW_LANG_Pascal83";
+ case DW_LANG_Modula2: return "DW_LANG_Modula2";
+ case DW_LANG_Java: return "DW_LANG_Java";
+ case DW_LANG_C99: return "DW_LANG_C99";
+ case DW_LANG_Ada95: return "DW_LANG_Ada95";
+ case DW_LANG_Fortran95: return "DW_LANG_Fortran95";
+ case DW_LANG_PLI: return "DW_LANG_PLI";
+ case DW_LANG_ObjC: return "DW_LANG_ObjC";
+ case DW_LANG_ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus";
+ case DW_LANG_UPC: return "DW_LANG_UPC";
+ case DW_LANG_D: return "DW_LANG_D";
+ case DW_LANG_lo_user: return "DW_LANG_lo_user";
+ case DW_LANG_hi_user: return "DW_LANG_hi_user";
}
return 0;
}
@@ -424,10 +511,10 @@ const char *llvm::dwarf::LanguageString(unsigned Language) {
///
const char *llvm::dwarf::CaseString(unsigned Case) {
switch (Case) {
- case DW_ID_case_sensitive: return "ID_case_sensitive";
- case DW_ID_up_case: return "ID_up_case";
- case DW_ID_down_case: return "ID_down_case";
- case DW_ID_case_insensitive: return "ID_case_insensitive";
+ case DW_ID_case_sensitive: return "DW_ID_case_sensitive";
+ case DW_ID_up_case: return "DW_ID_up_case";
+ case DW_ID_down_case: return "DW_ID_down_case";
+ case DW_ID_case_insensitive: return "DW_ID_case_insensitive";
}
return 0;
}
@@ -436,11 +523,11 @@ const char *llvm::dwarf::CaseString(unsigned Case) {
///
const char *llvm::dwarf::ConventionString(unsigned Convention) {
switch (Convention) {
- case DW_CC_normal: return "CC_normal";
- case DW_CC_program: return "CC_program";
- case DW_CC_nocall: return "CC_nocall";
- case DW_CC_lo_user: return "CC_lo_user";
- case DW_CC_hi_user: return "CC_hi_user";
+ case DW_CC_normal: return "DW_CC_normal";
+ case DW_CC_program: return "DW_CC_program";
+ case DW_CC_nocall: return "DW_CC_nocall";
+ case DW_CC_lo_user: return "DW_CC_lo_user";
+ case DW_CC_hi_user: return "DW_CC_hi_user";
}
return 0;
}
@@ -449,10 +536,10 @@ const char *llvm::dwarf::ConventionString(unsigned Convention) {
///
const char *llvm::dwarf::InlineCodeString(unsigned Code) {
switch (Code) {
- case DW_INL_not_inlined: return "INL_not_inlined";
- case DW_INL_inlined: return "INL_inlined";
- case DW_INL_declared_not_inlined: return "INL_declared_not_inlined";
- case DW_INL_declared_inlined: return "INL_declared_inlined";
+ case DW_INL_not_inlined: return "DW_INL_not_inlined";
+ case DW_INL_inlined: return "DW_INL_inlined";
+ case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined";
+ case DW_INL_declared_inlined: return "DW_INL_declared_inlined";
}
return 0;
}
@@ -461,8 +548,8 @@ const char *llvm::dwarf::InlineCodeString(unsigned Code) {
///
const char *llvm::dwarf::ArrayOrderString(unsigned Order) {
switch (Order) {
- case DW_ORD_row_major: return "ORD_row_major";
- case DW_ORD_col_major: return "ORD_col_major";
+ case DW_ORD_row_major: return "DW_ORD_row_major";
+ case DW_ORD_col_major: return "DW_ORD_col_major";
}
return 0;
}
@@ -471,8 +558,8 @@ const char *llvm::dwarf::ArrayOrderString(unsigned Order) {
/// descriptor.
const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) {
switch (Discriminant) {
- case DW_DSC_label: return "DSC_label";
- case DW_DSC_range: return "DSC_range";
+ case DW_DSC_label: return "DW_DSC_label";
+ case DW_DSC_range: return "DW_DSC_range";
}
return 0;
}
@@ -481,18 +568,18 @@ const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) {
///
const char *llvm::dwarf::LNStandardString(unsigned Standard) {
switch (Standard) {
- case DW_LNS_copy: return "LNS_copy";
- case DW_LNS_advance_pc: return "LNS_advance_pc";
- case DW_LNS_advance_line: return "LNS_advance_line";
- case DW_LNS_set_file: return "LNS_set_file";
- case DW_LNS_set_column: return "LNS_set_column";
- case DW_LNS_negate_stmt: return "LNS_negate_stmt";
- case DW_LNS_set_basic_block: return "LNS_set_basic_block";
- case DW_LNS_const_add_pc: return "LNS_const_add_pc";
- case DW_LNS_fixed_advance_pc: return "LNS_fixed_advance_pc";
- case DW_LNS_set_prologue_end: return "LNS_set_prologue_end";
- case DW_LNS_set_epilogue_begin: return "LNS_set_epilogue_begin";
- case DW_LNS_set_isa: return "LNS_set_isa";
+ case DW_LNS_copy: return "DW_LNS_copy";
+ case DW_LNS_advance_pc: return "DW_LNS_advance_pc";
+ case DW_LNS_advance_line: return "DW_LNS_advance_line";
+ case DW_LNS_set_file: return "DW_LNS_set_file";
+ case DW_LNS_set_column: return "DW_LNS_set_column";
+ case DW_LNS_negate_stmt: return "DW_LNS_negate_stmt";
+ case DW_LNS_set_basic_block: return "DW_LNS_set_basic_block";
+ case DW_LNS_const_add_pc: return "DW_LNS_const_add_pc";
+ case DW_LNS_fixed_advance_pc: return "DW_LNS_fixed_advance_pc";
+ case DW_LNS_set_prologue_end: return "DW_LNS_set_prologue_end";
+ case DW_LNS_set_epilogue_begin: return "DW_LNS_set_epilogue_begin";
+ case DW_LNS_set_isa: return "DW_LNS_set_isa";
}
return 0;
}
@@ -502,11 +589,11 @@ const char *llvm::dwarf::LNStandardString(unsigned Standard) {
const char *llvm::dwarf::LNExtendedString(unsigned Encoding) {
switch (Encoding) {
// Line Number Extended Opcode Encodings
- case DW_LNE_end_sequence: return "LNE_end_sequence";
- case DW_LNE_set_address: return "LNE_set_address";
- case DW_LNE_define_file: return "LNE_define_file";
- case DW_LNE_lo_user: return "LNE_lo_user";
- case DW_LNE_hi_user: return "LNE_hi_user";
+ case DW_LNE_end_sequence: return "DW_LNE_end_sequence";
+ case DW_LNE_set_address: return "DW_LNE_set_address";
+ case DW_LNE_define_file: return "DW_LNE_define_file";
+ case DW_LNE_lo_user: return "DW_LNE_lo_user";
+ case DW_LNE_hi_user: return "DW_LNE_hi_user";
}
return 0;
}
@@ -516,11 +603,11 @@ const char *llvm::dwarf::LNExtendedString(unsigned Encoding) {
const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
switch (Encoding) {
// Macinfo Type Encodings
- case DW_MACINFO_define: return "MACINFO_define";
- case DW_MACINFO_undef: return "MACINFO_undef";
- case DW_MACINFO_start_file: return "MACINFO_start_file";
- case DW_MACINFO_end_file: return "MACINFO_end_file";
- case DW_MACINFO_vendor_ext: return "MACINFO_vendor_ext";
+ case DW_MACINFO_define: return "DW_MACINFO_define";
+ case DW_MACINFO_undef: return "DW_MACINFO_undef";
+ case DW_MACINFO_start_file: return "DW_MACINFO_start_file";
+ case DW_MACINFO_end_file: return "DW_MACINFO_end_file";
+ case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext";
}
return 0;
}
@@ -529,33 +616,33 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
/// encodings.
const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
switch (Encoding) {
- case DW_CFA_advance_loc: return "CFA_advance_loc";
- case DW_CFA_offset: return "CFA_offset";
- case DW_CFA_restore: return "CFA_restore";
- case DW_CFA_set_loc: return "CFA_set_loc";
- case DW_CFA_advance_loc1: return "CFA_advance_loc1";
- case DW_CFA_advance_loc2: return "CFA_advance_loc2";
- case DW_CFA_advance_loc4: return "CFA_advance_loc4";
- case DW_CFA_offset_extended: return "CFA_offset_extended";
- case DW_CFA_restore_extended: return "CFA_restore_extended";
- case DW_CFA_undefined: return "CFA_undefined";
- case DW_CFA_same_value: return "CFA_same_value";
- case DW_CFA_register: return "CFA_register";
- case DW_CFA_remember_state: return "CFA_remember_state";
- case DW_CFA_restore_state: return "CFA_restore_state";
- case DW_CFA_def_cfa: return "CFA_def_cfa";
- case DW_CFA_def_cfa_register: return "CFA_def_cfa_register";
- case DW_CFA_def_cfa_offset: return "CFA_def_cfa_offset";
- case DW_CFA_def_cfa_expression: return "CFA_def_cfa_expression";
- case DW_CFA_expression: return "CFA_expression";
- case DW_CFA_offset_extended_sf: return "CFA_offset_extended_sf";
- case DW_CFA_def_cfa_sf: return "CFA_def_cfa_sf";
- case DW_CFA_def_cfa_offset_sf: return "CFA_def_cfa_offset_sf";
- case DW_CFA_val_offset: return "CFA_val_offset";
- case DW_CFA_val_offset_sf: return "CFA_val_offset_sf";
- case DW_CFA_val_expression: return "CFA_val_expression";
- case DW_CFA_lo_user: return "CFA_lo_user";
- case DW_CFA_hi_user: return "CFA_hi_user";
+ case DW_CFA_advance_loc: return "DW_CFA_advance_loc";
+ case DW_CFA_offset: return "DW_CFA_offset";
+ case DW_CFA_restore: return "DW_CFA_restore";
+ case DW_CFA_set_loc: return "DW_CFA_set_loc";
+ case DW_CFA_advance_loc1: return "DW_CFA_advance_loc1";
+ case DW_CFA_advance_loc2: return "DW_CFA_advance_loc2";
+ case DW_CFA_advance_loc4: return "DW_CFA_advance_loc4";
+ case DW_CFA_offset_extended: return "DW_CFA_offset_extended";
+ case DW_CFA_restore_extended: return "DW_CFA_restore_extended";
+ case DW_CFA_undefined: return "DW_CFA_undefined";
+ case DW_CFA_same_value: return "DW_CFA_same_value";
+ case DW_CFA_register: return "DW_CFA_register";
+ case DW_CFA_remember_state: return "DW_CFA_remember_state";
+ case DW_CFA_restore_state: return "DW_CFA_restore_state";
+ case DW_CFA_def_cfa: return "DW_CFA_def_cfa";
+ case DW_CFA_def_cfa_register: return "DW_CFA_def_cfa_register";
+ case DW_CFA_def_cfa_offset: return "DW_CFA_def_cfa_offset";
+ case DW_CFA_def_cfa_expression: return "DW_CFA_def_cfa_expression";
+ case DW_CFA_expression: return "DW_CFA_expression";
+ case DW_CFA_offset_extended_sf: return "DW_CFA_offset_extended_sf";
+ case DW_CFA_def_cfa_sf: return "DW_CFA_def_cfa_sf";
+ case DW_CFA_def_cfa_offset_sf: return "DW_CFA_def_cfa_offset_sf";
+ case DW_CFA_val_offset: return "DW_CFA_val_offset";
+ case DW_CFA_val_offset_sf: return "DW_CFA_val_offset_sf";
+ case DW_CFA_val_expression: return "DW_CFA_val_expression";
+ case DW_CFA_lo_user: return "DW_CFA_lo_user";
+ case DW_CFA_hi_user: return "DW_CFA_hi_user";
}
return 0;
}
diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp
index 095395f..1bde2fe 100644
--- a/lib/Support/FileUtilities.cpp
+++ b/lib/Support/FileUtilities.cpp
@@ -51,7 +51,15 @@ static const char *BackupNumber(const char *Pos, const char *FirstChar) {
if (!isNumberChar(*Pos)) return Pos;
// Otherwise, return to the start of the number.
+ bool HasPeriod = false;
while (Pos > FirstChar && isNumberChar(Pos[-1])) {
+    // Back up over at most one period.
+ if (Pos[-1] == '.') {
+ if (HasPeriod)
+ break;
+ HasPeriod = true;
+ }
+
--Pos;
if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1]))
break;
@@ -204,16 +212,16 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA,
const char *F1P = File1Start;
const char *F2P = File2Start;
- if (A_size == B_size) {
- // Are the buffers identical? Common case: Handle this efficiently.
- if (std::memcmp(File1Start, File2Start, A_size) == 0)
- return 0;
+ // Are the buffers identical? Common case: Handle this efficiently.
+ if (A_size == B_size &&
+ std::memcmp(File1Start, File2Start, A_size) == 0)
+ return 0;
- if (AbsTol == 0 && RelTol == 0) {
- if (Error)
- *Error = "Files differ without tolerance allowance";
- return 1; // Files different!
- }
+  // Otherwise, we are done if no tolerances are set.
+ if (AbsTol == 0 && RelTol == 0) {
+ if (Error)
+ *Error = "Files differ without tolerance allowance";
+ return 1; // Files different!
}
bool CompareFailed = false;
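
The BackupNumber change above stops the backward scan at a second period, so a dotted sequence such as "1.2.3" is not absorbed into a single number. A minimal self-contained sketch of the rule follows; isNumberChar here is a simplified stand-in for the static helper in FileUtilities.cpp, not the real implementation:

    #include <cctype>

    static bool isNumberChar(char C) {              // simplified stand-in
      return std::isdigit((unsigned char)C) || C == '.' || C == '+' ||
             C == '-' || C == 'e' || C == 'E';
    }

    // Back up Pos to the start of the number it points into, crossing at
    // most one '.', so "1.2.3" splits into separate numbers.
    static const char *BackupNumber(const char *Pos, const char *FirstChar) {
      bool HasPeriod = false;
      while (Pos > FirstChar && isNumberChar(Pos[-1])) {
        if (Pos[-1] == '.') {
          if (HasPeriod) break;                     // second period: stop
          HasPeriod = true;
        }
        --Pos;
      }
      return Pos;
    }
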
diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp
index 3f467fe..b8dca33 100644
--- a/lib/Support/FoldingSet.cpp
+++ b/lib/Support/FoldingSet.cpp
@@ -175,6 +175,14 @@ static void **GetBucketFor(const FoldingSetNodeID &ID,
return Buckets + BucketNum;
}
+/// AllocateBuckets - Allocate zero-initialized bucket memory.
+static void **AllocateBuckets(unsigned NumBuckets) {
+ void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*)));
+ // Set the very last bucket to be a non-null "pointer".
+ Buckets[NumBuckets] = reinterpret_cast<void*>(-1);
+ return Buckets;
+}
+
//===----------------------------------------------------------------------===//
// FoldingSetImpl Implementation
@@ -182,11 +190,11 @@ FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) {
assert(5 < Log2InitSize && Log2InitSize < 32 &&
"Initial hash table size out of range");
NumBuckets = 1 << Log2InitSize;
- Buckets = new void*[NumBuckets+1];
- clear();
+ Buckets = AllocateBuckets(NumBuckets);
+ NumNodes = 0;
}
FoldingSetImpl::~FoldingSetImpl() {
- delete [] Buckets;
+ free(Buckets);
}
void FoldingSetImpl::clear() {
// Set all but the last bucket to null pointers.
@@ -207,8 +215,8 @@ void FoldingSetImpl::GrowHashTable() {
NumBuckets <<= 1;
// Clear out new buckets.
- Buckets = new void*[NumBuckets+1];
- clear();
+ Buckets = AllocateBuckets(NumBuckets);
+ NumNodes = 0;
// Walk the old buckets, rehashing nodes into their new place.
FoldingSetNodeID ID;
@@ -227,7 +235,7 @@ void FoldingSetImpl::GrowHashTable() {
}
}
- delete[] OldBuckets;
+ free(OldBuckets);
}
/// FindNodeOrInsertPos - Look up the node specified by ID. If it exists,
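
The switch from new[] plus clear() to a calloc-based AllocateBuckets yields zeroed buckets in one call and plants a non-null sentinel in the last slot, so bucket walks can stop on the sentinel instead of carrying the array length around. A condensed sketch of the idiom (error checking omitted):

    #include <cstdlib>

    // N zeroed buckets plus a sentinel slot; must be released with free().
    static void **AllocateBuckets(unsigned N) {
      void **Buckets = static_cast<void **>(calloc(N + 1, sizeof(void *)));
      Buckets[N] = reinterpret_cast<void *>(-1);   // non-null end marker
      return Buckets;
    }

    // Advance to the next non-empty bucket; the sentinel guarantees the
    // loop terminates without comparing against NumBuckets.
    static void **NextNonEmpty(void **Bucket) {
      do {
        ++Bucket;
      } while (*Bucket == 0);
      return Bucket;
    }
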
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 2b95089..542162d 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/System/Errno.h"
#include "llvm/System/Path.h"
#include "llvm/System/Process.h"
@@ -37,22 +38,7 @@ using namespace llvm;
// MemoryBuffer implementation itself.
//===----------------------------------------------------------------------===//
-MemoryBuffer::~MemoryBuffer() {
- if (MustDeleteBuffer)
- free((void*)BufferStart);
-}
-
-/// initCopyOf - Initialize this source buffer with a copy of the specified
-/// memory range. We make the copy so that we can null terminate it
-/// successfully.
-void MemoryBuffer::initCopyOf(const char *BufStart, const char *BufEnd) {
- size_t Size = BufEnd-BufStart;
- BufferStart = (char *)malloc(Size+1);
- BufferEnd = BufferStart+Size;
- memcpy(const_cast<char*>(BufferStart), BufStart, Size);
- *const_cast<char*>(BufferEnd) = 0; // Null terminate buffer.
- MustDeleteBuffer = true;
-}
+MemoryBuffer::~MemoryBuffer() { }
/// init - Initialize this MemoryBuffer as a reference to externally allocated
/// memory, memory that we know is already null terminated.
@@ -60,27 +46,38 @@ void MemoryBuffer::init(const char *BufStart, const char *BufEnd) {
assert(BufEnd[0] == 0 && "Buffer is not null terminated!");
BufferStart = BufStart;
BufferEnd = BufEnd;
- MustDeleteBuffer = false;
}
//===----------------------------------------------------------------------===//
// MemoryBufferMem implementation.
//===----------------------------------------------------------------------===//
+/// CopyStringRef - Copies contents of a StringRef into a block of memory and
+/// null-terminates it.
+static void CopyStringRef(char *Memory, StringRef Data) {
+ memcpy(Memory, Data.data(), Data.size());
+ Memory[Data.size()] = 0; // Null terminate string.
+}
+
+/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it.
+template <typename T>
+static T* GetNamedBuffer(StringRef Buffer, StringRef Name) {
+ char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1));
+ CopyStringRef(Mem + sizeof(T), Name);
+ return new (Mem) T(Buffer);
+}
+
namespace {
+/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
class MemoryBufferMem : public MemoryBuffer {
- std::string FileID;
public:
- MemoryBufferMem(StringRef InputData, StringRef FID, bool Copy = false)
- : FileID(FID) {
- if (!Copy)
- init(InputData.data(), InputData.data()+InputData.size());
- else
- initCopyOf(InputData.data(), InputData.data()+InputData.size());
+ MemoryBufferMem(StringRef InputData) {
+ init(InputData.begin(), InputData.end());
}
-
+
virtual const char *getBufferIdentifier() const {
- return FileID.c_str();
+ // The name is stored after the class itself.
+ return reinterpret_cast<const char*>(this + 1);
}
};
}
@@ -88,42 +85,55 @@ public:
/// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note
/// that EndPtr[0] must be a null byte and be accessible!
MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData,
- const char *BufferName) {
- return new MemoryBufferMem(InputData, BufferName);
+ StringRef BufferName) {
+ return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName);
}
/// getMemBufferCopy - Open the specified memory range as a MemoryBuffer,
/// copying the contents and taking ownership of it. This has no requirements
/// on EndPtr[0].
MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData,
- const char *BufferName) {
- return new MemoryBufferMem(InputData, BufferName, true);
+ StringRef BufferName) {
+ MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName);
+ if (!Buf) return 0;
+ memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(),
+ InputData.size());
+ return Buf;
}
/// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size
-/// that is completely initialized to zeros. Note that the caller should
-/// initialize the memory allocated by this method. The memory is owned by
-/// the MemoryBuffer object.
+/// that is not initialized. Note that the caller should initialize the
+/// memory allocated by this method. The memory is owned by the MemoryBuffer
+/// object.
MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
StringRef BufferName) {
- char *Buf = (char *)malloc(Size+1);
- if (!Buf) return 0;
- Buf[Size] = 0;
- MemoryBufferMem *SB = new MemoryBufferMem(StringRef(Buf, Size), BufferName);
- // The memory for this buffer is owned by the MemoryBuffer.
- SB->MustDeleteBuffer = true;
- return SB;
+ // Allocate space for the MemoryBuffer, the data and the name. It is important
+ // that MemoryBuffer and data are aligned so PointerIntPair works with them.
+ size_t AlignedStringLen =
+ RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1,
+ sizeof(void*)); // TODO: Is sizeof(void*) enough?
+ size_t RealLen = AlignedStringLen + Size + 1;
+ char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
+ if (!Mem) return 0;
+
+ // The name is stored after the class itself.
+ CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName);
+
+ // The buffer begins after the name and must be aligned.
+ char *Buf = Mem + AlignedStringLen;
+ Buf[Size] = 0; // Null terminate buffer.
+
+ return new (Mem) MemoryBufferMem(StringRef(Buf, Size));
}
/// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that
/// is completely initialized to zeros. Note that the caller need not
/// initialize the memory allocated by this method. The memory is owned by
/// the MemoryBuffer object.
-MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size,
- const char *BufferName) {
+MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) {
MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName);
if (!SB) return 0;
- memset(const_cast<char*>(SB->getBufferStart()), 0, Size+1);
+ memset(const_cast<char*>(SB->getBufferStart()), 0, Size);
return SB;
}
@@ -137,7 +147,16 @@ MemoryBuffer *MemoryBuffer::getFileOrSTDIN(StringRef Filename,
int64_t FileSize,
struct stat *FileInfo) {
if (Filename == "-")
- return getSTDIN();
+ return getSTDIN(ErrStr);
+ return getFile(Filename, ErrStr, FileSize, FileInfo);
+}
+
+MemoryBuffer *MemoryBuffer::getFileOrSTDIN(const char *Filename,
+ std::string *ErrStr,
+ int64_t FileSize,
+ struct stat *FileInfo) {
+ if (strcmp(Filename, "-") == 0)
+ return getSTDIN(ErrStr);
return getFile(Filename, ErrStr, FileSize, FileInfo);
}
@@ -149,18 +168,11 @@ namespace {
/// MemoryBufferMMapFile - This represents a file that was mapped in with the
/// sys::Path::MapInFilePages method. When destroyed, it calls the
/// sys::Path::UnMapFilePages method.
-class MemoryBufferMMapFile : public MemoryBuffer {
- std::string Filename;
+class MemoryBufferMMapFile : public MemoryBufferMem {
public:
- MemoryBufferMMapFile(StringRef filename, const char *Pages, uint64_t Size)
- : Filename(filename) {
- init(Pages, Pages+Size);
- }
-
- virtual const char *getBufferIdentifier() const {
- return Filename.c_str();
- }
-
+ MemoryBufferMMapFile(StringRef Buffer)
+ : MemoryBufferMem(Buffer) { }
+
~MemoryBufferMMapFile() {
sys::Path::UnMapFilePages(getBufferStart(), getBufferSize());
}
@@ -170,19 +182,24 @@ public:
class FileCloser {
int FD;
public:
- FileCloser(int FD) : FD(FD) {}
+ explicit FileCloser(int FD) : FD(FD) {}
~FileCloser() { ::close(FD); }
};
}
MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
int64_t FileSize, struct stat *FileInfo) {
- int OpenFlags = 0;
+ SmallString<256> PathBuf(Filename.begin(), Filename.end());
+ return MemoryBuffer::getFile(PathBuf.c_str(), ErrStr, FileSize, FileInfo);
+}
+
+MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
+ int64_t FileSize, struct stat *FileInfo) {
+ int OpenFlags = O_RDONLY;
#ifdef O_BINARY
OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
#endif
- SmallString<256> PathBuf(Filename.begin(), Filename.end());
- int FD = ::open(PathBuf.c_str(), O_RDONLY|OpenFlags);
+ int FD = ::open(Filename, OpenFlags);
if (FD == -1) {
if (ErrStr) *ErrStr = sys::StrError();
return 0;
@@ -213,8 +230,8 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
if (FileSize >= 4096*4 &&
(FileSize & (sys::Process::GetPageSize()-1)) != 0) {
if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) {
- // Close the file descriptor, now that the whole file is in memory.
- return new MemoryBufferMMapFile(Filename, Pages, FileSize);
+ return GetNamedBuffer<MemoryBufferMMapFile>(StringRef(Pages, FileSize),
+ Filename);
}
}
@@ -254,34 +271,27 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
// MemoryBuffer::getSTDIN implementation.
//===----------------------------------------------------------------------===//
-namespace {
-class STDINBufferFile : public MemoryBuffer {
-public:
- virtual const char *getBufferIdentifier() const {
- return "<stdin>";
- }
-};
-}
-
-MemoryBuffer *MemoryBuffer::getSTDIN() {
- char Buffer[4096*4];
-
- std::vector<char> FileData;
-
+MemoryBuffer *MemoryBuffer::getSTDIN(std::string *ErrStr) {
// Read in all of the data from stdin, we cannot mmap stdin.
//
// FIXME: That isn't necessarily true, we should try to mmap stdin and
// fallback if it fails.
sys::Program::ChangeStdinToBinary();
- size_t ReadBytes;
+
+ const ssize_t ChunkSize = 4096*4;
+ SmallString<ChunkSize> Buffer;
+ ssize_t ReadBytes;
+ // Read into Buffer until we hit EOF.
do {
- ReadBytes = fread(Buffer, sizeof(char), sizeof(Buffer), stdin);
- FileData.insert(FileData.end(), Buffer, Buffer+ReadBytes);
- } while (ReadBytes == sizeof(Buffer));
-
- FileData.push_back(0); // &FileData[Size] is invalid. So is &*FileData.end().
- size_t Size = FileData.size();
- MemoryBuffer *B = new STDINBufferFile();
- B->initCopyOf(&FileData[0], &FileData[Size-1]);
- return B;
+ Buffer.reserve(Buffer.size() + ChunkSize);
+ ReadBytes = read(0, Buffer.end(), ChunkSize);
+ if (ReadBytes == -1) {
+ if (errno == EINTR) continue;
+ if (ErrStr) *ErrStr = sys::StrError();
+ return 0;
+ }
+ Buffer.set_size(Buffer.size() + ReadBytes);
+ } while (ReadBytes != 0);
+
+ return getMemBufferCopy(Buffer, "<stdin>");
}
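
The rewritten MemoryBuffer drops the MustDeleteBuffer flag and the per-object std::string identifier in favor of a single allocation carrying the object header, the NUL-terminated name, and (for getNewUninitMemBuffer) the buffer data itself. A simplified sketch of that co-allocation trick, with a hypothetical NamedBlob standing in for MemoryBufferMem; the real code also pads the data to pointer alignment, omitted here:

    #include <cstring>
    #include <new>
    #include <string>

    struct NamedBlob {
      explicit NamedBlob(size_t Size) : Size(Size) {}
      size_t Size;
      // The identifier is stored immediately after the object itself.
      const char *getName() const {
        return reinterpret_cast<const char *>(this + 1);
      }
    };

    // Header, name, and payload share one heap block, so teardown is a
    // single deallocation and no extra pointer or ownership flag is needed.
    NamedBlob *CreateNamedBlob(const std::string &Name, size_t PayloadSize) {
      size_t Prefix = sizeof(NamedBlob) + Name.size() + 1;
      char *Mem = static_cast<char *>(operator new(Prefix + PayloadSize));
      std::memcpy(Mem + sizeof(NamedBlob), Name.c_str(), Name.size() + 1);
      return new (Mem) NamedBlob(PayloadSize);   // placement-new the header
    }
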
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 7a04a53..a99ab2f 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -12,11 +12,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Config/config.h" // Get autoconf configuration settings
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Signals.h"
#include "llvm/System/ThreadLocal.h"
#include "llvm/ADT/SmallString.h"
+
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+#include <CrashReporterClient.h>
+#endif
+
using namespace llvm;
namespace llvm {
@@ -48,8 +54,17 @@ static void PrintCurStackTrace(raw_ostream &OS) {
OS.flush();
}
-// Integrate with crash reporter.
-#ifdef __APPLE__
+// Integrate with crash reporter libraries.
+#if defined (__APPLE__) && defined (HAVE_CRASHREPORTERCLIENT_H)
+// If any clients of llvm try to link to libCrashReporterClient.a themselves,
+// only one crash info struct will be used.
+extern "C" {
+CRASH_REPORTER_CLIENT_HIDDEN
+struct crashreporter_annotations_t gCRAnnotations
+ __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
+ = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0 };
+}
+#elif defined (__APPLE__)
static const char *__crashreporter_info__ = 0;
asm(".desc ___crashreporter_info__, 0x10");
#endif
@@ -71,7 +86,11 @@ static void CrashHandler(void *Cookie) {
}
if (!TmpStr.empty()) {
+#ifndef HAVE_CRASHREPORTERCLIENT_H
__crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
+#else
+ CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
+#endif
errs() << TmpStr.str();
}
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 68938fa..504e649 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -166,10 +166,13 @@ void SmallPtrSetImpl::Grow() {
}
}
-SmallPtrSetImpl::SmallPtrSetImpl(const SmallPtrSetImpl& that) {
+SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
+ const SmallPtrSetImpl& that) {
+ SmallArray = SmallStorage;
+
// If we're becoming small, prepare to insert into our stack space
if (that.isSmall()) {
- CurArray = &SmallArray[0];
+ CurArray = SmallArray;
// Otherwise, allocate new heap space (unless we were the same size)
} else {
CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1));
@@ -197,7 +200,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
if (RHS.isSmall()) {
if (!isSmall())
free(CurArray);
- CurArray = &SmallArray[0];
+ CurArray = SmallArray;
// Otherwise, allocate new heap space (unless we were the same size)
} else if (CurArraySize != RHS.CurArraySize) {
if (isSmall())
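
The copy constructor now receives the derived object's inline array explicitly, because the size-erased base class cannot know where that array lives once it is no longer a base-class member. A minimal sketch of the handoff (names and layout simplified, not the real classes):

    class SetImplBase {
    protected:
      const void **SmallArray;   // the derived object's inline storage
      const void **CurArray;     // current storage: inline or heap
      explicit SetImplBase(const void **SmallStorage)
          : SmallArray(SmallStorage), CurArray(SmallStorage) {}
    };

    template <unsigned N>
    class SmallSet : public SetImplBase {
      const void *Storage[N];    // lives in the derived object
    public:
      SmallSet() : SetImplBase(Storage) {}
    };
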
diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp
index 6821382..2e17af8 100644
--- a/lib/Support/SmallVector.cpp
+++ b/lib/Support/SmallVector.cpp
@@ -21,15 +21,18 @@ void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) {
size_t NewCapacityInBytes = 2 * capacity_in_bytes();
if (NewCapacityInBytes < MinSizeInBytes)
NewCapacityInBytes = MinSizeInBytes;
- void *NewElts = operator new(NewCapacityInBytes);
-
- // Copy the elements over. No need to run dtors on PODs.
- memcpy(NewElts, this->BeginX, CurSizeBytes);
-
- // If this wasn't grown from the inline copy, deallocate the old space.
- if (!this->isSmall())
- operator delete(this->BeginX);
-
+
+ void *NewElts;
+ if (this->isSmall()) {
+ NewElts = malloc(NewCapacityInBytes);
+
+ // Copy the elements over. No need to run dtors on PODs.
+ memcpy(NewElts, this->BeginX, CurSizeBytes);
+ } else {
+ // If this wasn't grown from the inline copy, grow the allocated space.
+ NewElts = realloc(this->BeginX, NewCapacityInBytes);
+ }
+
this->EndX = (char*)NewElts+CurSizeBytes;
this->BeginX = NewElts;
this->CapacityX = (char*)this->BeginX + NewCapacityInBytes;
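
grow_pod previously always allocated fresh memory and copied; the new version splits the two cases. Inline (stack) storage must still be copied out with malloc+memcpy, but heap storage can go through realloc, which may extend the block in place and skip the copy entirely. The essential shape, assuming POD elements:

    #include <cstdlib>
    #include <cstring>

    // Returns the (possibly moved) start of a POD buffer grown from
    // CurSize to NewCap bytes. realloc() is only legal for heap blocks,
    // so the inline case copies instead; PODs need no destructor calls.
    void *GrowPOD(void *Begin, size_t CurSize, size_t NewCap, bool IsInline) {
      if (IsInline) {
        void *NewElts = std::malloc(NewCap);
        std::memcpy(NewElts, Begin, CurSize);
        return NewElts;
      }
      return std::realloc(Begin, NewCap);
    }
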
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 784b77c..44ee177 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -236,11 +236,13 @@ static Timer &getNamedRegionTimer(StringRef Name) {
return T;
}
-NamedRegionTimer::NamedRegionTimer(StringRef Name)
- : TimeRegion(getNamedRegionTimer(Name)) {}
+NamedRegionTimer::NamedRegionTimer(StringRef Name,
+ bool Enabled)
+ : TimeRegion(!Enabled ? 0 : &getNamedRegionTimer(Name)) {}
-NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName)
- : TimeRegion(NamedGroupedTimers->get(Name, GroupName)) {}
+NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName,
+ bool Enabled)
+ : TimeRegion(!Enabled ? 0 : &NamedGroupedTimers->get(Name, GroupName)) {}
//===----------------------------------------------------------------------===//
// TimerGroup Implementation
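
The new Enabled flag works because TimeRegion accepts a null timer pointer and does nothing with it, so a disabled NamedRegionTimer reduces to a single branch at construction. A hedged sketch of that null-object shape; Timer here is a placeholder with empty bodies, not the real class:

    struct Timer {
      void startTimer() {}   // placeholder bodies
      void stopTimer() {}
    };

    class TimeRegion {
      Timer *T;
    public:
      explicit TimeRegion(Timer *T) : T(T) { if (T) T->startTimer(); }
      ~TimeRegion() { if (T) T->stopTimer(); }
    };

    // NamedRegionTimer-style use: time the scope only when enabled.
    void timedWork(bool Enabled, Timer &RegionTimer) {
      TimeRegion Scope(Enabled ? &RegionTimer : 0);
      // ... work to (maybe) measure ...
    }
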
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 9796ca5..6a70449 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -104,6 +104,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case Solaris: return "solaris";
case Win32: return "win32";
case Haiku: return "haiku";
+ case Minix: return "minix";
}
return "<invalid>";
@@ -326,7 +327,9 @@ void Triple::Parse() const {
else if (OSName.startswith("win32"))
OS = Win32;
else if (OSName.startswith("haiku"))
- OS = Haiku;
+ OS = Haiku;
+ else if (OSName.startswith("minix"))
+ OS = Minix;
else
OS = UnknownOS;
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 11cf0ec..8054ae6 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -427,10 +427,9 @@ raw_fd_ostream::~raw_fd_ostream() {
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
- ssize_t ret;
do {
- ret = ::write(FD, Ptr, Size);
+ ssize_t ret = ::write(FD, Ptr, Size);
if (ret < 0) {
// If it's a recoverable error, swallow it and retry the write.
@@ -482,7 +481,7 @@ uint64_t raw_fd_ostream::seek(uint64_t off) {
}
size_t raw_fd_ostream::preferred_buffer_size() const {
-#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(_MINIX)
+#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
// Windows and Minix have no st_blksize.
assert(FD >= 0 && "File not yet open!");
struct stat statbuf;
@@ -496,8 +495,9 @@ size_t raw_fd_ostream::preferred_buffer_size() const {
return 0;
// Return the preferred block size.
return statbuf.st_blksize;
-#endif
+#else
return raw_ostream::preferred_buffer_size();
+#endif
}
raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
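
Moving ret inside the loop is cosmetic, but the loop it sits in is the classic retry-on-recoverable-error write. A self-contained sketch of the full idiom, including the short-write resumption a correct loop needs (the real write_impl keeps its own bookkeeping and error reporting):

    #include <cerrno>
    #include <unistd.h>

    // Write all Size bytes to FD, retrying after EINTR/EAGAIN and resuming
    // after short writes; returns false on a hard error.
    bool WriteAll(int FD, const char *Ptr, size_t Size) {
      while (Size > 0) {
        ssize_t Ret = ::write(FD, Ptr, Size);
        if (Ret < 0) {
          if (errno == EINTR || errno == EAGAIN)
            continue;        // recoverable: retry the same chunk
          return false;      // hard error: give up
        }
        Ptr += Ret;
        Size -= (size_t)Ret;
      }
      return true;
    }
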
diff --git a/lib/System/Disassembler.cpp b/lib/System/Disassembler.cpp
index bad427a..139e3be 100644
--- a/lib/System/Disassembler.cpp
+++ b/lib/System/Disassembler.cpp
@@ -44,33 +44,29 @@ std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length,
uint64_t pc) {
std::stringstream res;
-#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+#if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \
+ && USE_UDIS86
unsigned bits;
# if defined(__i386__)
bits = 32;
# else
bits = 64;
# endif
-
-# if USE_UDIS86
+
ud_t ud_obj;
-
+
ud_init(&ud_obj);
ud_set_input_buffer(&ud_obj, start, length);
ud_set_mode(&ud_obj, bits);
ud_set_pc(&ud_obj, pc);
ud_set_syntax(&ud_obj, UD_SYN_ATT);
-
+
res << std::setbase(16)
<< std::setw(bits/4);
-
+
while (ud_disassemble(&ud_obj)) {
res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n";
}
-# else
- res << "No disassembler available. See configure help for options.\n";
-# endif
-
#else
res << "No disassembler available. See configure help for options.\n";
#endif
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
index 6844530..1235257 100644
--- a/lib/System/Path.cpp
+++ b/lib/System/Path.cpp
@@ -136,26 +136,23 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
bool
Path::isArchive() const {
- if (canRead())
- return hasMagicNumber("!<arch>\012");
- return false;
+ return hasMagicNumber("!<arch>\012");
}
bool
Path::isDynamicLibrary() const {
- if (canRead()) {
- std::string Magic;
- if (getMagicNumber(Magic, 64))
- switch (IdentifyFileType(Magic.c_str(),
- static_cast<unsigned>(Magic.length()))) {
- default: return false;
- case Mach_O_FixedVirtualMemorySharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLib_FileType:
- case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
- case ELF_SharedObject_FileType:
- case COFF_FileType: return true;
- }
- }
+ std::string Magic;
+ if (getMagicNumber(Magic, 64))
+ switch (IdentifyFileType(Magic.c_str(),
+ static_cast<unsigned>(Magic.length()))) {
+ default: return false;
+ case Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+ case ELF_SharedObject_FileType:
+ case COFF_FileType: return true;
+ }
+
return false;
}
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index 74596dc..bc104a3 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -421,10 +421,8 @@ bool Path::getMagicNumber(std::string &Magic, unsigned len) const {
return false;
ssize_t bytes_read = ::read(fd, Buf, len);
::close(fd);
- if (ssize_t(len) != bytes_read) {
- Magic.clear();
+ if (ssize_t(len) != bytes_read)
return false;
- }
Magic.assign(Buf, len);
return true;
}
@@ -890,14 +888,19 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
#else
// Okay, looks like we have to do it all by our lonesome.
static unsigned FCounter = 0;
- unsigned offset = path.size() + 1;
- while ( FCounter < 999999 && exists()) {
- sprintf(FNBuffer+offset,"%06u",++FCounter);
+ // Try to initialize with unique value.
+ if (FCounter == 0) FCounter = ((unsigned)getpid() & 0xFFFF) << 8;
+ char* pos = strstr(FNBuffer, "XXXXXX");
+ do {
+ if (++FCounter > 0xFFFFFF) {
+ return MakeErrMsg(ErrMsg,
+ path + ": can't make unique filename: too many files");
+ }
+ sprintf(pos, "%06X", FCounter);
path = FNBuffer;
- }
- if (FCounter > 999999)
- return MakeErrMsg(ErrMsg,
- path + ": can't make unique filename: too many files");
+ } while (exists());
+ // POSSIBLE SECURITY BUG: An attacker can easily guess the name and exploit
+ // LLVM.
#endif
return false;
}
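
The rewritten fallback seeds the counter from the pid and rewrites the "XXXXXX" suffix until an unused name appears; as the added comment warns, such names are guessable, which is why mkstemp() is the preferred primitive where the platform provides it. A condensed sketch of the fallback, with the Path plumbing and error strings omitted:

    #include <cstdio>
    #include <cstring>
    #include <unistd.h>

    // Rewrites the trailing "XXXXXX" in FNBuffer with hex counter values
    // until no file of that name exists. Inherently racy and guessable:
    // prefer mkstemp() where available.
    bool MakeUniqueName(char *FNBuffer) {
      static unsigned FCounter = 0;
      if (FCounter == 0)                      // seed once per process
        FCounter = ((unsigned)getpid() & 0xFFFF) << 8;
      char *Pos = std::strstr(FNBuffer, "XXXXXX");
      if (!Pos || Pos[6] != '\0')             // suffix must end the string
        return false;
      do {
        if (++FCounter > 0xFFFFFF)
          return false;                       // counter space exhausted
        std::sprintf(Pos, "%06X", FCounter);
      } while (access(FNBuffer, F_OK) == 0);  // loop while the name exists
      return true;
    }
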
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
index 358415f..67018de 100644
--- a/lib/System/Unix/Program.inc
+++ b/lib/System/Unix/Program.inc
@@ -310,7 +310,7 @@ Program::Wait(unsigned secondsToWait,
// fact of having a handler at all causes the wait below to return with EINTR,
// unlike if we used SIG_IGN.
if (secondsToWait) {
-#ifndef __HAIKU__
+#if !defined(__HAIKU__) && !defined(__minix)
Act.sa_sigaction = 0;
#endif
Act.sa_handler = TimeOutHandler;
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
index 9548816..1e74647 100644
--- a/lib/System/Unix/Signals.inc
+++ b/lib/System/Unix/Signals.inc
@@ -111,6 +111,14 @@ static void UnregisterHandlers() {
}
+/// RemoveFilesToRemove - Process the FilesToRemove list. This function
+/// should be called with the SignalsMutex lock held.
+static void RemoveFilesToRemove() {
+ while (!FilesToRemove.empty()) {
+ FilesToRemove.back().eraseFromDisk(true);
+ FilesToRemove.pop_back();
+ }
+}
// SignalHandler - The signal handler that runs.
static RETSIGTYPE SignalHandler(int Sig) {
@@ -126,10 +134,7 @@ static RETSIGTYPE SignalHandler(int Sig) {
sigprocmask(SIG_UNBLOCK, &SigMask, 0);
SignalsMutex.acquire();
- while (!FilesToRemove.empty()) {
- FilesToRemove.back().eraseFromDisk(true);
- FilesToRemove.pop_back();
- }
+ RemoveFilesToRemove();
if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) {
if (InterruptFunction) {
@@ -153,7 +158,9 @@ static RETSIGTYPE SignalHandler(int Sig) {
}
void llvm::sys::RunInterruptHandlers() {
- SignalHandler(SIGINT);
+ SignalsMutex.acquire();
+ RemoveFilesToRemove();
+ SignalsMutex.release();
}
void llvm::sys::SetInterruptFunction(void (*IF)()) {
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
index 5a0052f..379527d 100644
--- a/lib/System/Win32/Path.inc
+++ b/lib/System/Win32/Path.inc
@@ -281,12 +281,6 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
// FIXME: the above set of functions don't map to Windows very well.
-bool
-Path::isRootDirectory() const {
- size_t len = path.size();
- return len > 0 && path[len-1] == '/';
-}
-
StringRef Path::getDirname() const {
return getDirnameCharSep(path, "/");
}
diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc
index a3a393c..d6db71b 100644
--- a/lib/System/Win32/Signals.inc
+++ b/lib/System/Win32/Signals.inc
@@ -283,7 +283,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
#ifdef _MSC_VER
if (ExitOnUnhandledExceptions)
- _exit(-3);
+ _exit(-3);
#endif
// Allow dialog box to pop up allowing choice to start debugger.
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index ae7ae59..14825a7 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -90,10 +90,6 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
}
}
-/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
-/// operations involving sub-registers.
-bool ModelWithRegSequence();
-
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index e68354a..d316b13 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -520,6 +520,70 @@ namespace ARM_AM {
// This is stored in two operands [regaddr, align]. The first is the
// address register. The second operand is the value of the alignment
// specifier to use or zero if no explicit alignment.
+ // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific
+ // instruction.
+
+ //===--------------------------------------------------------------------===//
+ // NEON Modified Immediates
+ //===--------------------------------------------------------------------===//
+ //
+ // Several NEON instructions (e.g., VMOV) take a "modified immediate"
+ // vector operand, where a small immediate encoded in the instruction
+ // specifies a full NEON vector value. These modified immediates are
+ // represented here as encoded integers. The low 8 bits hold the immediate
+ // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold
+ // the "Cmode" field of the instruction. The interfaces below treat the
+ // Op and Cmode values as a single 5-bit value.
+
+ static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
+ return (OpCmode << 8) | Val;
+ }
+ static inline unsigned getNEONModImmOpCmode(unsigned ModImm) {
+ return (ModImm >> 8) & 0x1f;
+ }
+ static inline unsigned getNEONModImmVal(unsigned ModImm) {
+ return ModImm & 0xff;
+ }
+
+ /// decodeNEONModImm - Decode a NEON modified immediate value into the
+ /// element value and the element size in bits. (If the element size is
+ /// smaller than the vector, it is splatted into all the elements.)
+ static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
+ unsigned OpCmode = getNEONModImmOpCmode(ModImm);
+ unsigned Imm8 = getNEONModImmVal(ModImm);
+ uint64_t Val = 0;
+
+ if (OpCmode == 0xe) {
+ // 8-bit vector elements
+ Val = Imm8;
+ EltBits = 8;
+ } else if ((OpCmode & 0xc) == 0x8) {
+ // 16-bit vector elements
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 16;
+ } else if ((OpCmode & 0x8) == 0) {
+ // 32-bit vector elements, zero with one byte set
+ unsigned ByteNum = (OpCmode & 0x6) >> 1;
+ Val = Imm8 << (8 * ByteNum);
+ EltBits = 32;
+ } else if ((OpCmode & 0xe) == 0xc) {
+ // 32-bit vector elements, one byte with low bits set
+ unsigned ByteNum = 1 + (OpCmode & 0x1);
+ Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
+ EltBits = 32;
+ } else if (OpCmode == 0x1e) {
+ // 64-bit vector elements
+ for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+ if ((ModImm >> ByteNum) & 1)
+ Val |= (uint64_t)0xff << (8 * ByteNum);
+ }
+ EltBits = 64;
+ } else {
+ assert(false && "Unsupported NEON immediate");
+ }
+ return Val;
+ }
} // end namespace ARM_AM
} // end namespace llvm
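
A small usage sketch of the new helpers: per the Cmode table in the comment above, Op/Cmode 0x8 selects 16-bit elements with the immediate in the low byte, so the round trip below yields EltBits == 16 with the byte splatted into every lane. The constants are illustrative only:

    #include <cstdint>

    // Encode then decode a NEON modified immediate using the ARM_AM
    // helpers defined above (illustrative driver, not library code).
    static void NEONModImmExample() {
      unsigned ModImm = ARM_AM::createNEONModImm(0x8, 0x42); // 16-bit Cmode
      unsigned EltBits;
      uint64_t Elt = ARM_AM::decodeNEONModImm(ModImm, EltBits);
      (void)Elt;
      // EltBits == 16 and Elt == 0x42: the byte is replicated into each
      // 16-bit element of the vector register.
    }
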
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2528854..49c16f3 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -56,7 +56,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *MI = MBBI;
MachineFunction &MF = *MI->getParent()->getParent();
- unsigned TSFlags = MI->getDesc().TSFlags;
+ uint64_t TSFlags = MI->getDesc().TSFlags;
bool isPre = false;
switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
default: return NULL;
@@ -199,9 +199,9 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
bool
ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo *TRI) const {
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -227,8 +227,9 @@ ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Insert the spill to the stack frame. The register is killed at the spill
//
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
storeRegToStackSlot(MBB, MI, Reg, isKill,
- CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI);
+ CSI[i].getFrameIdx(), RC, TRI);
}
return true;
}
@@ -347,10 +348,8 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
-
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
int BOpc = !AFI->isThumbFunction()
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
@@ -364,17 +363,17 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch?
- BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
else
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
return 1;
}
// Two-way conditional branch.
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+ BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
- BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
return 2;
}
@@ -487,7 +486,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// Basic size info comes from the TSFlags field.
const TargetInstrDesc &TID = MI->getDesc();
- unsigned TSFlags = TID.TSFlags;
+ uint64_t TSFlags = TID.TSFlags;
unsigned Opc = MI->getOpcode();
switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
@@ -524,11 +523,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return 10;
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
- return 24;
+ return 20;
case ARM::tInt_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp:
case ARM::t2Int_eh_sjlj_setjmp_nofp:
- return 14;
+ return 12;
case ARM::BR_JTr:
case ARM::BR_JTm:
case ARM::BR_JTadd:
@@ -595,6 +594,7 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
return true;
}
case ARM::MOVr:
+ case ARM::MOVr_TC:
case ARM::tMOVr:
case ARM::tMOVgpr2tgpr:
case ARM::tMOVtgpr2gpr:
@@ -693,75 +693,44 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-bool
-ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- // tGPR is used sometimes in ARM instructions that need to avoid using
- // certain registers. Just treat it as GPR here.
- if (DestRC == ARM::tGPRRegisterClass)
- DestRC = ARM::GPRRegisterClass;
- if (SrcRC == ARM::tGPRRegisterClass)
- SrcRC = ARM::GPRRegisterClass;
-
- // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies.
- if (DestRC == ARM::DPR_8RegisterClass)
- DestRC = ARM::DPR_VFP2RegisterClass;
- if (SrcRC == ARM::DPR_8RegisterClass)
- SrcRC = ARM::DPR_VFP2RegisterClass;
-
- // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies.
- if (DestRC == ARM::QPR_VFP2RegisterClass ||
- DestRC == ARM::QPR_8RegisterClass)
- DestRC = ARM::QPRRegisterClass;
- if (SrcRC == ARM::QPR_VFP2RegisterClass ||
- SrcRC == ARM::QPR_8RegisterClass)
- SrcRC = ARM::QPRRegisterClass;
-
- // Allow QQPR / QQPR_VFP2 cross-class copies.
- if (DestRC == ARM::QQPR_VFP2RegisterClass)
- DestRC = ARM::QQPRRegisterClass;
- if (SrcRC == ARM::QQPR_VFP2RegisterClass)
- SrcRC = ARM::QQPRRegisterClass;
-
- // Disallow copies of unequal sizes.
- if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize())
- return false;
-
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::SPRRegisterClass)
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg)
- .addReg(SrcReg));
- else
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
- DestReg).addReg(SrcReg)));
- } else {
- unsigned Opc;
-
- if (DestRC == ARM::SPRRegisterClass)
- Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS);
- else if (DestRC == ARM::DPRRegisterClass)
- Opc = ARM::VMOVD;
- else if (DestRC == ARM::DPR_VFP2RegisterClass ||
- SrcRC == ARM::DPR_VFP2RegisterClass)
- // Always use neon reg-reg move if source or dest is NEON-only regclass.
- Opc = ARM::VMOVDneon;
- else if (DestRC == ARM::QPRRegisterClass)
- Opc = ARM::VMOVQ;
- else if (DestRC == ARM::QQPRRegisterClass)
- Opc = ARM::VMOVQQ;
- else if (DestRC == ARM::QQQQPRRegisterClass)
- Opc = ARM::VMOVQQQQ;
- else
- return false;
-
- AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg));
+void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GPRDest = ARM::GPRRegClass.contains(DestReg);
+ bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);
+
+ if (GPRDest && GPRSrc) {
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))));
+ return;
}
- return true;
+ bool SPRDest = ARM::SPRRegClass.contains(DestReg);
+ bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);
+
+ unsigned Opc;
+ if (SPRDest && SPRSrc)
+ Opc = ARM::VMOVS;
+ else if (GPRDest && SPRSrc)
+ Opc = ARM::VMOVRS;
+ else if (SPRDest && GPRSrc)
+ Opc = ARM::VMOVSR;
+ else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVD;
+ else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQ;
+ else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQ;
+ else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
+ Opc = ARM::VMOVQQQQ;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
+ AddDefaultPred(MIB);
}
static const
@@ -795,30 +764,34 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
RC = ARM::GPRRegisterClass;
- if (RC == ARM::GPRRegisterClass) {
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
+ break;
+ case ARM::SPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::DPRRegisterClass ||
- RC == ARM::DPR_VFP2RegisterClass ||
- RC == ARM::DPR_8RegisterClass) {
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass ||
- RC == ARM::QPR_8RegisterClass) {
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
// FIXME: Neon instructions should support predicates
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
- .addFrameIndex(FI).addImm(128)
+ .addFrameIndex(FI).addImm(16)
.addReg(SrcReg, getKillRegState(isKill))
.addMemOperand(MMO));
} else {
@@ -828,12 +801,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
.addMemOperand(MMO));
}
- } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
// FIXME: It's possible to only store part of the QQ register if the
// spilled def has a sub-register index.
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32))
- .addFrameIndex(FI).addImm(128);
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST1d64Q))
+ .addFrameIndex(FI).addImm(16);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
@@ -850,8 +825,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
}
- } else {
- assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+ break;
+ case ARM::QQQQPRRegClassID: {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
.addFrameIndex(FI)
@@ -865,6 +840,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
}
}
@@ -886,26 +865,30 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
- if (RC == ARM::tGPRRegisterClass)
+ if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass)
RC = ARM::GPRRegisterClass;
- if (RC == ARM::GPRRegisterClass) {
+ switch (RC->getID()) {
+ case ARM::GPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
+ break;
+ case ARM::SPRRegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::DPRRegisterClass ||
- RC == ARM::DPR_VFP2RegisterClass ||
- RC == ARM::DPR_8RegisterClass) {
+ break;
+ case ARM::DPRRegClassID:
+ case ARM::DPR_VFP2RegClassID:
+ case ARM::DPR_8RegClassID:
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass ||
- RC == ARM::QPR_8RegisterClass) {
+ break;
+ case ARM::QPRRegClassID:
+ case ARM::QPR_VFP2RegClassID:
+ case ARM::QPR_8RegClassID:
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
- .addFrameIndex(FI).addImm(128)
+ .addFrameIndex(FI).addImm(16)
.addMemOperand(MMO));
} else {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
@@ -913,14 +896,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
.addMemOperand(MMO));
}
- } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+ break;
+ case ARM::QQPRRegClassID:
+ case ARM::QQPR_VFP2RegClassID:
if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
- MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32));
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD1d64Q));
MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO));
+ AddDefaultPred(MIB.addFrameIndex(FI).addImm(16).addMemOperand(MMO));
} else {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
@@ -932,21 +917,25 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
}
- } else {
- assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
- MachineInstrBuilder MIB =
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
- .addFrameIndex(FI)
- .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
- .addMemOperand(MMO);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
- MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
- AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ case ARM::QQQQPRRegClassID: {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown regclass!");
}
}
@@ -960,223 +949,6 @@ ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
return &*MIB;
}
-MachineInstr *ARMBaseInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = NULL;
- if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
- // If it is updating CPSR, then it cannot be folded.
- if (MI->getOperand(4).getReg() == ARM::CPSR && !MI->getOperand(4).isDead())
- return NULL;
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- if (Opc == ARM::MOVr)
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
- else // ARM::t2MOVr
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- if (Opc == ARM::MOVr)
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef), DstSubReg)
- .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
- else // ARM::t2MOVr
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef), DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- } else if (Opc == ARM::tMOVgpr2gpr ||
- Opc == ARM::tMOVtgpr2gpr ||
- Opc == ARM::tMOVgpr2tgpr) {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
- }
- } else if (Opc == ARM::VMOVS) {
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI)
- .addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) {
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
- }
- } else if (Opc == ARM::VMOVQ) {
- MachineFrameInfo &MFI = *MF.getFrameInfo();
- unsigned Pred = MI->getOperand(2).getImm();
- unsigned PredReg = MI->getOperand(3).getReg();
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned SrcSubReg = MI->getOperand(1).getSubReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- if (MFI.getObjectAlignment(FI) >= 16 &&
- getRegisterInfo().canRealignStack(MF)) {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q))
- .addFrameIndex(FI).addImm(128)
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addImm(Pred).addReg(PredReg);
- } else {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ))
- .addReg(SrcReg,
- getKillRegState(isKill) | getUndefRegState(isUndef),
- SrcSubReg)
- .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
- .addImm(Pred).addReg(PredReg);
- }
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned DstSubReg = MI->getOperand(0).getSubReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- if (MFI.getObjectAlignment(FI) >= 16 &&
- getRegisterInfo().canRealignStack(MF)) {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg);
- } else {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ))
- .addReg(DstReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef),
- DstSubReg)
- .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
- .addImm(Pred).addReg(PredReg);
- }
- }
- }
-
- return NewMI;
-}
-
-MachineInstr*
-ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- // FIXME
- return 0;
-}
-
-bool
-ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- unsigned Opc = MI->getOpcode();
- if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) {
- // If it is updating CPSR, then it cannot be folded.
- return MI->getOperand(4).getReg() != ARM::CPSR ||
- MI->getOperand(4).isDead();
- } else if (Opc == ARM::tMOVgpr2gpr ||
- Opc == ARM::tMOVtgpr2gpr ||
- Opc == ARM::tMOVgpr2tgpr) {
- return true;
- } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD ||
- Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
- return true;
- }
-
- // FIXME: VMOVQQ and VMOVQQQQ?
-
- return false;
-}
-
/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
@@ -1211,17 +983,12 @@ reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
- if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = TRI->getSubReg(DestReg, SubIdx);
- SubIdx = 0;
- }
-
+ const TargetRegisterInfo &TRI) const {
unsigned Opcode = Orig->getOpcode();
switch (Opcode) {
default: {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
+ MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
MBB.insert(I, MI);
break;
}
@@ -1237,9 +1004,6 @@ reMaterialize(MachineBasicBlock &MBB,
break;
}
}
-
- MachineInstr *NewMI = prior(I);
- NewMI->getOperand(0).setSubReg(SubIdx);
}
MachineInstr *
@@ -1291,6 +1055,165 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
+/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+/// determine if two loads are loading from the same base address. It should
+/// only return true if the base pointers are the same and the only difference
+/// between the two addresses is the offset. It also returns the offsets by
+/// reference.
+bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1,
+ int64_t &Offset2) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+ return false;
+
+ switch (Load1->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDR:
+ case ARM::LDRB:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ switch (Load2->getMachineOpcode()) {
+ default:
+ return false;
+ case ARM::LDR:
+ case ARM::LDRB:
+ case ARM::LDRD:
+ case ARM::LDRH:
+ case ARM::LDRSB:
+ case ARM::LDRSH:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
+ case ARM::t2LDRi8:
+ case ARM::t2LDRDi8:
+ case ARM::t2LDRSHi8:
+ case ARM::t2LDRi12:
+ case ARM::t2LDRSHi12:
+ break;
+ }
+
+ // Check if base addresses and chain operands match.
+ if (Load1->getOperand(0) != Load2->getOperand(0) ||
+ Load1->getOperand(4) != Load2->getOperand(4))
+ return false;
+
+ // Index should be Reg0.
+ if (Load1->getOperand(3) != Load2->getOperand(3))
+ return false;
+
+ // Determine the offsets.
+ if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+ isa<ConstantSDNode>(Load2->getOperand(1))) {
+ Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+ Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+ return true;
+ }
+
+ return false;
+}
+
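To make the hook's contract concrete: it succeeds only when the two loads agree on their chain, base, and index operands, so the sole remaining difference is a constant immediate offset, which is handed back to the caller. Below is a minimal standalone C++ sketch of the same matching rule, with plain structs standing in for the SDNode operands (hypothetical names, not the LLVM API):

    #include <cassert>
    #include <cstdint>

    // Stand-in for the operands compared above: chain, base address,
    // index register (0 meaning Reg0, i.e. no index), constant offset.
    struct LoadAddr {
      int Chain, Base, Index;
      int64_t Offset;
    };

    static bool sameBasePtr(const LoadAddr &A, const LoadAddr &B,
                            int64_t &Off1, int64_t &Off2) {
      if (A.Chain != B.Chain || A.Base != B.Base || A.Index != B.Index)
        return false;
      Off1 = A.Offset;            // only the offsets may differ
      Off2 = B.Offset;
      return true;
    }

    int main() {
      int64_t O1, O2;
      assert(sameBasePtr({1, 7, 0, 8}, {1, 7, 0, 16}, O1, O2) &&
             O1 == 8 && O2 == 16);
      assert(!sameBasePtr({1, 7, 0, 8}, {1, 9, 0, 16}, O1, O2)); // different base
      return 0;
    }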
+/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled together. On some targets, if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const {
+ // Don't worry about Thumb: just ARM and Thumb2.
+ if (Subtarget.isThumb1Only()) return false;
+
+ assert(Offset2 > Offset1);
+
+ if ((Offset2 - Offset1) / 8 > 64)
+ return false;
+
+ if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+ return false; // FIXME: overly conservative?
+
+ // Four loads in a row should be sufficient.
+ if (NumLoads >= 3)
+ return false;
+
+ return true;
+}
+
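Note the integer division in the distance check: (Offset2 - Offset1) / 8 > 64 only rejects pairs more than 519 bytes apart (520 / 8 == 65), and the NumLoads test caps a cluster at four loads counting Load1 itself. A standalone restatement of the heuristic with the same thresholds (plain integers, not SDNodes):

    #include <cassert>
    #include <cstdint>

    static bool shouldCluster(int64_t Offset1, int64_t Offset2,
                              unsigned NumLoads, bool SameOpcode) {
      assert(Offset2 > Offset1);
      if ((Offset2 - Offset1) / 8 > 64) return false; // too far apart
      if (!SameOpcode) return false; // the conservative FIXME case above
      return NumLoads < 3;           // at most four loads in a row
    }

    int main() {
      assert(shouldCluster(0, 519, 0, true));  // 519 / 8 == 64 -> allowed
      assert(!shouldCluster(0, 520, 0, true)); // 520 / 8 == 65 -> rejected
      assert(!shouldCluster(0, 8, 3, true));   // cluster already full
      return 0;
    }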
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const {
+ // Debug info is never a scheduling boundary. It's necessary to be explicit
+ // due to the special treatment of IT instructions below, otherwise a
+ // dbg_value followed by an IT will result in the IT instruction being
+ // considered a scheduling hazard, which is wrong. It should be the actual
+ // instruction preceding the dbg_value instruction(s), just like it is
+ // when debug info is not present.
+ if (MI->isDebugValue())
+ return false;
+
+ // Terminators and labels can't be scheduled around.
+ if (MI->getDesc().isTerminator() || MI->isLabel())
+ return true;
+
+ // Treat the start of the IT block as a scheduling boundary, but schedule
+ // t2IT along with all instructions following it.
+ // FIXME: This is a big hammer. But the alternative is to add all potential
+ // true and anti dependencies to IT block instructions as implicit operands
+ // to the t2IT instruction. The added compile time and complexity does not
+ // seem worth it.
+ MachineBasicBlock::const_iterator I = MI;
+ // Make sure to skip any dbg_value instructions
+ while (++I != MBB->end() && I->isDebugValue())
+ ;
+ if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+ return true;
+
+ // Don't attempt to schedule around any instruction that defines
+ // a stack-oriented pointer, as it's unlikely to be profitable. This
+ // saves compile time, because it doesn't require every single
+ // stack slot reference to depend on the instruction that does the
+ // modification.
+ if (MI->definesRegister(ARM::SP))
+ return true;
+
+ return false;
+}
+
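The forward scan above is the subtle part: dbg_value instructions are transparent, so a t2IT separated from MI only by debug values still makes MI a boundary. A small standalone model of that scan (the enum values are illustrative, not LLVM types):

    #include <cassert>
    #include <vector>

    enum Kind { Normal, DbgValue, T2IT };

    // From instruction I, skip any dbg_values and report whether the next
    // real instruction starts an IT block.
    static bool nextRealIsIT(const std::vector<Kind> &MBB, size_t I) {
      while (++I != MBB.size() && MBB[I] == DbgValue)
        ;
      return I != MBB.size() && MBB[I] == T2IT;
    }

    int main() {
      std::vector<Kind> BB = {Normal, DbgValue, DbgValue, T2IT, Normal};
      assert(nextRealIsIT(BB, 0));  // debug values do not hide the IT
      assert(!nextRealIsIT(BB, 3)); // the IT itself is not followed by one
      return 0;
    }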
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const {
+ if (!NumInstrs)
+ return false;
+ if (Subtarget.getCPUString() == "generic")
+ // Generic (and overly aggressive) if-conversion limits for testing.
+ return NumInstrs <= 10;
+ else if (Subtarget.hasV7Ops())
+ return NumInstrs <= 3;
+ return NumInstrs <= 2;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+ MachineBasicBlock &FMBB, unsigned NumF) const {
+ return NumT && NumF && NumT <= 2 && NumF <= 2;
+}
+
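Read together, the two overloads encode simple size budgets: a single block may be if-converted at up to 10 instructions on the "generic" CPU (deliberately aggressive, for testing), 3 on v7, and 2 otherwise, while a diamond needs both sides non-empty and at most 2 instructions each. A standalone restatement with the subtarget folded into two flags:

    #include <cassert>

    static bool profitableSingle(unsigned N, bool Generic, bool HasV7) {
      if (!N) return false;
      if (Generic) return N <= 10;
      return HasV7 ? N <= 3 : N <= 2;
    }

    static bool profitableDiamond(unsigned NumT, unsigned NumF) {
      return NumT && NumF && NumT <= 2 && NumF <= 2;
    }

    int main() {
      assert(profitableSingle(3, false, true));   // v7 allows 3
      assert(!profitableSingle(3, false, false)); // pre-v7 allows only 2
      assert(profitableDiamond(2, 1) && !profitableDiamond(3, 1));
      return 0;
    }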
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index b566271..89a2db7 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -116,11 +116,25 @@ namespace ARMII {
// Thumb format
ThumbFrm = 24 << FormShift,
- // NEON format
- NEONFrm = 25 << FormShift,
- NEONGetLnFrm = 26 << FormShift,
- NEONSetLnFrm = 27 << FormShift,
- NEONDupFrm = 28 << FormShift,
+    // Miscellaneous format
+ MiscFrm = 25 << FormShift,
+
+ // NEON formats
+ NGetLnFrm = 26 << FormShift,
+ NSetLnFrm = 27 << FormShift,
+ NDupFrm = 28 << FormShift,
+ NLdStFrm = 29 << FormShift,
+ N1RegModImmFrm= 30 << FormShift,
+ N2RegFrm = 31 << FormShift,
+ NVCVTFrm = 32 << FormShift,
+ NVDupLnFrm = 33 << FormShift,
+ N2RegVShLFrm = 34 << FormShift,
+ N2RegVShRFrm = 35 << FormShift,
+ N3RegFrm = 36 << FormShift,
+ N3RegVShFrm = 37 << FormShift,
+ NVExtFrm = 38 << FormShift,
+ NVMulSLFrm = 39 << FormShift,
+ NVTBLFrm = 40 << FormShift,
//===------------------------------------------------------------------===//
// Misc flags.
@@ -213,7 +227,8 @@ public:
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
@@ -258,12 +273,10 @@ public:
virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -283,29 +296,51 @@ public:
const MDNode *MDPtr,
DebugLoc DL) const;
- virtual bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const;
-
virtual void reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo &TRI) const;
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
virtual bool produceSameValue(const MachineInstr *MI0,
const MachineInstr *MI1) const;
+
+ /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
+ /// determine if two loads are loading from the same base address. It should
+ /// only return true if the base pointers are the same and the only
+  /// difference between the two addresses is the offset. It also returns the
+ /// offsets by reference.
+ virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
+ int64_t &Offset1, int64_t &Offset2)const;
+
+  /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
+  /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
+  /// be scheduled together. On some targets, if two loads are loading from
+ /// addresses in the same cache line, it's better if they are scheduled
+ /// together. This function takes two integers that represent the load offsets
+ /// from the common base address. It returns true if it decides it's desirable
+ /// to schedule the two loads together. "NumLoads" is the number of loads that
+ /// have already been scheduled after Load1.
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+ int64_t Offset1, int64_t Offset2,
+ unsigned NumLoads) const;
+
+ virtual bool isSchedulingBoundary(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ const MachineFunction &MF) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs) const;
+
+ virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT,
+ MachineBasicBlock &FMBB,unsigned NumF) const;
+
+ virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs) const {
+ return NumInstrs && NumInstrs == 1;
+ }
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 82458d2..182bd99 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -170,56 +170,6 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs;
}
-const TargetRegisterClass* const *
-ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass,
- &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = {
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={
- &ARM::GPRRegClass, &ARM::tGPRRegClass, &ARM::tGPRRegClass,
- &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass,
- &ARM::GPRRegClass, &ARM::GPRRegClass,
-
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass,
- 0
- };
-
- if (STI.isThumb1Only()) {
- return STI.isTargetDarwin()
- ? DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses;
- }
- return STI.isTargetDarwin()
- ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses;
-}
-
BitVector ARMBaseRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
  // FIXME: avoid re-calculating this every time.
@@ -352,7 +302,7 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
}
bool
-ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC,
+ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &SubIndices,
unsigned &NewSubIdx) const {
@@ -724,6 +674,15 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
I != E; ++I) {
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
if (!I->getOperand(i).isFI()) continue;
+
+ // When using ADDri to get the address of a stack object, 255 is the
+ // largest offset guaranteed to fit in the immediate offset.
+ if (I->getOpcode() == ARM::ADDri) {
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ }
+
+ // Otherwise check the addressing mode.
switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
case ARMII::AddrMode3:
case ARMII::AddrModeT2_i8:
@@ -765,6 +724,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
SmallVector<unsigned, 4> UnspilledCS1GPRs;
SmallVector<unsigned, 4> UnspilledCS2GPRs;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
// scratch register.
@@ -780,7 +740,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const unsigned *CSRegs = getCalleeSavedRegs();
- const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses();
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
bool Spilled = false;
@@ -798,50 +757,50 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
}
- if (CSRegClasses[i] == ARM::GPRRegisterClass ||
- CSRegClasses[i] == ARM::tGPRRegisterClass) {
- if (Spilled) {
- NumGPRSpills++;
+ if (!ARM::GPRRegisterClass->contains(Reg))
+ continue;
- if (!STI.isTargetDarwin()) {
- if (Reg == ARM::LR)
- LRSpilled = true;
- CS1Spilled = true;
- continue;
- }
+ if (Spilled) {
+ NumGPRSpills++;
- // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
- switch (Reg) {
- case ARM::LR:
+ if (!STI.isTargetDarwin()) {
+ if (Reg == ARM::LR)
LRSpilled = true;
- // Fallthrough
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- CS1Spilled = true;
- break;
- default:
- break;
- }
- } else {
- if (!STI.isTargetDarwin()) {
- UnspilledCS1GPRs.push_back(Reg);
- continue;
- }
+ CS1Spilled = true;
+ continue;
+ }
- switch (Reg) {
- case ARM::R4:
- case ARM::R5:
- case ARM::R6:
- case ARM::R7:
- case ARM::LR:
- UnspilledCS1GPRs.push_back(Reg);
- break;
- default:
- UnspilledCS2GPRs.push_back(Reg);
- break;
- }
+ // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
+ switch (Reg) {
+ case ARM::LR:
+ LRSpilled = true;
+ // Fallthrough
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ CS1Spilled = true;
+ break;
+ default:
+ break;
+ }
+ } else {
+ if (!STI.isTargetDarwin()) {
+ UnspilledCS1GPRs.push_back(Reg);
+ continue;
+ }
+
+ switch (Reg) {
+ case ARM::R4:
+ case ARM::R5:
+ case ARM::R6:
+ case ARM::R7:
+ case ARM::LR:
+ UnspilledCS1GPRs.push_back(Reg);
+ break;
+ default:
+ UnspilledCS2GPRs.push_back(Reg);
+ break;
}
}
}
@@ -862,9 +821,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// offset, make sure a register (or a spill slot) is available for the
// register scavenger. Note that if we're indexing off the frame pointer, the
// effective stack size is 4 bytes larger since the FP points to the stack
- // slot of the previous FP.
- bool BigStack = RS &&
- estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF);
+ // slot of the previous FP. Also, if we have variable sized objects in the
+ // function, stack slot references will often be negative, and some of
+ // our instructions are positive-offset only, so conservatively consider
+ // that case to want a spill slot (or register) as well.
+ // FIXME: We could add logic to be more precise about negative offsets
+ // and which instructions will need a scratch register for them. Is it
+ // worth the effort and added fragility?
+ bool BigStack =
+ (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >=
+ estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects();
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
@@ -957,7 +923,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// note: Thumb1 functions spill to R12, not the stack. Reserve a slot
// closest to SP or frame pointer.
const TargetRegisterClass *RC = ARM::GPRRegisterClass;
- MachineFrameInfo *MFI = MF.getFrameInfo();
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
@@ -1622,6 +1587,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = prior(MBB.end());
assert(MBBI->getDesc().isReturn() &&
"Can only insert epilog into returning blocks");
+ unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl = MBBI->getDebugLoc();
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1696,6 +1662,39 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size());
}
+ if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND ||
+ RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) {
+ // Tail call return: adjust the stack pointer and jump to callee.
+ MBBI = prior(MBB.end());
+ MachineOperand &JumpTarget = MBBI->getOperand(0);
+
+ // Jump to label or value in register.
+ if (RetOpcode == ARM::TCRETURNdi) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else if (RetOpcode == ARM::TCRETURNdiND) {
+ BuildMI(MBB, MBBI, dl,
+ TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)).
+ addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
+ JumpTarget.getTargetFlags());
+ } else if (RetOpcode == ARM::TCRETURNri) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ } else if (RetOpcode == ARM::TCRETURNriND) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
+ }
+
+ MachineInstr *NewMI = prior(MBBI);
+ for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
+ NewMI->addOperand(MBBI->getOperand(i));
+
+ // Delete the pseudo instruction TCRETURN.
+ MBB.erase(MBBI);
+ }
+
if (VARegSaveSize)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
}
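The TCRETURN expansion in the epilogue follows a common pseudo-lowering pattern: build the real tail jump from the pseudo's operand 0 (the jump target), copy every remaining operand across so implicit uses survive, then erase the pseudo. A toy model of just the operand transfer (strings stand in for MachineOperands; the names are illustrative):

    #include <cassert>
    #include <string>
    #include <vector>

    // Operand 0 is the jump target; the rest are implicit uses that must
    // be preserved on the real TAILJMP.
    static std::vector<std::string>
    expandTCReturn(const std::vector<std::string> &Pseudo) {
      std::vector<std::string> Jump = {"TAILJMPd", Pseudo[0]};
      for (size_t i = 1; i < Pseudo.size(); ++i) // transfer remaining operands
        Jump.push_back(Pseudo[i]);
      return Jump; // the caller then erases the pseudo, as MBB.erase(MBBI) does
    }

    int main() {
      auto J = expandTCReturn({"@callee", "implicit-SP", "implicit-R0"});
      assert(J.size() == 4 && J[0] == "TAILJMPd" && J[3] == "implicit-R0");
      return 0;
    }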
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 2c9c82d..f7ee0d5 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -69,9 +69,6 @@ public:
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
/// getMatchingSuperRegClass - Return a subclass of the specified register
@@ -81,14 +78,15 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
- /// canCombinedSubRegIndex - Given a register class and a list of sub-register
- /// indices, return true if it's possible to combine the sub-register indices
- /// into one that corresponds to a larger sub-register. Return the new sub-
- /// register index by reference. Note the new index by be zero if the given
- /// sub-registers combined to form the whole register.
- virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
- SmallVectorImpl<unsigned> &SubIndices,
- unsigned &NewSubIdx) const;
+ /// canCombineSubRegIndices - Given a register class and a list of
+ /// subregister indices, return true if it's possible to combine the
+ /// subregister indices into one that corresponds to a larger
+ /// subregister. Return the new subregister index by reference. Note the
+ /// new index may be zero if the given subregisters can be combined to
+ /// form the whole register.
+ virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const;
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
@@ -150,8 +148,8 @@ public:
virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const;
virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, FrameIndexValue *Value = NULL,
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f2730fc..7895cb0 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -55,6 +55,7 @@ namespace {
const std::vector<MachineConstantPoolEntry> *MCPEs;
const std::vector<MachineJumpTableEntry> *MJTEs;
bool IsPIC;
+ bool IsThumb;
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineModuleInfo>();
@@ -67,8 +68,8 @@ namespace {
: MachineFunctionPass(&ID), JTI(0),
II((const ARMInstrInfo *)tm.getInstrInfo()),
TD(tm.getTargetData()), TM(tm),
- MCE(mce), MCPEs(0), MJTEs(0),
- IsPIC(TM.getRelocationModel() == Reloc::PIC_) {}
+ MCE(mce), MCPEs(0), MJTEs(0),
+ IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {}
/// getBinaryCodeForInstr - This function, generated by the
/// CodeEmitterGenerator using TableGen, produces the binary encoding for
@@ -139,6 +140,12 @@ namespace {
void emitMiscInstruction(const MachineInstr &MI);
+ void emitNEONLaneInstruction(const MachineInstr &MI);
+ void emitNEONDupInstruction(const MachineInstr &MI);
+ void emitNEON1RegModImmInstruction(const MachineInstr &MI);
+ void emitNEON2RegInstruction(const MachineInstr &MI);
+ void emitNEON3RegInstruction(const MachineInstr &MI);
+
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO);
@@ -147,7 +154,8 @@ namespace {
}
/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
- /// machine operand requires relocation, record the relocation and return zero.
+ /// machine operand requires relocation, record the relocation and return
+ /// zero.
unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
unsigned Reloc);
unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
@@ -193,6 +201,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) {
MJTEs = 0;
if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables();
IsPIC = TM.getRelocationModel() == Reloc::PIC_;
+ IsThumb = MF.getInfo<ARMFunctionInfo>()->isThumbFunction();
JTI->Initialize(MF, IsPIC);
MMI = &getAnalysis<MachineModuleInfo>();
MCE.setModuleInfo(MMI);
@@ -347,7 +356,7 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
MCE.processDebugLoc(MI.getDebugLoc(), true);
- NumEmitted++; // Keep track of the # of mi's emitted
+ ++NumEmitted; // Keep track of the # of mi's emitted
switch (MI.getDesc().TSFlags & ARMII::FormMask) {
default: {
llvm_unreachable("Unhandled instruction encoding format!");
@@ -407,6 +416,23 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) {
case ARMII::VFPMiscFrm:
emitMiscInstruction(MI);
break;
+ // NEON instructions.
+ case ARMII::NGetLnFrm:
+ case ARMII::NSetLnFrm:
+ emitNEONLaneInstruction(MI);
+ break;
+ case ARMII::NDupFrm:
+ emitNEONDupInstruction(MI);
+ break;
+ case ARMII::N1RegModImmFrm:
+ emitNEON1RegModImmInstruction(MI);
+ break;
+ case ARMII::N2RegFrm:
+ emitNEON2RegInstruction(MI);
+ break;
+ case ARMII::N3RegFrm:
+ emitNEON3RegInstruction(MI);
+ break;
}
MCE.processDebugLoc(MI.getDebugLoc(), false);
}
@@ -1539,4 +1565,144 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
emitWordLE(Binary);
}
+static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegD = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegD = ARMRegisterInfo::getRegisterNumbering(RegD);
+ Binary |= (RegD & 0xf) << ARMII::RegRdShift;
+ Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegN = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegN = ARMRegisterInfo::getRegisterNumbering(RegN);
+ Binary |= (RegN & 0xf) << ARMII::RegRnShift;
+ Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift;
+ return Binary;
+}
+
+static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) {
+ unsigned RegM = MI.getOperand(OpIdx).getReg();
+ unsigned Binary = 0;
+ RegM = ARMRegisterInfo::getRegisterNumbering(RegM);
+ Binary |= (RegM & 0xf);
+ Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift;
+ return Binary;
+}
+
+/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON
+/// data-processing instruction to the corresponding Thumb encoding.
+static unsigned convertNEONDataProcToThumb(unsigned Binary) {
+ assert((Binary & 0xfe000000) == 0xf2000000 &&
+ "not an ARM NEON data-processing instruction");
+ unsigned UBit = (Binary >> 24) & 1;
+ return 0xef000000 | (UBit << 28) | (Binary & 0xffffff);
+}
+
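The conversion is a pure bit rearrangement: ARM-mode NEON data-processing instructions live under the 0xf2/0xf3 prefix with the U bit at bit 24, while the Thumb encoding uses 0xef/0xff with U at bit 28. A runnable copy of the same transformation:

    #include <cassert>
    #include <cstdint>

    static uint32_t toThumbNEON(uint32_t Binary) {
      assert((Binary & 0xfe000000) == 0xf2000000); // ARM NEON prefix
      uint32_t UBit = (Binary >> 24) & 1;          // U moves from bit 24...
      return 0xef000000 | (UBit << 28) | (Binary & 0xffffff); // ...to bit 28
    }

    int main() {
      assert(toThumbNEON(0xf2000000) == 0xef000000); // U == 0
      assert(toThumbNEON(0xf3000000) == 0xff000000); // U == 1
      return 0;
    }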
+void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
+ const TargetInstrDesc &TID = MI.getDesc();
+ if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+ RegTOpIdx = 0;
+ RegNOpIdx = 1;
+ LnOpIdx = 2;
+ } else { // ARMII::NSetLnFrm
+ RegTOpIdx = 2;
+ RegNOpIdx = 0;
+ LnOpIdx = 3;
+ }
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(RegTOpIdx).getReg();
+ RegT = ARMRegisterInfo::getRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, RegNOpIdx);
+
+ unsigned LaneShift;
+ if ((Binary & (1 << 22)) != 0)
+ LaneShift = 0; // 8-bit elements
+ else if ((Binary & (1 << 5)) != 0)
+ LaneShift = 1; // 16-bit elements
+ else
+ LaneShift = 2; // 32-bit elements
+
+ unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift;
+ unsigned Opc1 = Lane >> 2;
+ unsigned Opc2 = Lane & 3;
+  assert((Opc1 & ~3) == 0 && "out-of-range lane number operand");
+ Binary |= (Opc1 << 21);
+ Binary |= (Opc2 << 5);
+
+ emitWordLE(Binary);
+}
+
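The lane number is first scaled by the element size and the scaled value is then split across the opc1 field (bits 22:21) and the opc2 field (bits 6:5); note that valid lanes always leave Opc1 within two bits, which is what the assertion checks. A worked example, assuming 16-bit elements and lane operand 3:

    #include <cassert>

    int main() {
      unsigned LaneShift = 1;          // 16-bit elements (see field tests above)
      unsigned Lane = 3u << LaneShift; // lane operand 3 -> Lane == 6
      unsigned Opc1 = Lane >> 2;       // 1 -> instruction bits 22:21
      unsigned Opc2 = Lane & 3;        // 2 -> instruction bits 6:5
      assert((Opc1 & ~3u) == 0 && "lane in range");
      assert(((Opc1 << 21) | (Opc2 << 5)) == 0x200040);
      return 0;
    }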
+void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+
+ // Set the conditional execution predicate
+ Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift;
+
+ unsigned RegT = MI.getOperand(1).getReg();
+ RegT = ARMRegisterInfo::getRegisterNumbering(RegT);
+ Binary |= (RegT << ARMII::RegRdShift);
+ Binary |= encodeNEONRn(MI, 0);
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd.
+ Binary |= encodeNEONRd(MI, 0);
+ // Immediate fields: Op, Cmode, I, Imm3, Imm4
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Op = (Imm >> 12) & 1;
+ unsigned Cmode = (Imm >> 8) & 0xf;
+ unsigned I = (Imm >> 7) & 1;
+ unsigned Imm3 = (Imm >> 4) & 0x7;
+ unsigned Imm4 = Imm & 0xf;
+ Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ emitWordLE(Binary);
+}
+
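The 13-bit immediate operand is a pre-packed Op:Cmode:I:Imm3:Imm4 bundle (from MSB to LSB) that gets scattered into the instruction word. A worked example with an arbitrary sample value:

    #include <cassert>

    int main() {
      unsigned Imm   = 0x1234;            // sample 13-bit operand
      unsigned Op    = (Imm >> 12) & 1;   // 1
      unsigned Cmode = (Imm >> 8) & 0xf;  // 2
      unsigned I     = (Imm >> 7) & 1;    // 0
      unsigned Imm3  = (Imm >> 4) & 0x7;  // 3
      unsigned Imm4  = Imm & 0xf;         // 4
      unsigned Fields =
          (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4;
      assert(Fields == 0x30224);
      return 0;
    }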
+void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source register in Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VDUPfdf or VDUPfqf.
+ emitWordLE(Binary);
+}
+
+void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
+ const TargetInstrDesc &TID = MI.getDesc();
+ unsigned Binary = getBinaryCodeForInstr(MI);
+ // Destination register is encoded in Dd; source registers in Dn and Dm.
+ unsigned OpIdx = 0;
+ Binary |= encodeNEONRd(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRn(MI, OpIdx++);
+ if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ ++OpIdx;
+ Binary |= encodeNEONRm(MI, OpIdx);
+ if (IsThumb)
+ Binary = convertNEONDataProcToThumb(Binary);
+ // FIXME: This does not handle VMOVDneon or VMOVQ.
+ emitWordLE(Binary);
+}
+
#include "ARMGenCodeEmitter.inc"
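The 2- and 3-register encoders above share one idiom worth calling out: after each register operand is encoded, the operand index skips any operand tied (TIED_TO) to an earlier one, e.g. the tied destination copy of an accumulating instruction. A standalone model of that walk (a bool vector stands in for the TIED_TO constraint query):

    #include <cassert>
    #include <vector>

    // Advance past the operand just encoded, then skip once more if the
    // new operand is tied to an earlier one.
    static unsigned nextOp(const std::vector<bool> &Tied, unsigned OpIdx) {
      ++OpIdx;
      if (OpIdx < Tied.size() && Tied[OpIdx])
        ++OpIdx;
      return OpIdx;
    }

    int main() {
      // Operands: Dd, Dd(tied), Dn, Dm -- an accumulating 3-register op.
      std::vector<bool> Tied = {false, true, false, false};
      unsigned Rd = 0;
      unsigned Rn = nextOp(Tied, Rd); // skips the tied copy -> 2
      unsigned Rm = nextOp(Tied, Rn); // 3
      assert(Rn == 2 && Rm == 3);
      return 0;
    }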
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 13d8b74..65a3da6 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -337,7 +337,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
if (CPChange && ++NoCPIters > 30)
llvm_unreachable("Constant Island pass failed to converge!");
DEBUG(dumpBBs());
-
+
// Clear NewWaterList now. If we split a block for branches, it should
// appear as "new water" for the next iteration of constant pool placement.
NewWaterList.clear();
@@ -361,8 +361,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// After a while, this might be made debug-only, but it is not expensive.
verify(MF);
- // If LR has been forced spilled and no far jumps (i.e. BL) has been issued.
- // Undo the spill / restore of LR if possible.
+ // If LR has been forced spilled and no far jump (i.e. BL) has been issued,
+ // undo the spill / restore of LR if possible.
if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
MadeChange |= UndoLRSpillRestore();
@@ -407,7 +407,7 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
std::vector<CPEntry> CPEs;
CPEs.push_back(CPEntry(CPEMI, i));
CPEntries.push_back(CPEs);
- NumCPEs++;
+ ++NumCPEs;
DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i
<< "\n");
}
@@ -418,7 +418,8 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
- if (llvm::next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
+ // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end())
return false;
MachineBasicBlock *NextBB = llvm::next(MBBI);
@@ -491,6 +492,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
unsigned MBBSize = 0;
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
+ if (I->isDebugValue())
+ continue;
// Add instruction size to MBBSize.
MBBSize += TII->GetInstSizeInBytes(I);
@@ -722,7 +725,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
// correspond to anything in the source.
unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
- NumSplit++;
+ ++NumSplit;
// Update the CFG. All succs of OrigBB are now succs of NewBB.
while (!OrigBB->succ_empty()) {
@@ -945,7 +948,7 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) {
if (--CPE->RefCount == 0) {
RemoveDeadCPEMI(CPEMI);
CPE->CPEMI = NULL;
- NumCPEs--;
+ --NumCPEs;
return true;
}
return false;
@@ -1246,7 +1249,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
.addImm(ID).addConstantPoolIndex(CPI).addImm(Size);
CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
- NumCPEs++;
+ ++NumCPEs;
BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()];
// Compensate for .align 2 in thumb mode.
@@ -1369,7 +1372,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) {
BBSizes[MBB->getNumber()] += 2;
AdjustBBOffsetsAfter(MBB, 2);
HasFarJump = true;
- NumUBrFixed++;
+ ++NumUBrFixed;
DEBUG(errs() << " Changed B to long jump " << *MI);
@@ -1402,7 +1405,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
MachineInstr *BMI = &MBB->back();
bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
- NumCBrFixed++;
+ ++NumCBrFixed;
if (BMI != MI) {
if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
BMI->getOpcode() == Br.UncondBr) {
@@ -1621,7 +1624,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
// constantpool tables?
MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
if (MJTI == 0) return false;
-
+
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
@@ -1658,15 +1661,25 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
continue;
unsigned IdxReg = MI->getOperand(1).getReg();
bool IdxRegKill = MI->getOperand(1).isKill();
+
+ // Scan backwards to find the instruction that defines the base
+ // register. Due to post-RA scheduling, we can't count on it
+ // immediately preceding the branch instruction.
MachineBasicBlock::iterator PrevI = MI;
- if (PrevI == MBB->begin())
+ MachineBasicBlock::iterator B = MBB->begin();
+ while (PrevI != B && !PrevI->definesRegister(BaseReg))
+ --PrevI;
+
+ // If for some reason we didn't find it, we can't do anything, so
+ // just skip this one.
+ if (!PrevI->definesRegister(BaseReg))
continue;
- MachineInstr *AddrMI = --PrevI;
+ MachineInstr *AddrMI = PrevI;
bool OptOk = true;
- // Examine the instruction that calculate the jumptable entry address.
- // If it's not the one just before the t2BR_JT, we won't delete it, then
- // it's not worth doing the optimization.
+ // Examine the instruction that calculates the jumptable entry address.
+ // Make sure it only defines the base register and kills any uses
+ // other than the index register.
for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) {
const MachineOperand &MO = AddrMI->getOperand(k);
if (!MO.isReg() || !MO.getReg())
@@ -1683,9 +1696,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
if (!OptOk)
continue;
- // The previous instruction should be a tLEApcrel or t2LEApcrelJT, we want
+ // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction
+ // that gave us the initial base register definition.
+ for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI)
+ ;
+
+ // The instruction should be a tLEApcrel or t2LEApcrelJT; we want
// to delete it as well.
- MachineInstr *LeaMI = --PrevI;
+ MachineInstr *LeaMI = PrevI;
if ((LeaMI->getOpcode() != ARM::tLEApcrelJT &&
LeaMI->getOpcode() != ARM::t2LEApcrelJT) ||
LeaMI->getOperand(0).getReg() != BaseReg)
@@ -1729,7 +1747,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
if (MJTI == 0) return false;
-
+
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
@@ -1769,7 +1787,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
{
MachineFunction &MF = *BB->getParent();
- // If it's the destination block is terminated by an unconditional branch,
+ // If the destination block is terminated by an unconditional branch,
// try to move it; otherwise, create a new block following the jump
// table that branches back to the actual target. This is a very simple
// heuristic. FIXME: We can definitely improve it.
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 6f4eddf..3119b54 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -15,6 +15,7 @@
#define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H
#include "llvm/CodeGen/MachineConstantPool.h"
+#include <cstddef>
namespace llvm {
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c87f5d7..9c62597 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -144,13 +144,15 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineInstrBuilder Even =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
- .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+ .addReg(EvenDst,
+ getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill)));
MachineInstrBuilder Odd =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
TII->get(ARM::VMOVQ))
- .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead))
- .addReg(OddSrc, getKillRegState(SrcIsKill)));
+ .addReg(OddDst,
+ getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
Modified = true;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9baef6b..c84d3ff 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMTargetMachine.h"
@@ -35,11 +36,6 @@
using namespace llvm;
-static cl::opt<bool>
-UseRegSeq("neon-reg-sequence", cl::Hidden,
- cl::desc("Use reg_sequence to model ld / st of multiple neon regs"),
- cl::init(true));
-
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
/// instructions for SelectionDAG operations.
@@ -147,6 +143,11 @@ private:
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1);
+ /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
+ /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
+ /// generated to force the table registers to be consecutive.
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
+
/// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
@@ -173,24 +174,17 @@ private:
char ConstraintCode,
std::vector<SDValue> &OutOps);
- /// PairDRegs - Form a quad register from a pair of D registers.
- ///
+ // Form pairs of consecutive S, D, or Q registers.
+ SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1);
SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
-
- /// PairDRegs - Form a quad register pair from a pair of Q registers.
- ///
SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
- /// QuadDRegs - Form a quad register pair from a quad of D registers.
- ///
+ // Form sequences of 4 consecutive S, D, or Q registers.
+ SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
-
- /// QuadQRegs - Form 4 consecutive Q registers.
- ///
SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
- /// OctoDRegs - Form 8 consecutive D registers.
- ///
+ // Form sequences of 8 consecutive D registers.
SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
SDValue V4, SDValue V5, SDValue V6, SDValue V7);
};
@@ -544,10 +538,9 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N,
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Offset){
// FIXME dl should come from the parent load or store, not the address
- DebugLoc dl = Op->getDebugLoc();
if (N.getOpcode() != ISD::ADD) {
ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
- if (!NC || NC->getZExtValue() != 0)
+ if (!NC || !NC->isNullValue())
return false;
Base = Offset = N;
@@ -788,8 +781,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
if (N.getOpcode() == ISD::ADD) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
+ // 8 bits.
if (((RHSC & 0x3) == 0) &&
- ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { // 8 bits.
+ ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) {
Base = N.getOperand(0);
OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32);
return true;
@@ -798,7 +792,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N,
} else if (N.getOpcode() == ISD::SUB) {
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
int RHSC = (int)RHS->getZExtValue();
- if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { // 8 bits.
+ // 8 bits.
+ if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) {
Base = N.getOperand(0);
OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32);
return true;
@@ -960,22 +955,24 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
+/// PairSRegs - Form a D register from a pair of S registers.
+///
+SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
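These helpers all build the same operand shape: REG_SEQUENCE takes alternating (value, subregister-index) operands, so PairSRegs hands it { V0, ssub_0, V1, ssub_1 } and the register allocator is thereby forced to place V0 and V1 in the consecutive S registers of one D register. A tiny structural model of that operand list (illustrative types, not the SelectionDAG API):

    #include <cassert>
    #include <vector>

    enum SubIdx { ssub_0, ssub_1, ssub_2, ssub_3 };
    struct RegSeqOp { int Val; SubIdx Sub; };

    // Operand list shape PairSRegs hands to REG_SEQUENCE:
    // alternating (value, subregister index) pairs.
    static std::vector<RegSeqOp> pairS(int V0, int V1) {
      return {{V0, ssub_0}, {V1, ssub_1}};
    }

    int main() {
      auto Ops = pairS(10, 11);
      assert(Ops.size() == 2 && Ops[0].Sub == ssub_0 && Ops[1].Sub == ssub_1);
      return 0;
    }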
/// PairDRegs - Form a quad register from a pair of D registers.
///
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
- if (llvm::ModelWithRegSequence()) {
- const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
- }
- SDValue Undef =
- SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0);
- SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
- VT, Undef, V0, SubReg0);
- return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
- VT, SDValue(Pair, 0), V1, SubReg1);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
@@ -988,6 +985,19 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
+/// QuadSRegs - Form 4 consecutive S registers.
+///
+SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
/// QuadDRegs - Form 4 consecutive D registers.
///
SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
@@ -1088,7 +1098,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- if (!llvm::ModelWithRegSequence() || NumVecs < 2)
+ if (NumVecs < 2)
return VLd;
SDValue RegSeq;
@@ -1129,24 +1139,17 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
Chain = SDValue(VLd, 2 * NumVecs);
// Combine the even and odd subregs to produce the result.
- if (llvm::ModelWithRegSequence()) {
- if (NumVecs == 1) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
- ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
- } else {
- SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
- SDValue(VLd, 0), SDValue(VLd, 1),
- SDValue(VLd, 2), SDValue(VLd, 3)), 0);
- SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
- SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
- ReplaceUses(SDValue(N, 0), Q0);
- ReplaceUses(SDValue(N, 1), Q1);
- }
+ if (NumVecs == 1) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
+ ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
} else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
- }
+ SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
+ SDValue(VLd, 0), SDValue(VLd, 1),
+ SDValue(VLd, 2), SDValue(VLd, 3)), 0);
+ SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
+ SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
+ ReplaceUses(SDValue(N, 0), Q0);
+ ReplaceUses(SDValue(N, 1), Q1);
}
} else {
// Otherwise, quad registers are loaded with two separate instructions,
@@ -1169,37 +1172,27 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
Chain = SDValue(VLdB, NumVecs+1);
- if (llvm::ModelWithRegSequence()) {
- SDValue V0 = SDValue(VLdA, 0);
- SDValue V1 = SDValue(VLdB, 0);
- SDValue V2 = SDValue(VLdA, 1);
- SDValue V3 = SDValue(VLdB, 1);
- SDValue V4 = SDValue(VLdA, 2);
- SDValue V5 = SDValue(VLdB, 2);
- SDValue V6 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
- 0)
- : SDValue(VLdA, 3);
- SDValue V7 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
- 0)
- : SDValue(VLdB, 3);
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
- V4, V5, V6, V7), 0);
-
- // Extract out the 3 / 4 Q registers.
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
- dl, VT, RegSeq);
- ReplaceUses(SDValue(N, Vec), Q);
- }
- } else {
- // Combine the even and odd subregs to produce the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
- }
+ SDValue V0 = SDValue(VLdA, 0);
+ SDValue V1 = SDValue(VLdB, 0);
+ SDValue V2 = SDValue(VLdA, 1);
+ SDValue V3 = SDValue(VLdB, 1);
+ SDValue V4 = SDValue(VLdA, 2);
+ SDValue V5 = SDValue(VLdB, 2);
+ SDValue V6 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
+ : SDValue(VLdA, 3);
+ SDValue V7 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0)
+ : SDValue(VLdB, 3);
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
+ V4, V5, V6, V7), 0);
+
+ // Extract out the 3 / 4 Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, RegSeq);
+ ReplaceUses(SDValue(N, Vec), Q);
}
}
ReplaceUses(SDValue(N, NumVecs), Chain);
@@ -1209,7 +1202,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range");
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
@@ -1247,7 +1240,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
Ops.push_back(Align);
if (is64BitVector) {
- if (llvm::ModelWithRegSequence() && NumVecs >= 2) {
+ if (NumVecs >= 2) {
SDValue RegSeq;
SDValue V0 = N->getOperand(0+3);
SDValue V1 = N->getOperand(1+3);
@@ -1292,7 +1285,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
// Quad registers are directly supported for VST1 and VST2,
// storing pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- if (llvm::ModelWithRegSequence() && NumVecs == 2) {
+ if (NumVecs == 2) {
// First extract the pair of Q registers.
SDValue Q0 = N->getOperand(3);
SDValue Q1 = N->getOperand(4);
@@ -1330,76 +1323,48 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
- if (llvm::ModelWithRegSequence()) {
- // Form the QQQQ REG_SEQUENCE.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3));
- V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- }
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
-
- // Store the even D registers.
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- Ops.push_back(Reg0); // post-access address offset
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
- RegVT, RegSeq));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = QOpcodes0[OpcodeIndex];
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStA, 1);
- // Store the odd D registers.
- Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
- RegVT, RegSeq);
- Ops[NumVecs+5] = Chain;
- Opc = QOpcodes1[OpcodeIndex];
- SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
- } else {
- Ops.push_back(Reg0); // post-access address offset
-
- // Store the even subregs.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = QOpcodes0[OpcodeIndex];
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStA, 1);
-
- // Store the odd subregs.
- Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
- N->getOperand(Vec+3));
- Ops[NumVecs+5] = Chain;
- Opc = QOpcodes1[OpcodeIndex];
- SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
- }
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3));
+ V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3));
+ }
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
+
+ // Store the even D registers.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ Ops.push_back(Reg0); // post-access address offset
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
+ RegVT, RegSeq));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops[0] = SDValue(VStA, 0); // MemAddr
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
+ RegVT, RegSeq);
+ Ops[NumVecs+5] = Chain;
+ Opc = QOpcodes1[OpcodeIndex];
+ SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStB, 1);
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
}
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
@@ -1421,13 +1386,11 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
// Quad registers are handled by load/store of subregs. Find the subreg info.
unsigned NumElts = 0;
- int SubregIdx = 0;
bool Even = false;
EVT RegVT = VT;
if (!is64BitVector) {
RegVT = GetNEONSubregVT(VT);
NumElts = RegVT.getVectorNumElements();
- SubregIdx = (Lane < NumElts) ? ARM::dsub_0 : ARM::dsub_1;
Even = Lane < NumElts;
}
@@ -1455,35 +1418,26 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned Opc = 0;
if (is64BitVector) {
Opc = DOpcodes[OpcodeIndex];
- if (llvm::ModelWithRegSequence()) {
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
-
- // Now extract the D registers back out.
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
- RegSeq));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
- RegSeq));
- if (NumVecs > 2)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
- RegSeq));
- if (NumVecs > 3)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
- RegSeq));
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
} else {
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
}
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq));
} else {
// Check if this is loading the even or odd subreg of a Q register.
if (Lane < NumElts) {
@@ -1493,31 +1447,24 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
Opc = QOpcodes1[OpcodeIndex];
}
- if (llvm::ModelWithRegSequence()) {
- SDValue RegSeq;
- SDValue V0 = N->getOperand(0+3);
- SDValue V1 = N->getOperand(1+3);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
- } else {
- SDValue V2 = N->getOperand(2+3);
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : N->getOperand(3+3);
- RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
- }
-
- // Extract the subregs of the input vector.
- unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
- RegSeq));
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
} else {
- // Extract the subregs of the input vector.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
- N->getOperand(Vec+3)));
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
}
+
+ // Extract the subregs of the input vector.
+ unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
+ RegSeq));
}
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
@@ -1531,76 +1478,97 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
ResTys.push_back(MVT::Other);
SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
- if (llvm::ModelWithRegSequence()) {
- // Form a REG_SEQUENCE to force register allocation.
- SDValue RegSeq;
- if (is64BitVector) {
- SDValue V0 = SDValue(VLdLn, 0);
- SDValue V1 = SDValue(VLdLn, 1);
- if (NumVecs == 2) {
- RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
- } else {
- SDValue V2 = SDValue(VLdLn, 2);
- // If it's a vld3, form a quad D-register but discard the last part.
- SDValue V3 = (NumVecs == 3)
- ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
- : SDValue(VLdLn, 3);
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
- }
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ if (is64BitVector) {
+ SDValue V0 = SDValue(VLdLn, 0);
+ SDValue V1 = SDValue(VLdLn, 1);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
} else {
- // For 128-bit vectors, take the 64-bit results of the load and insert them
- // as subregs into the result.
- SDValue V[8];
- for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
- if (Even) {
- V[i] = SDValue(VLdLn, Vec);
- V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- } else {
- V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
- V[i+1] = SDValue(VLdLn, Vec);
- }
+ SDValue V2 = SDValue(VLdLn, 2);
+ // If it's a vld3, form a quad D-register but discard the last part.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : SDValue(VLdLn, 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // For 128-bit vectors, take the 64-bit results of the load and insert
+ // them as subregs into the result.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ if (Even) {
+ V[i] = SDValue(VLdLn, Vec);
+ V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+ } else {
+ V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+ V[i+1] = SDValue(VLdLn, Vec);
}
- if (NumVecs == 3)
- V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
- dl, RegVT), 0);
-
- if (NumVecs == 2)
- RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
- else
- RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
- V[4], V[5], V[6], V[7]), 0);
}
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
- unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
- ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
- return NULL;
- }
-
- // For a 64-bit vector load to D registers, nothing more needs to be done.
- if (is64BitVector)
- return VLdLn;
-
- // For 128-bit vectors, take the 64-bit results of the load and insert them
- // as subregs into the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDValue QuadVec = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT,
- N->getOperand(Vec+3),
- SDValue(VLdLn, Vec));
- ReplaceUses(SDValue(N, Vec), QuadVec);
+ if (NumVecs == 2)
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
+ else
+ RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
}
- Chain = SDValue(VLdLn, NumVecs);
- ReplaceUses(SDValue(N, NumVecs), Chain);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
return NULL;
}
+SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
+ unsigned Opc) {
+ assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ unsigned FirstTblReg = IsExt ? 2 : 1;
+
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(FirstTblReg + 0);
+ SDValue V1 = N->getOperand(FirstTblReg + 1);
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(FirstTblReg + 2);
+ // If it's a vtbl3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
+ : N->getOperand(FirstTblReg + 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ // Now extract the D registers back out.
+ SmallVector<SDValue, 6> Ops;
+ if (IsExt)
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq));
+
+ Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
+ Ops.push_back(getAL(CurDAG)); // predicate
+ Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
+ return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size());
+}
+
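
// A standalone sketch (not part of this patch) of the byte-table semantics
// behind the VTBL/VTBX selection above: each index byte selects a byte from
// the concatenated table registers; an out-of-range index yields 0 for vtbl
// and leaves the destination byte unchanged for vtbx. Names are illustrative.
#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<uint8_t> tbl(const std::vector<uint8_t> &Table,
                                const std::vector<uint8_t> &Idx,
                                const std::vector<uint8_t> *Orig) {
  std::vector<uint8_t> R(Idx.size());
  for (size_t i = 0; i < Idx.size(); ++i) {
    if (Idx[i] < Table.size())
      R[i] = Table[Idx[i]];
    else
      R[i] = Orig ? (*Orig)[i] : 0; // vtbx keeps the old byte, vtbl zeros it
  }
  return R;
}

int main() {
  std::vector<uint8_t> Table = {10, 11, 12, 13, 14, 15, 16, 17,
                                20, 21, 22, 23, 24, 25, 26, 27}; // two D regs
  std::vector<uint8_t> Idx = {0, 8, 15, 99, 3, 7, 200, 1};
  std::vector<uint8_t> Dst = {1, 1, 1, 1, 1, 1, 1, 1};
  std::vector<uint8_t> A = tbl(Table, Idx, 0);    // vtbl2-like
  std::vector<uint8_t> B = tbl(Table, Idx, &Dst); // vtbx2-like
  for (size_t i = 0; i < A.size(); ++i)
    std::printf("%u: vtbl=%u vtbx=%u\n", (unsigned)i, (unsigned)A[i],
                (unsigned)B[i]);
  return 0;
}
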
SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
bool isSigned) {
if (!Subtarget->hasV6T2Ops())
@@ -1954,8 +1922,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
if (Subtarget->isThumb()) {
- SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0 };
- return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 5);
+ SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
+ return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6);
} else {
SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 };
return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7);
@@ -2015,7 +1983,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4);
+ return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
@@ -2029,7 +1997,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb()) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) };
- return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4);
+ return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4);
} else {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
getAL(CurDAG), CurDAG->getRegister(0, MVT::i32),
@@ -2211,6 +2179,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4);
}
+ case ARMISD::BUILD_VECTOR: {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = VecVT.getVectorElementType();
+ unsigned NumElts = VecVT.getVectorNumElements();
+ if (EltVT.getSimpleVT() == MVT::f64) {
+ assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
+ return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ }
+ assert(EltVT.getSimpleVT() == MVT::f32 &&
+ "unexpected type for BUILD_VECTOR");
+ if (NumElts == 2)
+ return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1));
+ assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
+ return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
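
// A standalone sketch (not part of this patch) of the BUILD_VECTOR shapes the
// case above accepts: 2 x f64 becomes a pair of D registers, 2 x f32 a pair
// of S registers, and 4 x f32 a quad of S registers; anything else asserts.
// The enum and helper below are invented for illustration.
#include <cassert>
#include <cstdio>

enum VecKind { PairD, PairS, QuadS };

static VecKind classifyBuildVector(bool EltIsF64, unsigned NumElts) {
  if (EltIsF64) {
    assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
    return PairD;
  }
  if (NumElts == 2)
    return PairS;
  assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
  return QuadS;
}

int main() {
  std::printf("%d %d %d\n", classifyBuildVector(true, 2),
              classifyBuildVector(false, 2), classifyBuildVector(false, 4));
  return 0; // prints 0 1 2
}
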
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: {
@@ -2342,6 +2326,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
break;
}
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+ switch (IntNo) {
+ default:
+ break;
+
+ case Intrinsic::arm_neon_vtbl2:
+ return SelectVTBL(N, false, 2, ARM::VTBL2);
+ case Intrinsic::arm_neon_vtbl3:
+ return SelectVTBL(N, false, 3, ARM::VTBL3);
+ case Intrinsic::arm_neon_vtbl4:
+ return SelectVTBL(N, false, 4, ARM::VTBL4);
+
+ case Intrinsic::arm_neon_vtbx2:
+ return SelectVTBL(N, true, 2, ARM::VTBX2);
+ case Intrinsic::arm_neon_vtbx3:
+ return SelectVTBL(N, true, 3, ARM::VTBX3);
+ case Intrinsic::arm_neon_vtbx4:
+ return SelectVTBL(N, true, 4, ARM::VTBX4);
+ }
+ break;
+ }
+
case ISD::CONCAT_VECTORS:
return SelectConcatVector(N);
}
@@ -2367,9 +2374,3 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new ARMDAGToDAGISel(TM, OptLevel);
}
-
-/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
-/// operations involving sub-registers.
-bool llvm::ModelWithRegSequence() {
- return UseRegSeq;
-}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b8126a3..98d8b85 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -12,6 +12,7 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
@@ -40,6 +41,7 @@
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -47,9 +49,27 @@
#include <sstream>
using namespace llvm;
+STATISTIC(NumTailCalls, "Number of tail calls");
+
+// This option should go away when tail calls fully work.
+static cl::opt<bool>
+EnableARMTailCalls("arm-tail-calls", cl::Hidden,
+ cl::desc("Generate tail calls (TEMPORARY OPTION)."),
+ cl::init(true));
+
static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
- cl::desc("Generate calls via indirect call instructions."),
+ cl::desc("Generate calls via indirect call instructions"),
+ cl::init(false));
+
+static cl::opt<bool>
+ARMInterworking("arm-interworking", cl::Hidden,
+ cl::desc("Enable / disable ARM interworking (for debugging only)"),
+ cl::init(true));
+
+static cl::opt<bool>
+EnableARMCodePlacement("arm-code-placement", cl::Hidden,
+ cl::desc("Enable code placement pass for ARM"),
cl::init(false));
static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
@@ -94,10 +114,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
}
setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- if (llvm::ModelWithRegSequence())
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
- else
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
@@ -393,13 +410,57 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// doesn't yet know how to not do that for SjLj.
setExceptionSelectorRegister(ARM::R0);
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
-
- // If the subtarget does not have extract instructions, sign_extend_inreg
- // needs to be expanded. Extract is available in ARM mode on v6 and up,
- // and on most Thumb2 implementations.
- if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
- || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
+ // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise
+ // use the default expansion.
+ bool canHandleAtomics =
+ (Subtarget->hasV7Ops() ||
+ (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()));
+ if (canHandleAtomics) {
+ // membarrier needs custom lowering; the rest are legal and handled
+ // normally.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
+ } else {
+ // Set them all for expansion, which will force libcalls.
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
+ // Since the libcalls include locking, fold in the fences
+ setShouldFoldAtomicFences(true);
+ }
+ // 64-bit versions are always libcalls (for now)
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);
+
+ // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
+ if (!Subtarget->hasV6Ops()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
}
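
// The long run of setOperationAction calls in the expansion branch above is a
// cross product of eight atomic opcodes and three integer types. A standalone
// sketch (not part of this patch) of that structure; the strings are
// stand-ins, not LLVM's enums.
#include <cstdio>

int main() {
  const char *Ops[] = { "CMP_SWAP", "SWAP",     "LOAD_ADD", "LOAD_SUB",
                        "LOAD_AND", "LOAD_OR",  "LOAD_XOR", "LOAD_NAND" };
  const char *VTs[] = { "i8", "i16", "i32" };
  for (unsigned i = 0; i != sizeof(Ops) / sizeof(Ops[0]); ++i)
    for (unsigned j = 0; j != sizeof(VTs) / sizeof(VTs[0]); ++j)
      std::printf("setOperationAction(ATOMIC_%s, MVT::%s, Expand)\n",
                  Ops[i], VTs[j]);
  return 0; // prints the 24 op/type pairs expanded above
}
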
@@ -412,8 +473,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
- setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ if (Subtarget->isTargetDarwin()) {
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
+ }
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
@@ -474,28 +537,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
else
setSchedulingPreference(Sched::Hybrid);
- // FIXME: If-converter should use instruction latency to determine
- // profitability rather than relying on fixed limits.
- if (Subtarget->getCPUString() == "generic") {
- // Generic (and overly aggressive) if-conversion limits.
- setIfCvtBlockSizeLimit(10);
- setIfCvtDupBlockSizeLimit(2);
- } else if (Subtarget->hasV7Ops()) {
- setIfCvtBlockSizeLimit(3);
- setIfCvtDupBlockSizeLimit(1);
- } else if (Subtarget->hasV6Ops()) {
- setIfCvtBlockSizeLimit(2);
- setIfCvtDupBlockSizeLimit(1);
- } else {
- setIfCvtBlockSizeLimit(3);
- setIfCvtDupBlockSizeLimit(2);
- }
-
maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
- // Do not enable CodePlacementOpt for now: it currently runs after the
- // ARMConstantIslandPass and messes up branch relaxation and placement
- // of constant islands.
- // benefitFromCodePlacementOpt = true;
+
+ // On ARM arguments smaller than 4 bytes are extended, so all arguments
+ // are at least 4 bytes aligned.
+ setMinStackArgumentAlignment(4);
+
+ if (EnableARMCodePlacement)
+ benefitFromCodePlacementOpt = true;
}
const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -537,6 +586,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
+ case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
+
case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
@@ -581,6 +632,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
}
@@ -603,15 +655,33 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
- return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
+ return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}
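
// A standalone illustration (not part of this patch): the value returned
// above is a log2 alignment, so the change gives Thumb functions
// 1 << 1 == 2-byte alignment and ARM functions 1 << 2 == 4-byte alignment,
// matching the respective instruction sizes.
#include <cstdio>

int main() {
  unsigned ThumbLog2 = 1, ARMLog2 = 2;
  std::printf("Thumb: %u bytes, ARM: %u bytes\n",
              1u << ThumbLog2, 1u << ARMLog2);
  return 0;
}
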
Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
- for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ unsigned NumVals = N->getNumValues();
+ if (!NumVals)
+ return Sched::RegPressure;
+
+ for (unsigned i = 0; i != NumVals; ++i) {
EVT VT = N->getValueType(i);
if (VT.isFloatingPoint() || VT.isVector())
return Sched::Latency;
}
+
+ if (!N->isMachineOpcode())
+ return Sched::RegPressure;
+
+ // Loads are scheduled for latency even if the instruction itinerary
+ // is not available.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ if (TID.mayLoad())
+ return Sched::Latency;
+
+ const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
+ if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
+ return Sched::Latency;
return Sched::RegPressure;
}
@@ -964,11 +1034,28 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
- // ARM target does not yet support tail call optimization.
- isTailCall = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
+ bool IsSibCall = false;
+ // Temporarily disable tail calls so things don't break.
+ if (!EnableARMTailCalls)
+ isTailCall = false;
+ if (isTailCall) {
+ // Check if it's really possible to do a tail call.
+ isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
+ Outs, OutVals, Ins, DAG);
+ // We don't support GuaranteedTailCallOpt for ARM, only automatically
+ // detected sibcalls.
+ if (isTailCall) {
+ ++NumTailCalls;
+ IsSibCall = true;
+ }
+ }
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -981,9 +1068,14 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
+ // For tail calls, memory operands are available in our caller's stack.
+ if (IsSibCall)
+ NumBytes = 0;
+
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ if (!IsSibCall)
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
@@ -996,7 +1088,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
i != e;
++i, ++realArgIdx) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[realArgIdx].Val;
+ SDValue Arg = OutVals[realArgIdx];
ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
// Promote the value if needed.
@@ -1044,7 +1136,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
}
} else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
- } else {
+ } else if (!IsSibCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@@ -1059,10 +1151,32 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
- for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
- Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ // Tail-call byval lowering might overwrite argument registers, so in the
+ // case of tail-call optimization the copies to registers are lowered later.
+ if (!isTailCall)
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // For tail calls, lower the arguments to the 'real' stack slot.
+ if (isTailCall) {
+ // Force all the incoming stack arguments to be loaded from the stack
+ // before any new outgoing arguments are stored to the stack, because the
+ // outgoing stack slots may alias the incoming argument stack slots, and
+ // the alias isn't otherwise explicit. This is slightly more conservative
+ // than necessary, because it means that each store effectively depends
+ // on every argument instead of just those arguments it would clobber.
+
+ // Do not flag the preceding copytoreg nodes together with the following ones.
+ InFlag = SDValue();
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+ InFlag = SDValue();
}
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
@@ -1071,7 +1185,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
bool isDirect = false;
bool isARMFunc = false;
bool isLocalARMFunc = false;
- MachineFunction &MF = DAG.getMachineFunction();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (EnableARMLongCalls) {
@@ -1117,7 +1230,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
getTargetMachine().getRelocationModel() != Reloc::Static;
isARMFunc = !Subtarget->isThumb() || isStub;
// ARM call to a local ARM function is predicable.
- isLocalARMFunc = !Subtarget->isThumb() && !isExt;
+ isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
@@ -1134,7 +1247,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else
- Callee = DAG.getTargetGlobalAddress(GV, getPointerTy());
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
isDirect = true;
bool isStub = Subtarget->isTargetDarwin() &&
@@ -1171,11 +1284,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
: ARMISD::CALL_NOLINK;
}
- if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) {
- // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK
- Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag);
- InFlag = Chain.getValue(1);
- }
std::vector<SDValue> Ops;
Ops.push_back(Chain);
@@ -1189,9 +1297,13 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (InFlag.getNode())
Ops.push_back(InFlag);
+
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ if (isTailCall)
+ return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
+
// Returns a chain and a flag for retval copy to use.
- Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
- &Ops[0], Ops.size());
+ Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
@@ -1205,10 +1317,203 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
dl, DAG, InVals);
}
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same (relative) position in the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+ MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ const ARMInstrInfo *TII) {
+ unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ int FI = INT_MAX;
+ if (Arg.getOpcode() == ISD::CopyFromReg) {
+ unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+ if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
+ return false;
+ MachineInstr *Def = MRI->getVRegDef(VR);
+ if (!Def)
+ return false;
+ if (!Flags.isByVal()) {
+ if (!TII->isLoadFromStackSlot(Def, FI))
+ return false;
+ } else {
+ return false;
+ }
+ } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+ if (Flags.isByVal())
+ // ByVal argument is passed in as a pointer but it's now being
+ // dereferenced. e.g.
+ // define @foo(%struct.X* %A) {
+ // tail call @bar(%struct.X* byval %A)
+ // }
+ return false;
+ SDValue Ptr = Ld->getBasePtr();
+ FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+ if (!FINode)
+ return false;
+ FI = FINode->getIndex();
+ } else
+ return false;
+
+ assert(FI != INT_MAX);
+ if (!MFI->isFixedObjectIndex(FI))
+ return false;
+ return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+}
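
// A minimal standalone model (not part of this patch) of the test above: a
// sibcall may reuse a caller stack slot only when the outgoing argument is
// read from a fixed frame object whose offset and size match exactly. The
// struct below is invented for illustration; it is not LLVM's MachineFrameInfo.
#include <cstdio>

struct FixedObject { int Offset; unsigned SizeInBytes; };

static bool matchingStackOffset(const FixedObject &FO, int WantedOffset,
                                unsigned WantedBytes) {
  return FO.Offset == WantedOffset && FO.SizeInBytes == WantedBytes;
}

int main() {
  FixedObject IncomingArg = { 8, 4 };
  std::printf("%d\n", matchingStackOffset(IncomingArg, 8, 4));  // 1: reusable
  std::printf("%d\n", matchingStackOffset(IncomingArg, 12, 4)); // 0: must copy
  return 0;
}
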
+
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible
+/// for tail call optimization. Targets which want to do tail call
+/// optimization should implement this function.
+bool
+ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const {
+ const Function *CallerF = DAG.getMachineFunction().getFunction();
+ CallingConv::ID CallerCC = CallerF->getCallingConv();
+ bool CCMatch = CallerCC == CalleeCC;
+
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
+
+ // Do not sibcall optimize vararg calls unless the call site is not passing
+ // any arguments.
+ if (isVarArg && !Outs.empty())
+ return false;
+
+ // Also avoid sibcall optimization if either caller or callee uses struct
+ // return semantics.
+ if (isCalleeStructRet || isCallerStructRet)
+ return false;
+
+ // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
+ // emitEpilogue is not ready for them.
+ // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
+ // LR. This means if we need to reload LR, it takes an extra instruction,
+ // which outweighs the value of the tail call; but here we don't know yet
+ // whether LR is going to be used. Probably the right approach is to
+ // generate the tail call here and turn it back into CALL/RET in
+ // emitEpilogue if LR is used.
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ // For the moment, we can only do this to functions defined in this
+ // compilation, or to indirect calls. A Thumb B to an ARM function,
+ // or vice versa, is not easily fixed up in the linker unlike BL.
+ // (We could do this by loading the address of the callee into a register;
+ // that is an extra instruction over the direct call and burns a register
+ // as well, so is not likely to be a win.)
+
+ // It might be safe to remove this restriction on non-Darwin.
+
+ // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
+ // but we need to make sure there are enough registers; the only valid
+ // registers are the 4 used for parameters. We don't currently do this
+ // case.
+ if (isa<ExternalSymbolSDNode>(Callee))
+ return false;
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+ if (GV->isDeclaration() || GV->isWeakForLinker())
+ return false;
+ }
+
+ // If the calling conventions do not match, then we'd better make sure the
+ // results are returned in the same way as what the caller expects.
+ if (!CCMatch) {
+ SmallVector<CCValAssign, 16> RVLocs1;
+ CCState CCInfo1(CalleeCC, false, getTargetMachine(),
+ RVLocs1, *DAG.getContext());
+ CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
+
+ SmallVector<CCValAssign, 16> RVLocs2;
+ CCState CCInfo2(CallerCC, false, getTargetMachine(),
+ RVLocs2, *DAG.getContext());
+ CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
+
+ if (RVLocs1.size() != RVLocs2.size())
+ return false;
+ for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
+ if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
+ return false;
+ if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
+ return false;
+ if (RVLocs1[i].isRegLoc()) {
+ if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
+ return false;
+ } else {
+ if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
+ return false;
+ }
+ }
+ }
+
+ // If the callee takes no arguments then go on to check the results of the
+ // call.
+ if (!Outs.empty()) {
+ // Check if stack adjustment is needed. For now, do not do this if any
+ // argument is passed on the stack.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs,
+ CCAssignFnForNode(CalleeCC, false, isVarArg));
+ if (CCInfo.getNextStackOffset()) {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // Check if the arguments are already laid out in the right way as
+ // the caller's fixed stack objects.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineRegisterInfo *MRI = &MF.getRegInfo();
+ const ARMInstrInfo *TII =
+ ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
+ for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+ i != e;
+ ++i, ++realArgIdx) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[realArgIdx];
+ ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+ if (VA.getLocInfo() == CCValAssign::Indirect)
+ return false;
+ if (VA.needsCustom()) {
+ // f64 and vector types are split into multiple registers or
+ // register/stack-slot combinations. The types will not match
+ // the registers; give up on memory f64 refs until we figure
+ // out what to do about this.
+ if (!VA.isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (RegVT == MVT::v2f64) {
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ if (!ArgLocs[++i].isRegLoc())
+ return false;
+ }
+ } else if (!VA.isRegLoc()) {
+ if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+ MFI, MRI, TII))
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location.
@@ -1239,7 +1544,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Arg = Outs[realRVLocIdx].Val;
+ SDValue Arg = OutVals[realRVLocIdx];
switch (VA.getLocInfo()) {
default: llvm_unreachable("Unknown loc info!");
@@ -1477,7 +1782,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
// pair. This is always cheaper.
if (Subtarget->useMovt()) {
return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
- DAG.getTargetGlobalAddress(GV, PtrVT));
+ DAG.getTargetGlobalAddress(GV, dl, PtrVT));
} else {
SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
@@ -1552,9 +1857,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- SDValue Val = Subtarget->isThumb() ?
- DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
- DAG.getConstant(0, MVT::i32);
+ SDValue Val = DAG.getConstant(0, MVT::i32);
return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
Op.getOperand(1), Val);
}
@@ -1568,8 +1871,7 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget)
- const {
+ const ARMSubtarget *Subtarget) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
switch (IntNo) {
@@ -1597,7 +1899,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0,
false, false, 0);
- SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
@@ -1609,25 +1910,21 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *Subtarget) {
+ const ARMSubtarget *Subtarget) {
DebugLoc dl = Op.getDebugLoc();
SDValue Op5 = Op.getOperand(5);
- SDValue Res;
unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
- if (isDeviceBarrier) {
- if (Subtarget->hasV7Ops())
- Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
- else
- Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- } else {
- if (Subtarget->hasV7Ops())
- Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
- else
- Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
- DAG.getConstant(0, MVT::i32));
- }
- return Res;
+ // v6 and v7 can both handle barriers directly, but they need to be handled
+ // a bit differently. Thumb1 and pre-v6 ARM mode use a libcall instead and
+ // should never get here.
+ unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER;
+ if (Subtarget->hasV7Ops())
+ return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0));
+ else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())
+ return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
+ return SDValue();
}
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
@@ -1712,7 +2009,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
SDValue ArgValue2;
if (NextVA.isMemLoc()) {
MachineFrameInfo *MFI = MF.getFrameInfo();
- int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false);
+ int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1768,8 +2065,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
VA = ArgLocs[++i]; // skip ahead to next loc
SDValue ArgValue2;
if (VA.isMemLoc()) {
- int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(),
- true, false);
+ int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
PseudoSourceValue::getFixedStack(FI), 0,
@@ -1836,8 +2132,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- true, false);
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1868,7 +2163,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
AFI->setVarArgsFrameIndex(
MFI->CreateFixedObject(VARegSaveSize,
ArgOffset + VARegSaveSize - VARegSize,
- true, false));
+ true));
SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
getPointerTy());
@@ -1884,8 +2179,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
SDValue Store =
DAG.getStore(Val.getValue(1), dl, Val, FIN,
- PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0,
- false, false, 0);
+ PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
+ 0, false, false, 0);
MemOps.push_back(Store);
FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
DAG.getConstant(4, getPointerTy()));
@@ -1895,8 +2190,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
&MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset,
- true, false));
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
}
return Chain;
@@ -1978,9 +2272,44 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
}
+static bool canBitcastToInt(SDNode *Op) {
+ return Op->hasOneUse() &&
+ ISD::isNormalLoad(Op) &&
+ Op->getValueType(0) == MVT::f32;
+}
+
+static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(),
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
-static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) {
+SDValue
+ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG,
+ DebugLoc dl) const {
+ if (UnsafeFPMath && FiniteOnlyFPMath() &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE) &&
+ canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
+ // If unsafe FP math optimization is enabled and there are no other uses
+ // of the CMP operands, and the condition code is EQ or NE, we can
+ // optimize the comparison to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+ LHS = bitcastToInt(LHS, DAG);
+ RHS = bitcastToInt(RHS, DAG);
+ return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
+ }
+
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
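
// A standalone demonstration (not part of this patch) of why the rewrite
// above is guarded: for ordinary finite values an f32 EQ/NE compare agrees
// with a bitwise i32 compare, but NaN (x != x is true) and the two zeros
// (-0.0 == +0.0 despite different bit patterns) break the equivalence, hence
// the FiniteOnlyFPMath/UnsafeFPMath checks.
#include <cstdio>
#include <cstring>

static unsigned bits(float F) {
  unsigned U;
  std::memcpy(&U, &F, sizeof U);
  return U;
}

int main() {
  float A = 1.5f, B = 1.5f, Z0 = 0.0f, Z1 = -0.0f;
  std::printf("finite: fp=%d int=%d\n", A == B, bits(A) == bits(B));   // agree
  std::printf("zeros:  fp=%d int=%d\n", Z0 == Z1, bits(Z0) == bits(Z1)); // differ
  return 0;
}
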
@@ -2010,13 +2339,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- ARMCC, CCR, Cmp);
+ ARMCC, CCR, Cmp);
if (CondCode2 != ARMCC::AL) {
SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
Result = DAG.getNode(ARMISD::CMOV, dl, VT,
Result, TrueVal, ARMCC2, CCR, Cmp2);
}
@@ -2043,8 +2372,8 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
@@ -2132,7 +2461,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(Opc, dl, VT, Op);
}
-static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
+SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
// Implement fcopysign with a fabs and a conditional fneg.
SDValue Tmp0 = Op.getOperand(0);
SDValue Tmp1 = Op.getOperand(1);
@@ -2140,8 +2469,10 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
- SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl);
SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+ SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
+ SDValue Cmp = getVFPCmp(Tmp1, FP0,
+ ISD::SETLT, ARMCC, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
}
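
// A standalone illustration (not part of this patch) of the expansion above:
// fcopysign becomes fabs plus a conditional negate taken when the sign source
// compares less-than zero (the CNEG on ARMCC::LT). Note the compare-based
// form, like this sketch, treats a -0.0 sign source as non-negative, unlike
// IEEE copysign.
#include <cmath>
#include <cstdio>

static double expandedCopysign(double Mag, double Sign) {
  double Abs = std::fabs(Mag);
  return Sign < 0.0 ? -Abs : Abs;
}

int main() {
  std::printf("%g %g\n", expandedCopysign(3.0, -2.0),
              std::copysign(3.0, -2.0)); // -3 -3
  std::printf("%g %g\n", expandedCopysign(3.0, -0.0),
              std::copysign(3.0, -0.0)); // 3 -3: the -0.0 corner case
  return 0;
}
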
@@ -2206,7 +2537,8 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT,
+ DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
}
// Turn f64->i64 into VMOVRRD.
@@ -2516,76 +2848,149 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
return Result;
}
-/// isVMOVSplat - Check if the specified splat value corresponds to an immediate
-/// VMOV instruction, and if so, return the constant being splatted.
-static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef,
- unsigned SplatBitSize, SelectionDAG &DAG) {
+/// isNEONModifiedImm - Check if the specified splat value corresponds to a
+/// valid vector constant for a NEON instruction with a "modified immediate"
+/// operand (e.g., VMOV). If so, return either the constant being
+/// splatted or the encoded value, depending on the DoEncode parameter.
+static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
+ unsigned SplatBitSize, SelectionDAG &DAG,
+ bool isVMOV, bool DoEncode) {
+ unsigned OpCmode, Imm;
+ EVT VT;
+
+ // SplatBitSize is set to the smallest size that splats the vector, so a
+ // zero vector will always have SplatBitSize == 8. However, NEON modified
+ // immediate instructions other than VMOV do not support the 8-bit encoding
+ // of a zero vector, and the default encoding of zero is supposed to be the
+ // 32-bit version.
+ if (SplatBits == 0)
+ SplatBitSize = 32;
+
switch (SplatBitSize) {
case 8:
- // Any 1-byte value is OK.
+ // Any 1-byte value is OK. Op=0, Cmode=1110.
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
- return DAG.getTargetConstant(SplatBits, MVT::i8);
+ OpCmode = 0xe;
+ Imm = SplatBits;
+ VT = MVT::i8;
+ break;
case 16:
// NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
- if ((SplatBits & ~0xff) == 0 ||
- (SplatBits & ~0xff00) == 0)
- return DAG.getTargetConstant(SplatBits, MVT::i16);
- break;
+ VT = MVT::i16;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x00nn: Op=x, Cmode=100x.
+ OpCmode = 0x8;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0xnn00: Op=x, Cmode=101x.
+ OpCmode = 0xa;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ return SDValue();
case 32:
// NEON's 32-bit VMOV supports splat values where:
// * only one byte is nonzero, or
// * the least significant byte is 0xff and the second byte is nonzero, or
// * the least significant 2 bytes are 0xff and the third is nonzero.
- if ((SplatBits & ~0xff) == 0 ||
- (SplatBits & ~0xff00) == 0 ||
- (SplatBits & ~0xff0000) == 0 ||
- (SplatBits & ~0xff000000) == 0)
- return DAG.getTargetConstant(SplatBits, MVT::i32);
+ VT = MVT::i32;
+ if ((SplatBits & ~0xff) == 0) {
+ // Value = 0x000000nn: Op=x, Cmode=000x.
+ OpCmode = 0;
+ Imm = SplatBits;
+ break;
+ }
+ if ((SplatBits & ~0xff00) == 0) {
+ // Value = 0x0000nn00: Op=x, Cmode=001x.
+ OpCmode = 0x2;
+ Imm = SplatBits >> 8;
+ break;
+ }
+ if ((SplatBits & ~0xff0000) == 0) {
+ // Value = 0x00nn0000: Op=x, Cmode=010x.
+ OpCmode = 0x4;
+ Imm = SplatBits >> 16;
+ break;
+ }
+ if ((SplatBits & ~0xff000000) == 0) {
+ // Value = 0xnn000000: Op=x, Cmode=011x.
+ OpCmode = 0x6;
+ Imm = SplatBits >> 24;
+ break;
+ }
if ((SplatBits & ~0xffff) == 0 &&
- ((SplatBits | SplatUndef) & 0xff) == 0xff)
- return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32);
+ ((SplatBits | SplatUndef) & 0xff) == 0xff) {
+ // Value = 0x0000nnff: Op=x, Cmode=1100.
+ OpCmode = 0xc;
+ Imm = SplatBits >> 8;
+ SplatBits |= 0xff;
+ break;
+ }
if ((SplatBits & ~0xffffff) == 0 &&
- ((SplatBits | SplatUndef) & 0xffff) == 0xffff)
- return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32);
+ ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
+ // Value = 0x00nnffff: Op=x, Cmode=1101.
+ OpCmode = 0xd;
+ Imm = SplatBits >> 16;
+ SplatBits |= 0xffff;
+ break;
+ }
// Note: there are a few 32-bit splat values (specifically: 00ffff00,
// ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
// VMOV.I32. A (very) minor optimization would be to replicate the value
// and fall through here to test for a valid 64-bit splat. But, then the
// caller would also need to check and handle the change in size.
- break;
+ return SDValue();
case 64: {
// NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
+ if (!isVMOV)
+ return SDValue();
uint64_t BitMask = 0xff;
uint64_t Val = 0;
+ unsigned ImmMask = 1;
+ Imm = 0;
for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
- if (((SplatBits | SplatUndef) & BitMask) == BitMask)
+ if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
Val |= BitMask;
- else if ((SplatBits & BitMask) != 0)
+ Imm |= ImmMask;
+ } else if ((SplatBits & BitMask) != 0) {
return SDValue();
+ }
BitMask <<= 8;
+ ImmMask <<= 1;
}
- return DAG.getTargetConstant(Val, MVT::i64);
+ // Op=1, Cmode=1110.
+ OpCmode = 0x1e;
+ SplatBits = Val;
+ VT = MVT::i64;
+ break;
}
default:
- llvm_unreachable("unexpected size for isVMOVSplat");
- break;
+ llvm_unreachable("unexpected size for isNEONModifiedImm");
+ return SDValue();
}
- return SDValue();
+ if (DoEncode) {
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, MVT::i32);
+ }
+ return DAG.getTargetConstant(SplatBits, VT);
}
-/// getVMOVImm - If this is a build_vector of constants which can be
-/// formed by using a VMOV instruction of the specified element size,
-/// return the constant being splatted. The ByteSize field indicates the
-/// number of bytes of each element [1248].
-SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
+/// getNEONModImm - If this is a valid vector constant for a NEON instruction
+/// with a "modified immediate" operand (e.g., VMOV) of the specified element
+/// size, return the encoded value for that immediate. The ByteSize field
+/// indicates the number of bytes of each element [1248].
+SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
+ SelectionDAG &DAG) {
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
APInt SplatBits, SplatUndef;
unsigned SplatBitSize;
@@ -2597,8 +3002,8 @@ SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
if (SplatBitSize > ByteSize * 8)
return SDValue();
- return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
- SplatBitSize, DAG);
+ return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, isVMOV, true);
}
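
// A standalone sketch (not part of this patch) of the 32-bit cases in
// isNEONModifiedImm above, ignoring undef lanes: classify a 32-bit splat as a
// NEON modified immediate, producing the same combined Op/Cmode value and
// 8-bit Imm as the switch computes.
#include <cstdio>

static bool classifySplat32(unsigned Splat, unsigned &OpCmode, unsigned &Imm) {
  if ((Splat & ~0xffu) == 0)       { OpCmode = 0x0; Imm = Splat;       return true; }
  if ((Splat & ~0xff00u) == 0)     { OpCmode = 0x2; Imm = Splat >> 8;  return true; }
  if ((Splat & ~0xff0000u) == 0)   { OpCmode = 0x4; Imm = Splat >> 16; return true; }
  if ((Splat & ~0xff000000u) == 0) { OpCmode = 0x6; Imm = Splat >> 24; return true; }
  if ((Splat & ~0xffffu) == 0 && (Splat & 0xff) == 0xff)
                                   { OpCmode = 0xc; Imm = Splat >> 8;  return true; }
  if ((Splat & ~0xffffffu) == 0 && (Splat & 0xffff) == 0xffff)
                                   { OpCmode = 0xd; Imm = Splat >> 16; return true; }
  return false; // not representable as a 32-bit modified immediate
}

int main() {
  unsigned Tests[] = { 0x0000004du, 0x00004d00u, 0x0000abffu, 0x12345678u };
  for (unsigned i = 0; i != 4; ++i) {
    unsigned OpCmode, Imm;
    if (classifySplat32(Tests[i], OpCmode, Imm))
      std::printf("0x%08x -> OpCmode=0x%x Imm=0x%02x\n", Tests[i], OpCmode, Imm);
    else
      std::printf("0x%08x -> no VMOV modified immediate\n", Tests[i]);
  }
  return 0;
}
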
static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
@@ -2838,8 +3243,10 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
- SDValue Val = isVMOVSplat(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(), SplatBitSize, DAG);
+ // Check if an immediate VMOV works.
+ SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
+ SplatUndef.getZExtValue(),
+ SplatBitSize, DAG, true, false);
if (Val.getNode())
return BuildSplat(Val, VT, DAG, dl);
}
@@ -2883,21 +3290,17 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
// Vectors with 32- or 64-bit elements can be built by directly assigning
- // the subregisters.
+ // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
+ // will be legalized.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
EVT EltVT = EVT::getFloatingPointVT(EltSize);
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
- SDValue Val = DAG.getUNDEF(VecVT);
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue Elt = Op.getOperand(i);
- if (Elt.getOpcode() == ISD::UNDEF)
- continue;
- Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
- DAG.getConstant(i, MVT::i32));
- }
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i)));
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
@@ -2934,7 +3337,9 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
bool ReverseVEXT;
unsigned Imm, WhichResult;
- return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ return (EltSize >= 32 ||
+ ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isVREVMask(M, VT, 64) ||
isVREVMask(M, VT, 32) ||
isVREVMask(M, VT, 16) ||
@@ -3032,59 +3437,62 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// at the same time so that they get CSEd properly.
SVN->getMask(ShuffleMask);
- if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
- int Lane = SVN->getSplatIndex();
- // If this is undef splat, generate it via "just" vdup, if possible.
- if (Lane == -1) Lane = 0;
-
- if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize <= 32) {
+ if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
+ int Lane = SVN->getSplatIndex();
+ // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0;
+
+ if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
+ }
+ return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
+ DAG.getConstant(Lane, MVT::i32));
}
- return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
- DAG.getConstant(Lane, MVT::i32));
- }
- bool ReverseVEXT;
- unsigned Imm;
- if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
- if (ReverseVEXT)
- std::swap(V1, V2);
- return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
- DAG.getConstant(Imm, MVT::i32));
- }
-
- if (isVREVMask(ShuffleMask, VT, 64))
- return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
- if (isVREVMask(ShuffleMask, VT, 32))
- return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
- if (isVREVMask(ShuffleMask, VT, 16))
- return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
-
- // Check for Neon shuffles that modify both input vectors in place.
- // If both results are used, i.e., if there are two shuffles with the same
- // source operands and with masks corresponding to both results of one of
- // these operations, DAG memoization will ensure that a single node is
- // used for both shuffles.
- unsigned WhichResult;
- if (isVTRNMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
- if (isVUZPMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
- if (isVZIPMask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
- V1, V2).getValue(WhichResult);
+ bool ReverseVEXT;
+ unsigned Imm;
+ if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
+ if (ReverseVEXT)
+ std::swap(V1, V2);
+ return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
+ DAG.getConstant(Imm, MVT::i32));
+ }
- if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
- if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
- if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
- return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
- V1, V1).getValue(WhichResult);
+ if (isVREVMask(ShuffleMask, VT, 64))
+ return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 32))
+ return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
+ if (isVREVMask(ShuffleMask, VT, 16))
+ return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
+
+ // Check for Neon shuffles that modify both input vectors in place.
+ // If both results are used, i.e., if there are two shuffles with the same
+ // source operands and with masks corresponding to both results of one of
+ // these operations, DAG memoization will ensure that a single node is
+ // used for both shuffles.
+ unsigned WhichResult;
+ if (isVTRNMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVUZPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+ if (isVZIPMask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V2).getValue(WhichResult);
+
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ }
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
@@ -3108,8 +3516,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
- // Implement shuffles with 32- or 64-bit elements as subreg copies.
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
if (EltSize >= 32) {
// Do the expansion with floating-point types, since that is what the VFP
// registers are defined to use, and since i64 is not legal.
@@ -3117,17 +3524,17 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
- SDValue Val = DAG.getUNDEF(VecVT);
+ SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumElts; ++i) {
if (ShuffleMask[i] < 0)
- continue;
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
- ShuffleMask[i] < (int)NumElts ? V1 : V2,
- DAG.getConstant(ShuffleMask[i] & (NumElts-1),
- MVT::i32));
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
- Elt, DAG.getConstant(i, MVT::i32));
+ Ops.push_back(DAG.getUNDEF(EltVT));
+ else
+ Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32)));
}
+ SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
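// Per-lane logic of the rewritten expansion, in isolation: a lane is either
// an explicit undef or an extract from whichever source vector the mask
// selects. With the old INSERT_VECTOR_ELT chain, undef lanes were implicit
// (the insert was simply skipped); with a flat operand list every lane must
// be spelled out. A sketch; shuffleLane is a hypothetical helper name.
static SDValue shuffleLane(SelectionDAG &DAG, DebugLoc dl, EVT EltVT,
                           SDValue V1, SDValue V2, int MaskElt,
                           unsigned NumElts) {
  if (MaskElt < 0)
    return DAG.getUNDEF(EltVT);                      // unconstrained lane
  SDValue Src = MaskElt < (int)NumElts ? V1 : V2;    // pick a source vector
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Src,
                     DAG.getConstant(MaskElt & (NumElts-1), MVT::i32));
}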
@@ -3277,7 +3684,12 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
MF->insert(It, loop1MBB);
MF->insert(It, loop2MBB);
MF->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
@@ -3315,7 +3727,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// ...
BB = exitMBB;
- MF->DeleteMachineInstr(MI); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
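// The block-splitting idiom adopted here, in isolation: everything after
// the pseudo is spliced into the new exit block, the successor edges (and
// any PHIs in the successors) follow it, and the pseudo is erased from its
// parent instead of being deleted through the MachineFunction. A sketch
// assuming MI is the pseudo being expanded in BB:
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, exitMBB);
exitMBB->splice(exitMBB->begin(), BB,
                llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// ... emit the expansion into BB and any intermediate blocks ...
MI->eraseFromParent();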
@@ -3358,7 +3770,12 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
MF->insert(It, loopMBB);
MF->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+
+ // Transfer the remainder of BB and its successor edges to exitMBB.
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = MF->getRegInfo();
unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
@@ -3403,7 +3820,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// ...
BB = exitMBB;
- MF->DeleteMachineInstr(MI); // The instruction is gone now.
+ MI->eraseFromParent(); // The instruction is gone now.
return BB;
}
@@ -3488,22 +3905,21 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
- .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
+ BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
+ .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -3516,11 +3932,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(ARM::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
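// Two BuildMI forms are in play in this hunk: BuildMI(BB, dl, ...) appends
// to the end of a block, while BuildMI(*BB, InsertPt, dl, ...) inserts
// before an explicit iterator. The latter is needed once sinkMBB already
// holds the spliced-in tail, because the PHI must sit at the top of the
// block, ahead of the moved instructions. A sketch; DstReg, FalseReg and
// TrueReg stand in for the operands read off MI above:
BuildMI(*sinkMBB, sinkMBB->begin(), dl, TII->get(ARM::PHI), DstReg)
  .addReg(FalseReg).addMBB(copy0MBB)
  .addReg(TrueReg).addMBB(thisMBB);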
@@ -3541,7 +3958,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg);
unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr;
- BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP)
+ BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP)
.addReg(SrcReg, getKillRegState(SrcIsKill));
}
@@ -3573,7 +3990,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
NeedPred = true; NeedCC = true; NeedOp3 = true;
break;
}
- MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP);
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP);
if (OpOpc == ARM::tAND)
AddDefaultT1CC(MIB);
MIB.addReg(ARM::SP);
@@ -3589,10 +4006,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg);
unsigned CopyOpc = (RC == ARM::tGPRRegisterClass)
? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr;
- BuildMI(BB, dl, TII->get(CopyOpc))
+ BuildMI(*BB, MI, dl, TII->get(CopyOpc))
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
.addReg(ARM::SP);
- MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
}
@@ -3893,7 +4310,8 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
// Narrowing shifts require an immediate right shift.
if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
break;
- llvm_unreachable("invalid shift count for narrowing vector shift intrinsic");
+ llvm_unreachable("invalid shift count for narrowing vector shift "
+ "intrinsic");
default:
llvm_unreachable("unhandled vector shift");
@@ -4156,14 +4574,13 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
if (!Subtarget->hasV6Ops())
// Pre-v6 does not support unaligned mem access.
return false;
- else {
- // v6+ may or may not support unaligned mem access depending on the system
- // configuration.
- // FIXME: This is pretty conservative. Should we provide cmdline option to
- // control the behaviour?
- if (!Subtarget->isTargetDarwin())
- return false;
- }
+
+ // v6+ may or may not support unaligned mem access depending on the system
+ // configuration.
+ // FIXME: This is pretty conservative. Should we provide cmdline option to
+ // control the behaviour?
+ if (!Subtarget->isTargetDarwin())
+ return false;
switch (VT.getSimpleVT().SimpleTy) {
default:
@@ -4619,7 +5036,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
}
if (StringRef("{cc}").equals_lower(Constraint))
- return std::make_pair(0U, ARM::CCRRegisterClass);
+ return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass);
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
@@ -4669,7 +5086,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char Constraint,
- bool hasMemory,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
@@ -4818,8 +5234,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
Ops.push_back(Result);
return;
}
- return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
- Ops, DAG);
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
bool
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 9c7517c..3a38669 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -70,6 +70,8 @@ namespace llvm {
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
+ TC_RETURN, // Tail call return pseudo.
+
THREAD_POINTER,
DYN_ALLOC, // Dynamic allocation on the stack.
@@ -133,6 +135,13 @@ namespace llvm {
VUZP, // unzip (deinterleave)
VTRN, // transpose
+ // Operands of the standard BUILD_VECTOR node are not legalized, which
+ // is fine if BUILD_VECTORs are always lowered to shuffles or other
+ // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+ // operands need to be legalized. Define an ARM-specific version of
+ // BUILD_VECTOR for this purpose.
+ BUILD_VECTOR,
+
// Floating-point max and min:
FMAX,
FMIN
@@ -141,11 +150,12 @@ namespace llvm {
/// Define some predicates that are used for node matching.
namespace ARM {
- /// getVMOVImm - If this is a build_vector of constants which can be
- /// formed by using a VMOV instruction of the specified element size,
- /// return the constant being splatted. The ByteSize field indicates the
- /// number of bytes of each element [1248].
- SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);
+ /// getNEONModImm - If this is a valid vector constant for a NEON
+ /// instruction with a "modified immediate" operand (e.g., VMOV) of the
+ /// specified element size, return the encoded value for that immediate.
+ /// The ByteSize field indicates the number of bytes of each element [1248].
+ SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
+ SelectionDAG &DAG);
/// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
/// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
@@ -189,9 +199,9 @@ namespace llvm {
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
- /// icmp immediate, that is the target has icmp instructions which can compare
- /// a register against the immediate without having to materialize the
- /// immediate into a register.
+ /// icmp immediate, that is, the target has icmp instructions which can
+ /// compare a register against the immediate without having to materialize
+ /// the immediate into a register.
virtual bool isLegalICmpImmediate(int64_t Imm) const;
/// getPreIndexedAddressParts - returns true by value, base pointer and
@@ -232,7 +242,6 @@ namespace llvm {
/// being processed is 'm'.
virtual void LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -282,7 +291,8 @@ namespace llvm {
SDValue &Root, SelectionDAG &DAG,
DebugLoc dl) const;
- CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const;
+ CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return,
+ bool isVarArg) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
@@ -303,6 +313,7 @@ namespace llvm {
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
@@ -327,18 +338,34 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
+ /// IsEligibleForTailCallOptimization - Check whether the call is eligible
+ /// for tail call optimization. Targets which want to do tail call
+ /// optimization should implement this function.
+ bool IsEligibleForTailCallOptimization(SDValue Callee,
+ CallingConv::ID CalleeCC,
+ bool isVarArg,
+ bool isCalleeStructRet,
+ bool isCallerStructRet,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ SelectionDAG& DAG) const;
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
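// The recurring interface change in this header: argument values (OutVals)
// now travel alongside their descriptors (Outs) through LowerCall,
// LowerReturn and the tail-call eligibility check, with Outs[i] describing
// OutVals[i]. A sketch of how a lowering walks the two lists in lockstep
// (the loop body is elided and illustrative only):
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
  const ISD::OutputArg &Out = Outs[i];   // type and flag information
  SDValue Arg = OutVals[i];              // the value being passed
  // ... assign Arg to a register or stack slot according to Out ...
}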
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index d487df1..ac568e7 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -50,27 +50,23 @@ def VFPLdStMulFrm : Format<22>;
def VFPMiscFrm : Format<23>;
def ThumbFrm : Format<24>;
-
-def NEONFrm : Format<25>;
-def NEONGetLnFrm : Format<26>;
-def NEONSetLnFrm : Format<27>;
-def NEONDupFrm : Format<28>;
-
-def MiscFrm : Format<29>;
-def ThumbMiscFrm : Format<30>;
-
-def NLdStFrm : Format<31>;
-def N1RegModImmFrm : Format<32>;
-def N2RegFrm : Format<33>;
-def NVCVTFrm : Format<34>;
-def NVDupLnFrm : Format<35>;
-def N2RegVShLFrm : Format<36>;
-def N2RegVShRFrm : Format<37>;
-def N3RegFrm : Format<38>;
-def N3RegVShFrm : Format<39>;
-def NVExtFrm : Format<40>;
-def NVMulSLFrm : Format<41>;
-def NVTBLFrm : Format<42>;
+def MiscFrm : Format<25>;
+
+def NGetLnFrm : Format<26>;
+def NSetLnFrm : Format<27>;
+def NDupFrm : Format<28>;
+def NLdStFrm : Format<29>;
+def N1RegModImmFrm: Format<30>;
+def N2RegFrm : Format<31>;
+def NVCVTFrm : Format<32>;
+def NVDupLnFrm : Format<33>;
+def N2RegVShLFrm : Format<34>;
+def N2RegVShRFrm : Format<35>;
+def N3RegFrm : Format<36>;
+def N3RegVShFrm : Format<37>;
+def NVExtFrm : Format<38>;
+def NVMulSLFrm : Format<39>;
+def NVTBLFrm : Format<40>;
// Misc flags.
@@ -1653,17 +1649,17 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, itin,
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin,
opc, dt, asm, pattern>;
class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, itin,
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin,
opc, dt, asm, pattern>;
class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin,
+ : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin,
opc, dt, asm, pattern>;
// Vector Duplicate Lane (from scalar to all elements)
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 85f6b40..ba228ff 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -63,7 +63,7 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
void ARMInstrInfo::
reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo &TRI) const {
DebugLoc dl = Orig->getDebugLoc();
unsigned Opcode = Orig->getOpcode();
switch (Opcode) {
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index d4199d1..4563ffe 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -35,7 +35,7 @@ public:
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo &TRI) const;
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index f3156d9..c73e204 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -53,6 +53,8 @@ def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
@@ -117,6 +119,9 @@ def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
+def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
+ [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -858,13 +863,13 @@ def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
Pseudo, IIC_iALUi,
"adr$p\t$dst, #$label", []>;
+} // neverHasSideEffects
def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
Pseudo, IIC_iALUi,
"adr$p\t$dst, #${label}_${id}", []> {
let Inst{25} = 1;
}
-} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -1026,6 +1031,74 @@ let isCall = 1,
}
}
+// Tail calls.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
+ D27, D28, D29, D30, D31, PC],
+ Uses = [SP] in {
+ def TCRETURNdi : AInoP<(outs), (ins i32imm:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+
+ def TCRETURNri : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>;
+
+ def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b\t$dst @ TAILCALL",
+ []>, Requires<[IsDarwin]>;
+
+ def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b.w\t$dst @ TAILCALL",
+ []>, Requires<[IsDarwin]>;
+
+ def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops),
+ BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
+ []>, Requires<[IsDarwin]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0b1110;
+ }
+ }
+
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12,
+ D0, D1, D2, D3, D4, D5, D6, D7,
+ D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26,
+ D27, D28, D29, D30, D31, PC],
+ Uses = [SP] in {
+ def TCRETURNdiND : AInoP<(outs), (ins i32imm:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+
+ def TCRETURNriND : AInoP<(outs), (ins tcGPR:$dst, variable_ops),
+ Pseudo, IIC_Br,
+ "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>;
+
+ def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b\t$dst @ TAILCALL",
+ []>, Requires<[IsARM, IsNotDarwin]>;
+
+ def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops),
+ IIC_Br, "b.w\t$dst @ TAILCALL",
+ []>, Requires<[IsThumb, IsNotDarwin]>;
+
+ def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops),
+ BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL",
+ []>, Requires<[IsNotDarwin]> {
+ let Inst{7-4} = 0b0001;
+ let Inst{19-8} = 0b111111111111;
+ let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0b1110;
+ }
+ }
+}
+
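// Shape of the tail-call path these definitions serve: when a call can be
// a tail call, lowering emits an ARMISD::TC_RETURN terminator instead of a
// call-plus-return, which selects to one of the TCRETURN* pseudos above and
// is expected to be rewritten into the corresponding TAILJMP* branch late,
// after frame lowering. A hedged sketch of the node emission; the exact
// operand list (chain, callee, glue, ...) is an assumption here:
if (isTailCall)
  return DAG.getNode(ARMISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());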
let isBranch = 1, isTerminator = 1 in {
// B is "predicable" since it can be xformed into a Bcc.
let isBarrier = 1 in {
@@ -1397,6 +1470,14 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
let Inst{25} = 0;
}
+// A version for the smaller set of tail call registers.
+let neverHasSideEffects = 1 in
+def MOVr_TC : AsI1<0b1101, (outs tcGPR:$dst), (ins tcGPR:$src), DPFrm,
+ IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP {
+ let Inst{11-4} = 0b00000000;
+ let Inst{25} = 0;
+}
+
def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
"mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
@@ -2530,31 +2611,30 @@ let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
- D31 ] in {
+ D31 ], hasSideEffects = 1, isBarrier = 1 in {
def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
- "add\t$val, pc, #8\n\t"
- "str\t$val, [$src, #+4]\n\t"
- "mov\tr0, #0\n\t"
- "add\tpc, pc, #0\n\t"
- "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+ "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t"
+ "str\t$val, [$src, #+4]\n\t"
+ "mov\tr0, #0\n\t"
+ "add\tpc, pc, #0\n\t"
+ "mov\tr0, #1 ${:comment} eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, HasVFP2]>;
}
let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+ hasSideEffects = 1, isBarrier = 1 in {
def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
- "add\t$val, pc, #8\n\t"
- "str\t$val, [$src, #+4]\n\t"
- "mov\tr0, #0\n\t"
- "add\tpc, pc, #0\n\t"
- "mov\tr0, #1 ${:comment} eh_setjmp end", "",
+ "add\t$val, pc, #8\n ${:comment} eh_setjmp begin\n\t"
+ "str\t$val, [$src, #+4]\n\t"
+ "mov\tr0, #0\n\t"
+ "add\tpc, pc, #0\n\t"
+ "mov\tr0, #1 ${:comment} eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
@@ -2621,6 +2701,24 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// TODO: add,sub,and, 3-instr forms?
+// Tail calls
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>;
+
+def : ARMPat<(ARMtcret tcGPR:$dst),
+ (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
+
+def : ARMPat<(ARMtcret (i32 texternalsym:$dst)),
+ (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>;
// Direct calls
def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 197ec16..a84315f 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -98,17 +98,8 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
// NEON operand definitions
//===----------------------------------------------------------------------===//
-def h8imm : Operand<i8> {
- let PrintMethod = "printHex8ImmOperand";
-}
-def h16imm : Operand<i16> {
- let PrintMethod = "printHex16ImmOperand";
-}
-def h32imm : Operand<i32> {
- let PrintMethod = "printHex32ImmOperand";
-}
-def h64imm : Operand<i64> {
- let PrintMethod = "printHex64ImmOperand";
+def nModImm : Operand<i32> {
+ let PrintMethod = "printNEONModImmOperand";
}
//===----------------------------------------------------------------------===//
@@ -812,11 +803,6 @@ def DSubReg_f64_reg : SDNodeXForm<imm, [{
assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;
-def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
- assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
- return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()),
- MVT::i32);
-}]>;
// Extract S sub-registers of Q/D registers.
def SSubReg_f32_reg : SDNodeXForm<imm, [{
@@ -2282,7 +2268,7 @@ def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
// For disassembly only.
defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
- "$dst, $src, #0">;
+ "$dst, $src, #0">;
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q,
@@ -2834,73 +2820,70 @@ def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 1, *CurDAG);
+ return ARM::getNEONModImm(N, 1, true, *CurDAG);
}]>;
def vmovImm8 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm8>;
// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 2, *CurDAG);
+ return ARM::getNEONModImm(N, 2, true, *CurDAG);
}]>;
def vmovImm16 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm16>;
// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 4, *CurDAG);
+ return ARM::getNEONModImm(N, 4, true, *CurDAG);
}]>;
def vmovImm32 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm32>;
// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
- return ARM::getVMOVImm(N, 8, *CurDAG);
+ return ARM::getNEONModImm(N, 8, true, *CurDAG);
}]>;
def vmovImm64 : PatLeaf<(build_vector), [{
- return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0;
+ return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0;
}], VMOV_get_imm64>;
-// Note: Some of the cmode bits in the following VMOV instructions need to
-// be encoded based on the immed values.
-
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
- (ins h8imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$dst, $SIMM", "",
[(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
- (ins h8imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$dst, $SIMM", "",
[(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
-def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst),
- (ins h16imm:$SIMM), IIC_VMOVImm,
+def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$dst, $SIMM", "",
[(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
-def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst),
- (ins h16imm:$SIMM), IIC_VMOVImm,
+def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$dst, $SIMM", "",
[(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
-def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst),
- (ins h32imm:$SIMM), IIC_VMOVImm,
+def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$dst, $SIMM", "",
[(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
-def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst),
- (ins h32imm:$SIMM), IIC_VMOVImm,
+def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$dst, $SIMM", "",
[(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
- (ins h64imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$dst, $SIMM", "",
[(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
- (ins h64imm:$SIMM), IIC_VMOVImm,
+ (ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
} // isReMaterializable
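// All of the PatLeaf predicates above reduce to one question: can this
// build_vector be encoded as a NEON "modified immediate" of the given
// element width? Sketched as a standalone check; isVMOVModImm is a
// hypothetical wrapper name around the API declared in ARMISelLowering.h:
static bool isVMOVModImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  // isVMOV=true selects the VMOV encodings; false presumably selects the
  // inverted (VMVN-style) forms.
  return ARM::getNEONModImm(N, ByteSize, true, DAG).getNode() != 0;
}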
@@ -3122,17 +3105,6 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "",
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
-def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
- (INSERT_SUBREG QPR:$src,
- (i64 (EXTRACT_SUBREG QPR:$src,
- (DSubReg_f64_reg imm:$lane))),
- (DSubReg_f64_other_reg imm:$lane))>;
-def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
- (INSERT_SUBREG QPR:$src,
- (f64 (EXTRACT_SUBREG QPR:$src,
- (DSubReg_f64_reg imm:$lane))),
- (DSubReg_f64_other_reg imm:$lane))>;
-
// VMOVN : Vector Narrowing Move
defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
"vmovn", "i", int_arm_neon_vmovn>;
@@ -3319,22 +3291,16 @@ let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
- DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
+ "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
- DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
+ "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", []>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
(ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
NVTBLFrm, IIC_VTB4,
- "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
- DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
+ "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>;
} // hasExtraSrcRegAllocReq = 1
// VTBX : Vector Table Extension
@@ -3348,23 +3314,18 @@ let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2,
- "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
- DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
+ "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
(ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
NVTBLFrm, IIC_VTBX3,
- "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
- DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
+ "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src",
+ "$orig = $dst", []>;
def VTBX4
: N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4,
"vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
- "$orig = $dst",
- [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
+ "$orig = $dst", []>;
} // hasExtraSrcRegAllocReq = 1
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 40f924b..bc0790d 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -894,11 +894,11 @@ def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
"adr$p\t$dst, #$label", []>,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
+} // neverHasSideEffects
def tLEApcrelJT : T1I<(outs tGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
-} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// TLS Instructions
@@ -923,18 +923,18 @@ let isCall = 1,
// except for our own input by listing the relevant registers in Defs. By
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved registers, which is exactly what we want.
-// The current SP is passed in $val, and we reuse the reg as a scratch.
+// $val is a scratch register for our use.
let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ], hasSideEffects = 1,
+ isBarrier = 1 in {
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
- "\tmov\t$val, pc\n"
- "\tadds\t$val, #7\n"
- "\tstr\t$val, [$src, #4]\n"
- "\tmovs\tr0, #0\n"
- "\tb\t1f\n"
- "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+ "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+ "adds\t$val, #7\n\t"
+ "str\t$val, [$src, #4]\n\t"
+ "movs\tr0, #0\n\t"
+ "b\t1f\n\t"
+ "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
}
@@ -1037,7 +1037,8 @@ def : T1Pat<(i32 imm0_255_comp:$src),
// scheduling.
let isReMaterializable = 1 in
def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary,
+ "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index b91c089..4692f2a 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -637,8 +637,7 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]>,
- Requires<[HasT2ExtractPack]> {
+ [(set GPR:$dst, (opnode GPR:$src))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -649,8 +648,7 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
- Requires<[HasT2ExtractPack]> {
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -661,8 +659,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
}
-// SXTB16 and UXTB16 do not need the .w qualifier.
-multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
+// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
+multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, "\t$dst, $src",
[(set GPR:$dst, (opnode GPR:$src))]>,
@@ -689,9 +687,9 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
}
}
-// DO variant - disassembly only, no pattern
-
-multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
+// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier; no pattern
+// supported yet.
+multiclass T2I_unary_rrot_sxtb16<bits<3> opcod, string opc> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, "\t$dst, $src", []> {
let Inst{31-27} = 0b11111;
@@ -787,6 +785,7 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
}
+} // neverHasSideEffects
def t2LEApcrelJT : T2XI<(outs GPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi,
"adr$p.w\t$dst, #${label}_${id}", []> {
@@ -798,7 +797,6 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst),
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
}
-} // neverHasSideEffects
// ADD r, sp, {so_imm|i12}
def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
@@ -1330,7 +1328,7 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb",
UnOpFrag<(sext_inreg node:$Src, i8)>>;
defm t2SXTH : T2I_unary_rrot<0b000, "sxth",
UnOpFrag<(sext_inreg node:$Src, i16)>>;
-defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">;
+defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">;
defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
@@ -1347,13 +1345,13 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb",
UnOpFrag<(and node:$Src, 0x000000FF)>>;
defm t2UXTH : T2I_unary_rrot<0b001, "uxth",
UnOpFrag<(and node:$Src, 0x0000FFFF)>>;
-defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16",
+defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16",
UnOpFrag<(and node:$Src, 0x00FF00FF)>>;
def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 24)>;
+ (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>;
def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF),
- (t2UXTB16r_rot GPR:$Src, 8)>;
+ (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>;
defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
@@ -2389,37 +2387,36 @@ let isCall = 1,
// except for our own input by listing the relevant registers in Defs. By
// doing so, we also cause the prologue/epilogue code to actively preserve
// all of the callee-saved registers, which is exactly what we want.
-// The current SP is passed in $val, and we reuse the reg as a scratch.
+// $val is a scratch register for our use.
let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0,
D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30,
- D31 ] in {
+ D31 ], hasSideEffects = 1, isBarrier = 1 in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
- "\tmov\t$val, pc\n"
- "\tadds\t$val, #7\n"
- "\tstr\t$val, [$src, #4]\n"
- "\tmovs\tr0, #0\n"
- "\tb\t1f\n"
- "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+ "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+ "adds\t$val, #7\n\t"
+ "str\t$val, [$src, #4]\n\t"
+ "movs\tr0, #0\n\t"
+ "b\t1f\n\t"
+ "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
}
let Defs =
- [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in {
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ],
+ hasSideEffects = 1, isBarrier = 1 in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
- "\tmov\t$val, pc\n"
- "\tadds\t$val, #7\n"
- "\tstr\t$val, [$src, #4]\n"
- "\tmovs\tr0, #0\n"
- "\tb\t1f\n"
- "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+ "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+ "adds\t$val, #7\n\t"
+ "str\t$val, [$src, #4]\n\t"
+ "movs\tr0, #0\n\t"
+ "b\t1f\n\t"
+ "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
@@ -2529,6 +2526,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
// IT block
+let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
AddrModeNone, Size2Bytes, IIC_iALUx,
"it$mask\t$cc", "", []> {
@@ -2691,7 +2689,8 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// scheduling.
let canFoldAsLoad = 1, isReMaterializable = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary,
+ "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 54474cf..84c23e1 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -255,25 +255,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
// Between half-precision and single-precision. For disassembly only.
-def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f32_to_f16 SPR:$a),
(i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
def : ARMPat<(f16_to_f32 GPR:$a),
(VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
-def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
+def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a),
/* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a",
[/* For disassembly only; pattern left blank */]>;
diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h
index ff332b7..f5d9eff 100644
--- a/lib/Target/ARM/ARMJITInfo.h
+++ b/lib/Target/ARM/ARMJITInfo.h
@@ -143,7 +143,8 @@ namespace llvm {
JumpTableId2AddrMap[JTI] = Addr;
}
- /// getPCLabelAddr - Retrieve the address of the PC label of the specified id.
+ /// getPCLabelAddr - Retrieve the address of the PC label of the
+ /// specified id.
intptr_t getPCLabelAddr(unsigned Id) const {
DenseMap<unsigned, intptr_t>::const_iterator I = PCLabelMap.find(Id);
assert(I != PCLabelMap.end());
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 8585c1e..f80e316 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -74,11 +74,14 @@ namespace {
private:
struct MemOpQueueEntry {
int Offset;
+ unsigned Reg;
+ bool isKill;
unsigned Position;
MachineBasicBlock::iterator MBBI;
bool Merged;
- MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i)
- : Offset(o), Position(p), MBBI(i), Merged(false) {}
+ MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
+ MachineBasicBlock::iterator i)
+ : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
};
typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
typedef MemOpQueue::iterator MemOpQueueIter;
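// Reg and isKill are now captured when an entry is queued, so later merge
// decisions can read cached state instead of re-inspecting operand 0 of an
// instruction that may since have been erased or re-marked. A sketch of the
// enqueue site, mirroring the code later in this patch:
const MachineOperand &MO = MBBI->getOperand(0);
unsigned Reg = MO.getReg();
bool isKill = MO.isDef() ? false : MO.isKill();   // defs are never kills
MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));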
@@ -128,30 +131,30 @@ namespace {
static int getLoadStoreMultipleOpcode(int Opcode) {
switch (Opcode) {
case ARM::LDR:
- NumLDMGened++;
+ ++NumLDMGened;
return ARM::LDM;
case ARM::STR:
- NumSTMGened++;
+ ++NumSTMGened;
return ARM::STM;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
- NumLDMGened++;
+ ++NumLDMGened;
return ARM::t2LDM;
case ARM::t2STRi8:
case ARM::t2STRi12:
- NumSTMGened++;
+ ++NumSTMGened;
return ARM::t2STM;
case ARM::VLDRS:
- NumVLDMGened++;
+ ++NumVLDMGened;
return ARM::VLDMS;
case ARM::VSTRS:
- NumVSTMGened++;
+ ++NumVSTMGened;
return ARM::VSTMS;
case ARM::VLDRD:
- NumVLDMGened++;
+ ++NumVLDMGened;
return ARM::VLDMD;
case ARM::VSTRD:
- NumVSTMGened++;
+ ++NumVSTMGened;
return ARM::VSTMD;
default: llvm_unreachable("Unhandled opcode!");
}
@@ -264,45 +267,59 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
-void ARMLoadStoreOpt::
-MergeOpsUpdate(MachineBasicBlock &MBB,
- MemOpQueue &memOps,
- unsigned memOpsBegin,
- unsigned memOpsEnd,
- unsigned insertAfter,
- int Offset,
- unsigned Base,
- bool BaseKill,
- int Opcode,
- ARMCC::CondCodes Pred,
- unsigned PredReg,
- unsigned Scratch,
- DebugLoc dl,
- SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
+void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
+ MemOpQueue &memOps,
+ unsigned memOpsBegin, unsigned memOpsEnd,
+ unsigned insertAfter, int Offset,
+ unsigned Base, bool BaseKill,
+ int Opcode,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ unsigned Scratch,
+ DebugLoc dl,
+ SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
// First calculate which of the registers should be killed by the merged
// instruction.
- SmallVector<std::pair<unsigned, bool>, 8> Regs;
const unsigned insertPos = memOps[insertAfter].Position;
+
+ SmallSet<unsigned, 4> UnavailRegs;
+ SmallSet<unsigned, 4> KilledRegs;
+ DenseMap<unsigned, unsigned> Killer;
+ for (unsigned i = 0; i < memOpsBegin; ++i) {
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ if (memOps[i].Merged)
+ UnavailRegs.insert(Reg);
+ else {
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+ }
+ for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) {
+ if (memOps[i].Position < insertPos && memOps[i].isKill) {
+ unsigned Reg = memOps[i].Reg;
+ KilledRegs.insert(Reg);
+ Killer[Reg] = i;
+ }
+ }
+
+ SmallVector<std::pair<unsigned, bool>, 8> Regs;
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
- const MachineOperand &MO = memOps[i].MBBI->getOperand(0);
- unsigned Reg = MO.getReg();
- bool isKill = MO.isKill();
+ unsigned Reg = memOps[i].Reg;
+ if (UnavailRegs.count(Reg))
+ // The register is killed earlier, and it is not feasible to update the
+ // kill marker on already-merged instructions. Abort.
+ return;
// If we are inserting the merged operation after an unmerged operation that
// uses the same register, make sure to transfer any kill flag.
- for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j)
- if (memOps[j].Position<insertPos) {
- const MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
- if (MOJ.getReg() == Reg && MOJ.isKill())
- isKill = true;
- }
-
+ bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
Regs.push_back(std::make_pair(Reg, isKill));
}
// Try to do the merge.
MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
- Loc++;
+ ++Loc;
if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
Pred, PredReg, Scratch, dl, Regs))
return;
@@ -311,13 +328,13 @@ MergeOpsUpdate(MachineBasicBlock &MBB,
Merges.push_back(prior(Loc));
for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
// Remove kill flags from any unmerged memops that come before insertPos.
- if (Regs[i-memOpsBegin].second)
- for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j)
- if (memOps[j].Position<insertPos) {
- MachineOperand &MOJ = memOps[j].MBBI->getOperand(0);
- if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill())
- MOJ.setIsKill(false);
- }
+ if (Regs[i-memOpsBegin].second) {
+ unsigned Reg = Regs[i-memOpsBegin].first;
+ if (KilledRegs.count(Reg)) {
+ unsigned j = Killer[Reg];
+ memOps[j].MBBI->getOperand(0).setIsKill(false);
+ }
+ }
MBB.erase(memOps[i].MBBI);
memOps[i].Merged = true;
}
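// The kill-flag bookkeeping above splits into two cases: a kill sitting on
// an already-merged memop cannot be repaired, so the merge is abandoned
// (UnavailRegs); a kill on a still-unmerged memop is remembered with its
// owner (KilledRegs plus the Killer map) so the stale flag can be cleared
// once the merged instruction takes over the kill. Condensed and reordered
// for illustration; the real code clears the flag only after MergeOps
// succeeds:
if (UnavailRegs.count(Reg))
  return;                                        // unrepairable merged kill
bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
if (isKill && KilledRegs.count(Reg))
  memOps[Killer[Reg]].MBBI->getOperand(0).setIsKill(false);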
@@ -517,8 +534,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
// Try merging with the previous instruction.
- if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
if (isAM4) {
if (Mode == ARM_AM::ia &&
isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
@@ -541,8 +561,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
// Try merging with the next instruction.
- if (!DoMerge && MBBI != MBB.end()) {
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
if (isAM4) {
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
@@ -669,8 +692,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
// Try merging with the previous instruction.
- if (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+ if (MBBI != BeginMBBI) {
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+ while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+ --PrevMBBI;
if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
AddSub = ARM_AM::sub;
@@ -685,8 +711,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
}
// Try merging with the next instruction.
- if (!DoMerge && MBBI != MBB.end()) {
+ MachineBasicBlock::iterator EndMBBI = MBB.end();
+ if (!DoMerge && MBBI != EndMBBI) {
MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+ while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+ ++NextMBBI;
if (!isAM5 &&
isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
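// The recurring debug-value guard in this pass: when probing the previous
// or next instruction for a mergeable base-register increment or decrement,
// DBG_VALUE instructions are stepped over so that compiling with -g cannot
// change which merges fire. The backward form, as used above:
MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
  --PrevMBBI;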
@@ -759,18 +788,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
/// isMemoryOp - Returns true if the instruction is a memory operation (that
/// this pass is capable of operating on).
static bool isMemoryOp(const MachineInstr *MI) {
- if (MI->hasOneMemOperand()) {
- const MachineMemOperand *MMO = *MI->memoperands_begin();
+ // When no memory operands are present, conservatively assume unaligned,
+ // volatile, unfoldable.
+ if (!MI->hasOneMemOperand())
+ return false;
- // Don't touch volatile memory accesses - we may be changing their order.
- if (MMO->isVolatile())
- return false;
+ const MachineMemOperand *MMO = *MI->memoperands_begin();
- // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
- // not.
- if (MMO->getAlignment() < 4)
- return false;
- }
+ // Don't touch volatile memory accesses - we may be changing their order.
+ if (MMO->isVolatile())
+ return false;
+
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
+ if (MMO->getAlignment() < 4)
+ return false;
// str <undef> could probably be eliminated entirely, but for now we just want
// to avoid making a mess of it.
@@ -898,6 +930,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
return false;
+ MachineBasicBlock::iterator NewBBI = MBBI;
bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
bool EvenDeadKill = isLd ?
@@ -942,6 +975,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
++NumSTRD2STM;
}
+ NewBBI = llvm::prior(MBBI);
} else {
// Split into two instructions.
assert((!isT2 || !OffReg) &&
@@ -962,14 +996,15 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
OddReg, OddDeadKill, false,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, false,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} else {
if (OddReg == EvenReg && EvenDeadKill) {
- // If the two source operands are the same, the kill marker is probably
- // on the first one. e.g.
+ // If the two source operands are the same, the kill marker is
+ // probably on the first one. e.g.
// t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
EvenDeadKill = false;
OddDeadKill = true;
@@ -978,6 +1013,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
Pred, PredReg, TII, isT2);
+ NewBBI = llvm::prior(MBBI);
InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
OddReg, OddDeadKill, OddUndef,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
@@ -989,8 +1025,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
++NumSTRD2STR;
}
- MBBI = prior(MBBI);
MBB.erase(MI);
+ MBBI = NewBBI;
+ return true;
}
return false;
}
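// Iterator discipline used in FixInvalidRegPairOp above: after each new
// instruction is inserted before MBBI, its position is captured with
// llvm::prior(MBBI); once MI is erased, MBBI would dangle, so the scan
// resumes from the recorded position instead. Condensed:
MachineBasicBlock::iterator NewBBI = MBBI;
// ... insert replacement instructions before MBBI ...
NewBBI = llvm::prior(MBBI);     // first (or only) replacement instruction
MBB.erase(MI);                  // MI is the instruction MBBI pointed at
MBBI = NewBBI;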
@@ -1023,6 +1060,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
if (isMemOp) {
int Opcode = MBBI->getOpcode();
unsigned Size = getLSMultipleTransferSize(MBBI);
+ const MachineOperand &MO = MBBI->getOperand(0);
+ unsigned Reg = MO.getReg();
+ bool isKill = MO.isDef() ? false : MO.isKill();
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
@@ -1044,8 +1084,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
CurrSize = Size;
CurrPred = Pred;
CurrPredReg = PredReg;
- MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
+ ++NumMemOps;
Advance = true;
} else {
if (Clobber) {
@@ -1057,15 +1097,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
// No need to match PredReg.
// Continue adding to the queue.
if (Offset > MemOps.back().Offset) {
- MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
} else {
for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
I != E; ++I) {
if (Offset < I->Offset) {
- MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI));
- NumMemOps++;
+ MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
+ Position, MBBI));
+ ++NumMemOps;
Advance = true;
break;
} else if (Offset == I->Offset) {
@@ -1078,7 +1120,12 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
}
}
- if (Advance) {
+ if (MBBI->isDebugValue()) {
+ ++MBBI;
+ if (MBBI == E)
+ // Reached the end of the block; try merging the memory instructions.
+ TryMerge = true;
+ } else if (Advance) {
++Position;
++MBBI;
if (MBBI == E)
@@ -1279,7 +1326,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
// some day.
SmallSet<unsigned, 4> AddedRegPressure;
while (++I != E) {
- if (MemOps.count(&*I))
+ if (I->isDebugValue() || MemOps.count(&*I))
continue;
const TargetInstrDesc &TID = I->getDesc();
if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
@@ -1411,7 +1458,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
std::sort(Ops.begin(), Ops.end(), OffsetCompare());
// The loads / stores of the same base are in order. Scan them from first to
- // last and check for the followins:
+ // last and check for the following:
// 1. Any def of base.
// 2. Any gaps.
while (Ops.size() > 1) {
@@ -1474,7 +1521,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
} else {
// This is the new location for the loads / stores.
MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
- while (InsertPos != MBB->end() && MemOps.count(InsertPos))
+ while (InsertPos != MBB->end()
+ && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
++InsertPos;
// If we are moving a pair of loads / stores, see if it makes sense
@@ -1562,7 +1610,9 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
break;
}
- MI2LocMap[MI] = Loc++;
+ if (!MI->isDebugValue())
+ MI2LocMap[MI] = ++Loc;
+
if (!isMemoryOp(MI))
continue;
unsigned PredReg = 0;
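
The ARMLoadStoreOptimizer hunks above all apply one pattern: DBG_VALUE instructions carry no machine semantics, so the pass skips them when scanning for mergeable loads/stores, when checking whether a move is safe, and when assigning instruction positions; otherwise -g builds would optimize differently from release builds. A minimal sketch of that scanning idiom (illustrative only; countCandidates is not a function in this patch):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrDesc.h"

using namespace llvm;

static unsigned countCandidates(MachineBasicBlock &MBB) {
  unsigned NumMemOps = 0;
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
       I != E; ++I) {
    // Debug values must not affect codegen decisions: skip them without
    // advancing any position counters the optimizer keeps.
    if (I->isDebugValue())
      continue;
    if (I->getDesc().mayLoad() || I->getDesc().mayStore())
      ++NumMemOps;
  }
  return NumMemOps;
}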
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 0134276..7e57a1c 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -88,6 +88,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// VarArgsFrameIndex - FrameIndex for start of varargs area.
int VarArgsFrameIndex;
+ /// HasITBlocks - True if IT blocks have been inserted.
+ bool HasITBlocks;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -97,7 +100,8 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
- JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {}
+ JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
+ HasITBlocks(false) {}
explicit ARMFunctionInfo(MachineFunction &MF) :
isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
@@ -108,7 +112,8 @@ public:
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32),
SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()),
- JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {}
+ JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0),
+ HasITBlocks(false) {}
bool isThumbFunction() const { return isThumb; }
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
@@ -229,6 +234,9 @@ public:
int getVarArgsFrameIndex() const { return VarArgsFrameIndex; }
void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; }
+
+ bool hasITBlocks() const { return HasITBlocks; }
+ void setHasITBlocks(bool h) { HasITBlocks = h; }
};
} // End llvm namespace
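
HasITBlocks is the usual MachineFunctionInfo bookkeeping idiom: the pass that emits IT instructions sets it, and later passes can query it instead of rescanning the function. A hedged sketch of a consumer (needsITAwareFixup is illustrative, not part of the patch):

#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunction.h"

using namespace llvm;

static bool needsITAwareFixup(MachineFunction &MF) {
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  // Nothing to repair if no IT blocks were ever inserted.
  return AFI->hasITBlocks();
}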
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 6beca8b..d020f3c 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -153,11 +153,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>;
// Pseudo 256-bit registers to represent pairs of Q registers. These should
// never be present in the emitted code.
-// These are used for NEON load / store instructions, e.g. vld4, vst3.
-// NOTE: It's possible to define more QQ registers since technical the
-// starting D register number doesn't have to be multiple of 4. e.g.
-// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register
-// stuffs very messy.
+// These are used for NEON load / store instructions, e.g., vld4, vst3.
+// NOTE: It's possible to define more QQ registers since technically the
+// starting D register number doesn't have to be multiple of 4, e.g.,
+// D1, D2, D3, D4 would be a legal quad, but that would make the subregister
+// stuff very messy.
let SubRegIndices = [qsub_0, qsub_1] in {
let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1),
(ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1),
@@ -183,7 +183,8 @@ let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
(ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1),
(ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3),
(ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5),
- (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in {
+ (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in
+{
def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
}
@@ -196,9 +197,9 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
}
// Current Program Status Register.
-def CPSR : ARMReg<0, "cpsr">;
-
-def FPSCR : ARMReg<1, "fpscr">;
+def CPSR : ARMReg<0, "cpsr">;
+def FPSCR : ARMReg<1, "fpscr">;
+def ITSTATE : ARMReg<2, "itstate">;
// Register classes.
//
@@ -348,6 +349,73 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
}];
}
+// For tail calls, we can't use callee-saved registers, as they are restored
+// to the saved value before the tail call, which would clobber a call address.
+// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
+// this class and the preceding one(!) This is what we want.
+def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> {
+ let MethodProtos = [{
+ iterator allocation_order_begin(const MachineFunction &MF) const;
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ // R9 is available.
+ static const unsigned ARM_GPR_R9_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R9, ARM::R12 };
+ // R9 is not available.
+ static const unsigned ARM_GPR_NOR9_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R12 };
+
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we
+ // don't know how to spill them. If we make our prologue/epilogue code
+ // smarter at some point, we can go back to using the above allocation
+ // orders for the Thumb1 instructions that know how to use hi regs.
+ static const unsigned THUMB_GPR_AO_TC[] = {
+ ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
+
+ tcGPRClass::iterator
+ tcGPRClass::allocation_order_begin(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ if (Subtarget.isThumb1Only())
+ return THUMB_GPR_AO_TC;
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.isR9Reserved())
+ return ARM_GPR_NOR9_TC;
+ else
+ return ARM_GPR_R9_TC;
+ } else
+ // R9 is either callee-saved or reserved; can't use it.
+ return ARM_GPR_NOR9_TC;
+ }
+
+ tcGPRClass::iterator
+ tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
+ GPRClass::iterator I;
+
+ if (Subtarget.isThumb1Only()) {
+ I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
+ return I;
+ }
+
+ if (Subtarget.isTargetDarwin()) {
+ if (Subtarget.isR9Reserved())
+ I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
+ else
+ I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned));
+ } else
+ // R9 is either callee-saved or reserved; can't use it.
+ I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
+ return I;
+ }
+ }];
+}
+
+
// Scalar single precision floating point register class.
def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
@@ -479,4 +547,3 @@ def QQQQPR : RegisterClass<"ARM", [v8i64],
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
-
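
The MethodBodies for tcGPR rely on a stock allocation-order idiom: each order is a static array, begin() picks the array for the subtarget, and end() is computed with sizeof arithmetic. Reduced to plain C++ (array contents illustrative):

static const unsigned Order[] = { 1, 2, 3 };

static const unsigned *order_begin() { return Order; }
static const unsigned *order_end() {
  // One past the last element, the same
  // "Array + sizeof(Array)/sizeof(unsigned)" computation as above.
  return Order + sizeof(Order) / sizeof(Order[0]);
}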
diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td
index bbfc0b2..282abca 100644
--- a/lib/Target/ARM/ARMScheduleA8.td
+++ b/lib/Target/ARM/ARMScheduleA8.td
@@ -1,10 +1,10 @@
//=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM Cortex A8 processors.
@@ -32,50 +32,50 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_iALUx , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>]>,
//
// Binary Instructions that produce a result
- InstrItinData<IIC_iALUi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iALUr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
- InstrItinData<IIC_iALUsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
- InstrItinData<IIC_iALUsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
+ InstrItinData<IIC_iALUi ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr ,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2, 1, 1]>,
//
// Unary Instructions that produce a result
- InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iUNAsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iUNAsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iUNAsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
//
// Compare instructions
- InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
- InstrItinData<IIC_iCMPsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMPsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
//
// Move instructions, unconditional
- InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
- InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
- InstrItinData<IIC_iMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [1, 1, 1]>,
//
// Move instructions, conditional
- InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
- InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
- InstrItinData<IIC_iCMOVsr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>], [2, 1, 1]>,
// Integer multiply pipeline
// Result written in E5, but that is relative to the last cycle of multicycle,
// so we use 6 for those cases
//
InstrItinData<IIC_iMUL16 , [InstrStage<1, [A8_Pipe0]>], [5, 1, 1]>,
- InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
- InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>,
- InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A8_Pipe1], 0>,
InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>,
- InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<2, [A8_Pipe1], 0>,
InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
- InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<2, [A8_Pipe1], 0>,
InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>,
-
+
// Integer load pipeline
//
// loads have an extra cycle of latency, but are fully pipelined
@@ -166,7 +166,7 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0]>]>,
-
+
// Branch
//
// no delay slots, so the latency of a branch is unimportant
@@ -276,14 +276,14 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// Single-precision FP Load
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_fpLoad32, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Double-precision FP Load
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
+ InstrItinData<IIC_fpLoad64, [InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -292,7 +292,7 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// FP Load Multiple
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>,
+ InstrItinData<IIC_fpLoadm, [InstrStage<3, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -301,14 +301,14 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// Single-precision FP Store
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_fpStore32,[InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// Double-precision FP Store
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
+ InstrItinData<IIC_fpStore64,[InstrStage<2, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0], 0>,
InstrStage<1, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -317,7 +317,7 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// FP Store Multiple
// use A8_Issue to enforce the 1 load/store per cycle limit
- InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
+ InstrItinData<IIC_fpStorem, [InstrStage<3, [A8_Issue], 0>,
InstrStage<2, [A8_Pipe0], 0>,
InstrStage<2, [A8_Pipe1]>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -329,35 +329,35 @@ def CortexA8Itineraries : ProcessorItineraries<
//
// VLD1
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD1, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
//
// VLD2
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD2, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>,
//
// VLD3
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD3, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>,
//
// VLD4
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VLD4, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>,
//
// VST
// FIXME: We don't model this instruction properly
- InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>,
+ InstrItinData<IIC_VST, [InstrStage<1, [A8_Issue], 0>,
InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_LdSt0], 0>,
InstrStage<1, [A8_NLSPipe]>]>,
@@ -600,7 +600,7 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_VTB4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
- InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>,
//
// VTBX
InstrItinData<IIC_VTBX1, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
@@ -610,9 +610,9 @@ def CortexA8Itineraries : ProcessorItineraries<
InstrItinData<IIC_VTBX3, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
- InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>,
+ InstrStage<2, [A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>,
InstrItinData<IIC_VTBX4, [InstrStage<1, [A8_Pipe0, A8_Pipe1]>,
InstrStage<1, [A8_NLSPipe]>,
InstrStage<1, [A8_NPipe], 0>,
- InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+ InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]>
]>;
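
In each InstrItinData, the trailing list gives per-operand cycles in machine-instruction operand order: entry 0 is the cycle the result is written, and later entries are when each source operand is read. So IIC_iALUsr's [2, 2, 1, 1] reads as result and first source at cycle 2, shifter-register sources at cycle 1. A plain-C++ reading of that encoding (illustrative; this is the TableGen data model, not an LLVM API):

static const unsigned IIC_iALUsr_Cycles[] = { 2, 2, 1, 1 };

static unsigned resultCycle()           { return IIC_iALUsr_Cycles[0]; }
static unsigned sourceCycle(unsigned i) { return IIC_iALUsr_Cycles[i]; } // i >= 1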
diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td
index 75320d9..df2f896 100644
--- a/lib/Target/ARM/ARMScheduleA9.td
+++ b/lib/Target/ARM/ARMScheduleA9.td
@@ -1,10 +1,10 @@
//=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM Cortex A9 processors.
@@ -16,7 +16,6 @@
// Reference Manual".
//
// Functional units
-def A9_Issue : FuncUnit; // issue
def A9_Pipe0 : FuncUnit; // pipeline 0
def A9_Pipe1 : FuncUnit; // pipeline 1
def A9_LSPipe : FuncUnit; // LS pipe
@@ -27,7 +26,121 @@ def A9_DRegsN : FuncUnit; // FP register set, NEON side
// Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1
//
def CortexA9Itineraries : ProcessorItineraries<
- [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [
+ [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [
+ // Two fully-pipelined integer ALU pipelines
+ // FIXME: There are no operand latencies for these instructions at all!
+ //
+ // Move instructions, unconditional
+ InstrItinData<IIC_iMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1]>,
+ InstrItinData<IIC_iMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [1, 1]>,
+ InstrItinData<IIC_iMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+ //
+ // No operand cycles
+ InstrItinData<IIC_iALUx , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+ //
+ // Binary Instructions that produce a result
+ InstrItinData<IIC_iALUi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iALUr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2, 2]>,
+ InstrItinData<IIC_iALUsi, [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1]>,
+ InstrItinData<IIC_iALUsr,[InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 2, 1, 1]>,
+ //
+ // Unary Instructions that produce a result
+ InstrItinData<IIC_iUNAr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iUNAsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iUNAsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+ //
+ // Compare instructions
+ InstrItinData<IIC_iCMPi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMPr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 2]>,
+ InstrItinData<IIC_iCMPsi , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMPsr , [InstrStage<3, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+ //
+ // Move instructions, conditional
+ InstrItinData<IIC_iCMOVi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2]>,
+ InstrItinData<IIC_iCMOVr , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsi , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>], [2, 1]>,
+ InstrItinData<IIC_iCMOVsr , [InstrStage<2, [A9_Pipe0, A9_Pipe1]>], [2, 1, 1]>,
+
+ // Integer multiply pipeline
+ //
+ InstrItinData<IIC_iMUL16 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC16 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL32 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>,
+ InstrItinData<IIC_iMAC32 , [InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>,
+ InstrItinData<IIC_iMUL64 , [InstrStage<2, [A9_Pipe1], 0>,
+ InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
+ InstrItinData<IIC_iMAC64 , [InstrStage<2, [A9_Pipe1], 0>,
+ InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>,
+ // Integer load pipeline
+ // FIXME: The timings are some rough approximations
+ //
+ // Immediate offset
+ InstrItinData<IIC_iLoadi , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iLoadr , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iLoadsi , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iLoadiu , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iLoadru , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iLoadsiu , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>,
+ //
+ // Load multiple
+ InstrItinData<IIC_iLoadm , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>]>,
+
+ // Integer store pipeline
+ //
+ // Immediate offset
+ InstrItinData<IIC_iStorei , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1]>,
+ //
+ // Register offset
+ InstrItinData<IIC_iStorer , [InstrStage<1, [ A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>,
+ //
+ // Scaled register offset
+ InstrItinData<IIC_iStoresi , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>,
+ //
+ // Immediate offset with update
+ InstrItinData<IIC_iStoreiu , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>,
+ //
+ // Register offset with update
+ InstrItinData<IIC_iStoreru , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>,
+ //
+ // Scaled register offset with update
+ InstrItinData<IIC_iStoresiu, [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>,
+ //
+ // Store multiple
+ InstrItinData<IIC_iStorem , [InstrStage<1, [A9_Pipe1]>,
+ InstrStage<1, [A9_LSPipe]>]>,
+ // Branch
+ //
+ // no delay slots, so the latency of a branch is unimportant
+ InstrItinData<IIC_Br , [InstrStage<1, [A9_Pipe0, A9_Pipe1]>]>,
+
// VFP and NEON share the same register file. This means that every VFP
// instruction should wait for full completion of the consecutive NEON
// instruction and vice-versa. We model this behavior with two artificial FUs:
@@ -39,8 +152,8 @@ def CortexA9Itineraries : ProcessorItineraries<
// register file writeback!).
// Every NEON instruction does the same but with FUs swapped.
//
- // Since the reserved FU cannot be acquired this models precisly "cross-domain"
- // stalls.
+ // Since the reserved FU cannot be acquired, this models precisely
+ // "cross-domain" stalls.
// VFP
// Issue through integer pipeline, and execute in NEON unit.
@@ -48,21 +161,21 @@ def CortexA9Itineraries : ProcessorItineraries<
// FP Special Register to Integer Register File Move
InstrItinData<IIC_fpSTAT , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Single-precision FP Unary
InstrItinData<IIC_fpUNA32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Double-precision FP Unary
InstrItinData<IIC_fpUNA64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
@@ -70,124 +183,124 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpCMP32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 4 cycles
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Double-precision FP Compare
InstrItinData<IIC_fpCMP64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra latency cycles since wbck is 4 cycles
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Single to Double FP Convert
InstrItinData<IIC_fpCVTSD , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Double to Single FP Convert
InstrItinData<IIC_fpCVTDS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Single to Half FP Convert
InstrItinData<IIC_fpCVTSH , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Half to Single FP Convert
InstrItinData<IIC_fpCVTHS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1]>,
//
// Single-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTSI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Double-Precision FP to Integer Convert
InstrItinData<IIC_fpCVTDI , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Integer to Single-Precision FP Convert
InstrItinData<IIC_fpCVTIS , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Integer to Double-Precision FP Convert
InstrItinData<IIC_fpCVTID , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Single-precision FP ALU
InstrItinData<IIC_fpALU32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Double-precision FP ALU
InstrItinData<IIC_fpALU64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<5, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Single-precision FP Multiply
InstrItinData<IIC_fpMUL32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<6, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [5, 1, 1]>,
//
// Double-precision FP Multiply
InstrItinData<IIC_fpMUL64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<7, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 1, 1]>,
//
// Single-precision FP MAC
InstrItinData<IIC_fpMAC32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<9, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>,
//
// Double-precision FP MAC
InstrItinData<IIC_fpMAC64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<10, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>,
//
// Single-precision FP DIV
InstrItinData<IIC_fpDIV32 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<16, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<10, [A9_NPipe]>], [15, 1, 1]>,
//
// Double-precision FP DIV
InstrItinData<IIC_fpDIV64 , [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<26, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<20, [A9_NPipe]>], [25, 1, 1]>,
//
// Single-precision FP SQRT
InstrItinData<IIC_fpSQRT32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<18, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<13, [A9_NPipe]>], [17, 1]>,
+ InstrStage<1, [A9_Pipe1]>,
+ InstrStage<13, [A9_NPipe]>], [17, 1]>,
//
// Double-precision FP SQRT
InstrItinData<IIC_fpSQRT64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<33, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<28, [A9_NPipe]>], [32, 1]>,
//
@@ -195,92 +308,79 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_fpMOVIS, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra 1 latency cycle since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Integer to Double-precision Move
InstrItinData<IIC_fpMOVID, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
// Extra 1 latency cycle since wbck is 2 cycles
InstrStage<3, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
//
// Single-precision to Integer Move
InstrItinData<IIC_fpMOVSI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1]>,
//
// Double-precision to Integer Move
InstrItinData<IIC_fpMOVDI, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [1, 1, 1]>,
//
// Single-precision FP Load
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad32, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Double-precision FP Load
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoad64, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// FP Load Multiple
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpLoadm, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Single-precision FP Store
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore32,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Double-precision FP Store
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStore64,[InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// FP Store Multiple
- // use A9_Issue to enforce the 1 load/store per cycle limit
InstrItinData<IIC_fpStorem, [InstrStage<1, [A9_DRegsVFP], 0, Required>,
InstrStage<2, [A9_DRegsN], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
// NEON
// Issue through integer pipeline, and execute in NEON unit.
- // FIXME: Neon pipeline and LdSt unit are multiplexed.
+ // FIXME: Neon pipeline and LdSt unit are multiplexed.
// Add some syntactic sugar to model this!
// VLD1
// FIXME: We don't model this instruction properly
InstrItinData<IIC_VLD1, [InstrStage<1, [A9_DRegsN], 0, Required>,
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// VLD2
@@ -288,9 +388,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD2, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
//
// VLD3
@@ -298,9 +397,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD3, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>,
//
// VLD4
@@ -308,9 +406,8 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VLD4, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>,
//
// VST
@@ -318,121 +415,120 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VST, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Issue], 0>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<1, [A9_LSPipe], 0>,
+ InstrStage<1, [A9_Pipe1], 0>,
+ InstrStage<1, [A9_LSPipe]>,
InstrStage<1, [A9_NPipe]>]>,
//
// Double-register Integer Unary
InstrItinData<IIC_VUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2]>,
//
// Quad-register Integer Unary
InstrItinData<IIC_VUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2]>,
//
// Double-register Integer Q-Unary
InstrItinData<IIC_VQUNAiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Quad-register Integer CountQ-Unary
InstrItinData<IIC_VQUNAiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1]>,
//
// Double-register Integer Binary
InstrItinData<IIC_VBINiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
//
// Quad-register Integer Binary
InstrItinData<IIC_VBINiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
//
// Double-register Integer Subtract
InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
//
// Quad-register Integer Subtract
InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 1]>,
//
// Double-register Integer Shift
InstrItinData<IIC_VSHLiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
//
// Quad-register Integer Shift
InstrItinData<IIC_VSHLiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 1, 1]>,
//
// Double-register Integer Shift (4 cycle)
InstrItinData<IIC_VSHLi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Quad-register Integer Shift (4 cycle)
InstrItinData<IIC_VSHLi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 1, 1]>,
//
// Double-register Integer Binary (4 cycle)
InstrItinData<IIC_VBINi4D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
//
// Quad-register Integer Binary (4 cycle)
InstrItinData<IIC_VBINi4Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 2]>,
//
// Double-register Integer Subtract (4 cycle)
InstrItinData<IIC_VSUBiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
//
// Quad-register Integer Subtract (4 cycle)
InstrItinData<IIC_VSUBiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [4, 2, 1]>,
//
@@ -440,7 +536,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VCNTiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3, 2, 2]>,
//
// Quad-register Integer Count
@@ -449,35 +545,35 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VCNTiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [4, 2, 2]>,
//
// Double-register Absolute Difference and Accumulate
InstrItinData<IIC_VABAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>,
//
// Quad-register Absolute Difference and Accumulate
InstrItinData<IIC_VABAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
//
// Double-register Integer Pair Add Long
InstrItinData<IIC_VPALiD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 3, 1]>,
//
// Quad-register Integer Pair Add Long
InstrItinData<IIC_VPALiQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 3, 1]>,
//
@@ -485,14 +581,14 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMULi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 2, 2]>,
//
// Quad-register Integer Multiply (.8, .16)
InstrItinData<IIC_VMULi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 2, 2]>,
//
@@ -500,56 +596,56 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMULi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 2, 1]>,
//
// Quad-register Integer Multiply (.32)
InstrItinData<IIC_VMULi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [9, 2, 1]>,
//
// Double-register Integer Multiply-Accumulate (.8, .16)
InstrItinData<IIC_VMACi16D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>,
//
// Double-register Integer Multiply-Accumulate (.32)
InstrItinData<IIC_VMACi32D, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>,
//
// Quad-register Integer Multiply-Accumulate (.8, .16)
InstrItinData<IIC_VMACi16Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>,
//
// Quad-register Integer Multiply-Accumulate (.32)
InstrItinData<IIC_VMACi32Q, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>,
//
// Move Immediate
InstrItinData<IIC_VMOVImm, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [3]>,
//
// Double-register Permute Move
InstrItinData<IIC_VMOVD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_LSPipe]>], [2, 1]>,
//
// Quad-register Permute Move
@@ -558,42 +654,42 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMOVQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1]>,
//
// Integer to Single-precision Move
InstrItinData<IIC_VMOVIS , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1]>,
//
// Integer to Double-precision Move
InstrItinData<IIC_VMOVID , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
//
// Single-precision to Integer Move
InstrItinData<IIC_VMOVSI , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1]>,
//
// Double-precision to Integer Move
InstrItinData<IIC_VMOVDI , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<3, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 1]>,
//
// Integer to Lane Move
InstrItinData<IIC_VMOVISL , [InstrStage<1, [A9_DRegsN], 0, Required>,
// FIXME: all latencies are arbitrary, no information is available
InstrStage<4, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
//
@@ -601,7 +697,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VUNAD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [5, 2]>,
//
// Quad-register FP Unary
@@ -610,7 +706,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VUNAQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 2]>,
//
// Double-register FP Binary
@@ -619,7 +715,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VBIND, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [5, 2, 2]>,
//
// Quad-register FP Binary
@@ -630,14 +726,14 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VBINQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
//
// Double-register FP Multiple-Accumulate
InstrItinData<IIC_VMACD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>,
//
// Quad-register FP Multiple-Accumulate
@@ -646,28 +742,28 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VMACQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>,
//
// Double-register Reciprocal Step
InstrItinData<IIC_VRECSD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [6, 2, 2]>,
//
// Quad-register Reciprocal Step
InstrItinData<IIC_VRECSQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<10, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<4, [A9_NPipe]>], [8, 2, 2]>,
//
// Double-register Permute
InstrItinData<IIC_VPERMD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 6 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>,
//
// Quad-register Permute
@@ -676,7 +772,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VPERMQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>,
//
// Quad-register Permute (3 cycle issue)
@@ -685,7 +781,7 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VPERMQ3, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>,
//
@@ -693,57 +789,57 @@ def CortexA9Itineraries : ProcessorItineraries<
InstrItinData<IIC_VEXTD, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<7, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<1, [A9_NPipe]>], [2, 1, 1]>,
//
// Quad-register VEXT
InstrItinData<IIC_VEXTQ, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 9 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 1]>,
//
// VTB
InstrItinData<IIC_VTB1, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 2, 1]>,
InstrItinData<IIC_VTB2, [InstrStage<2, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>,
InstrItinData<IIC_VTB3, [InstrStage<2, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>,
InstrItinData<IIC_VTB4, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>,
//
// VTBX
InstrItinData<IIC_VTBX1, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>,
InstrItinData<IIC_VTBX2, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 7 cycles
InstrStage<8, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>,
InstrItinData<IIC_VTBX3, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
+ InstrStage<1, [A9_Pipe1]>,
InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>,
InstrItinData<IIC_VTBX4, [InstrStage<1, [A9_DRegsN], 0, Required>,
// Extra latency cycles since wbck is 8 cycles
InstrStage<9, [A9_DRegsVFP], 0, Reserved>,
- InstrStage<1, [A9_Pipe0, A9_Pipe1]>,
- InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
+ InstrStage<1, [A9_Pipe1]>,
+ InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]>
]>;
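
The A9 rewrite drops the shared A9_Issue unit, issues VFP/NEON through A9_Pipe1, and keeps the Required/Reserved pair (A9_DRegsVFP, A9_DRegsN) to model cross-domain stalls: an op acquires its own side's register file and reserves the other side for its full writeback latency. A toy model of that reservation scheme (standalone C++, not LLVM code):

#include <algorithm>
#include <cstdio>

struct FU { int reservedUntil; };  // one per register-file side

// Acquire our own side (Required), block the other side (Reserved).
static int issue(FU &Required, FU &Reserved, int now, int latency) {
  int start = std::max(now, Required.reservedUntil);
  Reserved.reservedUntil = std::max(Reserved.reservedUntil, start + latency);
  return start;
}

int main() {
  FU DRegsVFP = {0}, DRegsN = {0};
  int t0 = issue(DRegsVFP, DRegsN, 0, 5); // VFP op with 5-cycle writeback
  int t1 = issue(DRegsN, DRegsVFP, 1, 3); // next NEON op stalls to cycle 5
  std::printf("VFP at %d, NEON at %d\n", t0, t1);
  return 0;
}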
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
index f813022..08b560c 100644
--- a/lib/Target/ARM/ARMScheduleV6.td
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -16,7 +16,7 @@
// Functional Units
def V6_Pipe : FuncUnit; // pipeline
-// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual".
+// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual"
//
def ARMV6Itineraries : ProcessorItineraries<
[V6_Pipe], [
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index b4a9252..09203f9 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -60,8 +60,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
- std::string("e-p:32:32-f64:32:32-i64:32:32-n32") :
- std::string("e-p:32:32-f64:64:64-i64:64:64-n32")),
+ std::string("e-p:32:32-f64:32:32-i64:32:32-"
+ "v128:32:128-v64:32:64-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-"
+ "v128:64:128-v64:64:64-n32")),
TLInfo(*this),
TSInfo(*this) {
}
@@ -74,9 +76,11 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") :
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:32:128-v64:32:64-a:0:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")),
+ "i16:16:32-i8:8:32-i1:8:32-"
+ "v128:64:128-v64:64:64-a:0:32-n32")),
TLInfo(*this),
TSInfo(*this) {
}
@@ -98,6 +102,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
+
return true;
}
@@ -115,21 +120,20 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
// proper scheduling.
PM.add(createARMExpandPseudoPass());
- return true;
-}
-
-bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
- // FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None) {
if (!Subtarget.isThumb1Only())
PM.add(createIfConverterPass());
}
-
- if (Subtarget.isThumb2()) {
+ if (Subtarget.isThumb2())
PM.add(createThumb2ITBlockPass());
+
+ return true;
+}
+
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (Subtarget.isThumb2())
PM.add(createThumb2SizeReductionPass());
- }
PM.add(createARMConstantIslandPass());
return true;
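
Note on the datalayout change above: each added v<size>:<abi>:<pref> entry gives the ABI and preferred alignment, in bits, for vectors of <size> bits, so AAPCS NEON vectors now get 64-bit ABI alignment and 128-bit preferred alignment. A minimal sketch of what the new AAPCS string implies (not part of the patch; the helper name is ours and the TargetData API is the 2.8-era one):

#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

// "v128:64:128" = 128-bit vectors: 64-bit ABI align, 128-bit preferred;
// "v64:64:64" likewise for 64-bit vectors.
static unsigned neonVectorABIAlign(LLVMContext &Ctx) {
  TargetData TD("e-p:32:32-f64:64:64-i64:64:64-v128:64:128-v64:64:64-n32");
  const VectorType *V4f32 = VectorType::get(Type::getFloatTy(Ctx), 4);
  return TD.getABITypeAlignment(V4f32); // 8 bytes (64 bits)
}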
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index bfa89c4..8415d1a 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -425,7 +425,7 @@ bool ARMAsmParser::ParseMemory(OwningPtr<ARMOperand> &Op) {
const AsmToken &NextTok = Parser.getTok();
if (NextTok.isNot(AsmToken::EndOfStatement)) {
if (NextTok.isNot(AsmToken::Comma))
- return Error(NextTok.getLoc(), "',' expected");
+ return Error(NextTok.getLoc(), "',' expected");
Parser.Lex(); // Eat comma token.
if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType,
ShiftAmount, Offset, OffsetIsReg, OffsetRegNum,
@@ -488,7 +488,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative,
const AsmToken &Tok = Parser.getTok();
if (ParseShift(ShiftType, ShiftAmount, E))
- return Error(Tok.getLoc(), "shift expected");
+ return Error(Tok.getLoc(), "shift expected");
OffsetRegShifted = true;
}
}
@@ -665,7 +665,6 @@ bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc,
Operands.push_back(Op.take());
- SMLoc Loc = Parser.getTok().getLoc();
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
@@ -763,15 +762,10 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
if (Tok.isNot(AsmToken::Identifier))
return Error(L, "unexpected token in .syntax directive");
const StringRef &Mode = Tok.getString();
- bool unified_syntax;
- if (Mode == "unified" || Mode == "UNIFIED") {
+ if (Mode == "unified" || Mode == "UNIFIED")
Parser.Lex();
- unified_syntax = true;
- }
- else if (Mode == "divided" || Mode == "DIVIDED") {
+ else if (Mode == "divided" || Mode == "DIVIDED")
Parser.Lex();
- unified_syntax = false;
- }
else
return Error(L, "unrecognized syntax mode in .syntax directive");
@@ -791,15 +785,10 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
if (Tok.isNot(AsmToken::Integer))
return Error(L, "unexpected token in .code directive");
int64_t Val = Parser.getTok().getIntVal();
- bool thumb_mode;
- if (Val == 16) {
+ if (Val == 16)
Parser.Lex();
- thumb_mode = true;
- }
- else if (Val == 32) {
+ else if (Val == 32)
Parser.Lex();
- thumb_mode = false;
- }
else
return Error(L, "invalid operand to .code directive");
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index d95efdb..6a40cf3 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -175,23 +175,8 @@ namespace {
raw_ostream &O);
void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O);
-
- void printHex8ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff);
- }
- void printHex16ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff);
- }
- void printHex32ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff);
- }
- void printHex64ImmOperand(const MachineInstr *MI, int OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm());
- }
+ void printNEONModImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O);
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
@@ -322,7 +307,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0);
unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1);
O << '{'
- << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
+ << getRegisterName(DRegLo) << ", " << getRegisterName(DRegHi)
<< '}';
} else if (Modifier && strcmp(Modifier, "lane") == 0) {
unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
@@ -617,8 +602,12 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op,
O << "[" << getRegisterName(MO1.getReg());
if (MO2.getImm()) {
+ unsigned Align = MO2.getImm();
+ assert((Align == 8 || Align == 16 || Align == 32) &&
+ "unexpected NEON load/store alignment");
+ Align <<= 3;
// FIXME: Both darwin as and GNU as violate ARM docs here.
- O << ", :" << MO2.getImm();
+ O << ", :" << Align;
}
O << "]";
}
@@ -1039,6 +1028,14 @@ void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum,
}
}
+void ARMAsmPrinter::printNEONModImmOperand(const MachineInstr *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << "#0x" << utohexstr(Val);
+}
+
bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) {
@@ -1064,20 +1061,10 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
printOperand(MI, OpNum, O);
return false;
case 'Q':
- if (TM.getTargetData()->isLittleEndian())
- break;
- // Fallthrough
case 'R':
- if (TM.getTargetData()->isBigEndian())
- break;
- // Fallthrough
- case 'H': // Write second word of DI / DF reference.
- // Verify that this operand has two consecutive registers.
- if (!MI->getOperand(OpNum).isReg() ||
- OpNum+1 == MI->getNumOperands() ||
- !MI->getOperand(OpNum+1).isReg())
- return true;
- ++OpNum; // Return the high-part.
+ case 'H':
+ report_fatal_error("llvm does not support 'Q', 'R', and 'H' modifiers!");
+ return true;
}
}
@@ -1384,11 +1371,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
} else if (MO.isGlobal()) {
MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO);
const MCSymbolRefExpr *SymRef1 =
- MCSymbolRefExpr::Create(Symbol,
- MCSymbolRefExpr::VK_ARM_LO16, OutContext);
+ MCSymbolRefExpr::Create(Symbol,
+ MCSymbolRefExpr::VK_ARM_LO16, OutContext);
const MCSymbolRefExpr *SymRef2 =
- MCSymbolRefExpr::Create(Symbol,
- MCSymbolRefExpr::VK_ARM_HI16, OutContext);
+ MCSymbolRefExpr::Create(Symbol,
+ MCSymbolRefExpr::VK_ARM_HI16, OutContext);
V1 = MCOperand::CreateExpr(SymRef1);
V2 = MCOperand::CreateExpr(SymRef2);
} else {
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index 2b94b76..170819a 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -779,22 +779,10 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum,
O << '#' << MI->getOperand(OpNum).getImm();
}
-void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff);
-}
-
-void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff);
-}
-
-void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff);
-}
-
-void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm());
+void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum,
+ raw_ostream &O) {
+ unsigned EncodedImm = MI->getOperand(OpNum).getImm();
+ unsigned EltBits;
+ uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits);
+ O << "#0x" << utohexstr(Val);
}
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index be0b7c1..ddf5047 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -104,10 +104,7 @@ public:
void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
+ void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O);
// FIXME: Implement.
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 29e66e1..0df3466 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -33,6 +33,7 @@ add_llvm_target(ARMCodeGen
NEONPreAllocPass.cpp
Thumb1InstrInfo.cpp
Thumb1RegisterInfo.cpp
+ Thumb2HazardRecognizer.cpp
Thumb2ITBlockPass.cpp
Thumb2InstrInfo.cpp
Thumb2RegisterInfo.cpp
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index adb7795..a07ff28 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -34,7 +34,7 @@
/// Uses and Defs by this instr. For the Uses part, the pred:$p operand is
/// defined with two components:
///
-/// def pred { // Operand PredicateOperand
+/// def pred { // Operand PredicateOperand
/// ValueType Type = OtherVT;
/// string PrintMethod = "printPredicateOperand";
/// string AsmOperandLowerMethod = ?;
@@ -54,7 +54,7 @@
///
/// For the Defs part, in the simple case of only cc_out:$s, we have:
///
-/// def cc_out { // Operand OptionalDefOperand
+/// def cc_out { // Operand OptionalDefOperand
/// ValueType Type = OtherVT;
/// string PrintMethod = "printSBitModifierOperand";
/// string AsmOperandLowerMethod = ?;
@@ -765,7 +765,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
|| Opcode == ARM::SMC || Opcode == ARM::SVC) &&
"Unexpected Opcode");
- assert(NumOps >= 1 && OpInfo[0].RegClass == 0 && "Reg operand expected");
+  assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected");
int Imm32 = 0;
if (Opcode == ARM::SMC) {
@@ -1106,7 +1106,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
(OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+2].RegClass == 0) &&
+ (OpInfo[OpIdx+2].RegClass < 0) &&
"Expect 3 reg operands");
// Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1.
@@ -1201,7 +1201,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass == 0) &&
+ (OpInfo[OpIdx+1].RegClass < 0) &&
"Expect 1 reg operand followed by 1 imm operand");
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
@@ -1323,7 +1323,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) &&
- (OpInfo[OpIdx+1].RegClass == 0) &&
+ (OpInfo[OpIdx+1].RegClass < 0) &&
"Expect 1 reg operand followed by 1 imm operand");
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
@@ -1494,7 +1494,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// If there is still an operand info left which is an immediate operand, add
// an additional imm5 LSL/ASR operand.
- if (ThreeReg && OpInfo[OpIdx].RegClass == 0
+ if (ThreeReg && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Extract the 5-bit immediate field Inst{11-7}.
unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F;
@@ -1540,7 +1540,7 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// If there is still an operand info left which is an immediate operand, add
// an additional rotate immediate operand.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Extract the 2-bit rotate field Inst{11-10}.
unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3;
@@ -1725,7 +1725,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
"Tied to operand expected");
MI.addOperand(MI.getOperand(0));
- assert(OpInfo[2].RegClass == 0 && !OpInfo[2].isPredicate() &&
+ assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() &&
!OpInfo[2].isOptionalDef() && "Imm operand expected");
MI.addOperand(MCOperand::CreateImm(fbits));
@@ -1984,7 +1984,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
++OpIdx;
// Extract/decode the f64/f32 immediate.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// The asm syntax specifies the before-expanded <imm>.
// Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0),
@@ -2077,42 +2077,12 @@ static unsigned decodeLaneIndex(uint32_t insn) {
// imm3 = Inst{18-16}, imm4 = Inst{3-0}
// Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions.
static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) {
+ unsigned char op = (insn >> 5) & 1;
unsigned char cmode = (insn >> 8) & 0xF;
unsigned char Imm8 = ((insn >> 24) & 1) << 7 |
((insn >> 16) & 7) << 4 |
(insn & 0xF);
- uint64_t Imm64 = 0;
-
- switch (esize) {
- case ESize8:
- Imm64 = Imm8;
- break;
- case ESize16:
- Imm64 = Imm8 << 8*(cmode >> 1 & 1);
- break;
- case ESize32: {
- if (cmode == 12)
- Imm64 = (Imm8 << 8) | 0xFF;
- else if (cmode == 13)
- Imm64 = (Imm8 << 16) | 0xFFFF;
- else {
- // Imm8 to be shifted left by how many bytes...
- Imm64 = Imm8 << 8*(cmode >> 1 & 3);
- }
- break;
- }
- case ESize64: {
- for (unsigned i = 0; i < 8; ++i)
- if ((Imm8 >> i) & 1)
- Imm64 |= (uint64_t)0xFF << 8*i;
- break;
- }
- default:
- assert(0 && "Unreachable code!");
- return 0;
- }
-
- return Imm64;
+ return (op << 12) | (cmode << 8) | Imm8;
}
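
Note: decodeN1VImm no longer expands the modified immediate itself; it returns the 13-bit encoded form, (op << 12) | (cmode << 8) | Imm8, and defers expansion to ARM_AM::decodeNEONModImm (added elsewhere in this commit). The field layout below is read directly off the return expression above; the accessor names are ours:

// Packed encoding returned by decodeN1VImm:
//   bits [7:0]  = Imm8 (abcdefgh from the instruction)
//   bits [11:8] = cmode
//   bit  12     = op
static inline unsigned nModImmOp(unsigned Enc)    { return (Enc >> 12) & 1; }
static inline unsigned nModImmCMode(unsigned Enc) { return (Enc >> 8) & 0xF; }
static inline unsigned nModImmImm8(unsigned Enc)  { return Enc & 0xFF; }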
// A8.6.339 VMUL, VMULL (by scalar)
@@ -2303,7 +2273,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected");
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
@@ -2320,7 +2290,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
"Reg operand expected");
RegClass = OpInfo[OpIdx].RegClass;
- while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
getRegisterEnum(B, RegClass, Rd,
UseDRegPair(Opcode))));
@@ -2329,7 +2299,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// Handle possible lane index.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
++OpIdx;
@@ -2340,7 +2310,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
// possible TIED_TO DPR/QPR's (ignored), then possible lane index.
RegClass = OpInfo[0].RegClass;
- while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
MI.addOperand(MCOperand::CreateReg(
getRegisterEnum(B, RegClass, Rd,
UseDRegPair(Opcode))));
@@ -2355,7 +2325,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
- OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected");
+ OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
Rn)));
MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored?
@@ -2366,7 +2336,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
++OpIdx;
}
- while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) {
+ while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 &&
"Tied to operand expected");
MI.addOperand(MCOperand::CreateReg(0));
@@ -2374,7 +2344,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// Handle possible lane index.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn)));
++OpIdx;
@@ -2438,7 +2408,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
assert(NumOps >= 2 &&
(OpInfo[0].RegClass == ARM::DPRRegClassID ||
OpInfo[0].RegClass == ARM::QPRRegClassID) &&
- (OpInfo[1].RegClass == 0) &&
+ (OpInfo[1].RegClass < 0) &&
"Expect 1 reg operand followed by 1 imm operand");
// Qd/Dd = Inst{22:15-12} => NEON Rd
@@ -2552,7 +2522,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
}
// Add the imm operand, if required.
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
unsigned imm = 0xFFFFFFFF;
@@ -2632,7 +2602,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
decodeNEONRm(insn))));
++OpIdx;
- assert(OpInfo[OpIdx].RegClass == 0 && "Imm operand expected");
+ assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected");
// Add the imm operand.
@@ -2762,7 +2732,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
getRegisterEnum(B, OpInfo[OpIdx].RegClass, m)));
++OpIdx;
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Add the imm operand.
unsigned Imm = 0;
@@ -2869,15 +2839,9 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
-static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- assert(0 && "Unreachable code!");
- return false;
-}
-
// Vector Get Lane (move scalar to ARM core register) Instructions.
// VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index
-static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
@@ -2887,7 +2851,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
OpInfo[0].RegClass == ARM::GPRRegClassID &&
OpInfo[1].RegClass == ARM::DPRRegClassID &&
- OpInfo[2].RegClass == 0 &&
+ OpInfo[2].RegClass < 0 &&
"Expect >= 3 operands with one dst operand");
ElemSize esize =
@@ -2911,7 +2875,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Vector Set Lane (move ARM core register to scalar) Instructions.
// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index
-static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetInstrDesc &TID = ARMInsts[Opcode];
@@ -2923,7 +2887,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
OpInfo[1].RegClass == ARM::DPRRegClassID &&
TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
OpInfo[2].RegClass == ARM::GPRRegClassID &&
- OpInfo[3].RegClass == 0 &&
+ OpInfo[3].RegClass < 0 &&
"Expect >= 3 operands with one dst operand");
ElemSize esize =
@@ -2950,7 +2914,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Vector Duplicate Instructions (from ARM core register to all elements).
// VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt
-static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
+static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
@@ -3090,13 +3054,6 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
return false;
}
-static bool DisassembleThumbMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
- unsigned short NumOps, unsigned &NumOpsAdded, BO) {
-
- assert(0 && "Unexpected thumb misc. instruction!");
- return false;
-}
-
/// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP.
/// We divide the disassembly task into different categories, with each one
/// corresponding to a specific instruction encoding format. There could be
@@ -3128,12 +3085,10 @@ static const DisassembleFP FuncPtrs[] = {
&DisassembleVFPLdStMulFrm,
&DisassembleVFPMiscFrm,
&DisassembleThumbFrm,
- &DisassembleNEONFrm,
- &DisassembleNEONGetLnFrm,
- &DisassembleNEONSetLnFrm,
- &DisassembleNEONDupFrm,
&DisassembleMiscFrm,
- &DisassembleThumbMiscFrm,
+ &DisassembleNGetLnFrm,
+ &DisassembleNSetLnFrm,
+ &DisassembleNDupFrm,
// VLD and VST (including one lane) Instructions.
&DisassembleNLdSt,
@@ -3233,7 +3188,8 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
// a pair of TargetOperandInfos with isPredicate() property.
if (NumOpsRemaining >= 2 &&
OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
- OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
{
// If we are inside an IT block, get the IT condition bits maintained via
// ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
@@ -3265,7 +3221,8 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
// a pair of TargetOperandInfos with isPredicate() property.
if (NumOpsRemaining >= 2 &&
OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
- OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
+ OpInfo[Idx].RegClass < 0 &&
+ OpInfo[Idx+1].RegClass == ARM::CCRRegClassID)
{
// If we are inside an IT block, get the IT condition bits maintained via
// ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond().
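
Note on the pervasive RegClass == 0 to RegClass < 0 change in this file: a hedged reading is that register class ID 0 is no longer reserved for non-register operands, which are instead marked with a negative RegClass. The asserts above all reduce to one predicate; a sketch assuming the 2.8-era TargetOperandInfo from llvm/Target/TargetInstrDesc.h:

#include "llvm/Target/TargetInstrDesc.h"
using namespace llvm;

// An operand is a pure immediate if it has no register class and is
// neither a predicate operand nor an optional def.
static inline bool isPureImmOperand(const TargetOperandInfo &TOI) {
  return TOI.RegClass < 0 && !TOI.isPredicate() && !TOI.isOptionalDef();
}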
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index b1d90df..7d21256 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -137,25 +137,25 @@ static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To,
/// Various utilities for checking the target specific flags.
/// A unary data processing instruction doesn't have an Rn operand.
-static inline bool isUnaryDP(unsigned TSFlags) {
+static inline bool isUnaryDP(uint64_t TSFlags) {
return (TSFlags & ARMII::UnaryDP);
}
/// This four-bit field describes the addressing mode used.
/// See also ARMBaseInstrInfo.h.
-static inline unsigned getAddrMode(unsigned TSFlags) {
+static inline unsigned getAddrMode(uint64_t TSFlags) {
return (TSFlags & ARMII::AddrModeMask);
}
/// {IndexModePre, IndexModePost}
/// Only valid for load and store ops.
/// See also ARMBaseInstrInfo.h.
-static inline unsigned getIndexMode(unsigned TSFlags) {
+static inline unsigned getIndexMode(uint64_t TSFlags) {
return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
}
/// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
-static inline bool isPrePostLdSt(unsigned TSFlags) {
+static inline bool isPrePostLdSt(uint64_t TSFlags) {
return (TSFlags & ARMII::IndexModeMask) != 0;
}
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 4b2e308..4b7a0bf 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -395,7 +395,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID,
getT1tRm(insn))));
} else {
- assert(OpInfo[OpIdx].RegClass == 0 &&
+ assert(OpInfo[OpIdx].RegClass < 0 &&
!OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn)
@@ -531,7 +531,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
if (!OpInfo) return false;
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass == 0 &&
+ (OpInfo[1].RegClass < 0 &&
!OpInfo[1].isPredicate() &&
!OpInfo[1].isOptionalDef())
&& "Invalid arguments");
@@ -598,7 +598,7 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
assert(OpIdx < NumOps && "More operands expected");
- if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() &&
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() &&
!OpInfo[OpIdx].isOptionalDef()) {
MI.addOperand(MCOperand::CreateImm(Imm5 ? getT1Imm5(insn) : 0));
@@ -632,7 +632,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::tGPRRegClassID &&
OpInfo[1].RegClass == ARM::GPRRegClassID &&
- (OpInfo[2].RegClass == 0 &&
+ (OpInfo[2].RegClass < 0 &&
!OpInfo[2].isPredicate() &&
!OpInfo[2].isOptionalDef())
&& "Invalid arguments");
@@ -658,7 +658,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
if (!OpInfo) return false;
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass == 0 &&
+ (OpInfo[1].RegClass < 0 &&
!OpInfo[1].isPredicate() &&
!OpInfo[1].isOptionalDef())
&& "Invalid arguments");
@@ -685,7 +685,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::tGPRRegClassID &&
OpInfo[1].RegClass == ARM::GPRRegClassID &&
- (OpInfo[2].RegClass == 0 &&
+ (OpInfo[2].RegClass < 0 &&
!OpInfo[2].isPredicate() &&
!OpInfo[2].isOptionalDef())
&& "Invalid arguments");
@@ -761,7 +761,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
// Predicate operands are handled elsewhere.
if (NumOps == 2 &&
OpInfo[0].isPredicate() && OpInfo[1].isPredicate() &&
- OpInfo[0].RegClass == 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
+ OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) {
return true;
}
@@ -808,7 +808,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
}
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
- (OpInfo[1].RegClass==0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
+ (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID)
&& "Expect >=2 operands");
// Add the destination operand.
@@ -913,7 +913,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
- assert(NumOps == 3 && OpInfo[0].RegClass == 0 &&
+ assert(NumOps == 3 && OpInfo[0].RegClass < 0 &&
OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID
&& "Exactly 3 operands expected");
@@ -939,7 +939,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
- assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected");
+ assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected");
unsigned Imm11 = getT1Imm11(insn);
@@ -1239,7 +1239,7 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
&& OpInfo[0].RegClass == ARM::GPRRegClassID
&& OpInfo[1].RegClass == ARM::GPRRegClassID
&& OpInfo[2].RegClass == ARM::GPRRegClassID
- && OpInfo[3].RegClass == 0
+ && OpInfo[3].RegClass < 0
&& "Expect >= 4 operands and first 3 as reg operands");
// Add the <Rt> <Rt2> operands.
@@ -1322,8 +1322,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps == 4
&& OpInfo[0].RegClass == ARM::GPRRegClassID
&& OpInfo[1].RegClass == ARM::GPRRegClassID
- && OpInfo[2].RegClass == 0
- && OpInfo[3].RegClass == 0
+ && OpInfo[2].RegClass < 0
+ && OpInfo[3].RegClass < 0
&& "Exactlt 4 operands expect and first two as reg operands");
// Only need to populate the src reg operand.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -1375,7 +1375,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
if (NumOps == OpIdx)
return true;
- if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()) {
if (Thumb2ShiftOpcode(Opcode))
@@ -1440,7 +1440,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
}
// The modified immediate operand should come next.
- assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 &&
+ assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
!OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1555,7 +1555,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
++OpIdx;
}
- assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1772,7 +1772,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
} else {
- assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
int Offset = 0;
@@ -1792,7 +1792,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
}
++OpIdx;
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 &&
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 &&
!OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs.
MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4)));
@@ -1818,7 +1818,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
assert(NumOps >= 2 &&
OpInfo[0].RegClass == ARM::GPRRegClassID &&
- OpInfo[1].RegClass == 0 &&
+ OpInfo[1].RegClass < 0 &&
"Expect >= 2 operands, first as reg, and second as imm operand");
// Build the register operand, followed by the (+/-)imm12 immediate.
@@ -1930,7 +1930,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
++OpIdx;
}
- assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate()
+ assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1981,7 +1981,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
decodeRm(insn))));
++OpIdx;
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0
+ if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0
&& !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) {
// Add the rotation amount immediate.
MI.addOperand(MCOperand::CreateImm(decodeRotate(insn)));
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 0a4400c..bbdd3c7 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -105,8 +105,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
unsigned MOReg = MO.getReg();
Defs[MOReg] = MI;
- // Catch subregs as well.
- for (const unsigned *R = TRI->getSubRegisters(MOReg); *R; ++R)
+ // Catch aliases as well.
+ for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R)
Defs[*R] = MI;
}
}
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index a725898..f67717c 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -407,7 +407,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
"expected a virtual register");
// Extracting from a Q or QQ register.
MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
- if (!DefMI || !DefMI->isExtractSubreg())
+ if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg())
return false;
VirtReg = DefMI->getOperand(1).getReg();
if (LastSrcReg && LastSrcReg != VirtReg)
@@ -418,7 +418,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
RC != ARM::QQPRRegisterClass &&
RC != ARM::QQQQPRRegisterClass)
return false;
- unsigned SubIdx = DefMI->getOperand(2).getImm();
+ unsigned SubIdx = DefMI->getOperand(1).getSubReg();
if (LastSubIdx) {
if (LastSubIdx != SubIdx-Stride)
return false;
@@ -434,22 +434,21 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
// FIXME: Updating the uses of EXTRACT_SUBREG from REG_SEQUENCE is
// currently required for correctness, e.g.
- // %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
+ // %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
// %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
// %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
// VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
- // reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5
+ // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5
// respectively.
// We need to change how we model uses of REG_SEQUENCE.
for (unsigned R = 0; R < NumRegs; ++R) {
MachineOperand &MO = MI->getOperand(FirstOpnd + R);
unsigned OldReg = MO.getReg();
MachineInstr *DefMI = MRI->getVRegDef(OldReg);
- assert(DefMI->isExtractSubreg());
+ assert(DefMI->isCopy());
MO.setReg(LastSrcReg);
MO.setSubReg(SubIds[R]);
- if (R != 0)
- MO.setIsKill(false);
+ MO.setIsKill(false);
// Delete the EXTRACT_SUBREG if its result is now dead.
if (MRI->use_empty(OldReg))
DefMI->eraseFromParent();
@@ -467,43 +466,9 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
unsigned FirstOpnd, NumRegs, Offset, Stride;
if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
continue;
- if (llvm::ModelWithRegSequence() &&
- FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
+ if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
continue;
-
- MachineBasicBlock::iterator NextI = llvm::next(MBBI);
- for (unsigned R = 0; R < NumRegs; ++R) {
- MachineOperand &MO = MI->getOperand(FirstOpnd + R);
- assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
- unsigned VirtReg = MO.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "expected a virtual register");
-
- // For now, just assign a fixed set of adjacent registers.
- // This leaves plenty of room for future improvements.
- static const unsigned NEONDRegs[] = {
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7
- };
- MO.setReg(NEONDRegs[Offset + R * Stride]);
-
- if (MO.isUse()) {
- // Insert a copy from VirtReg.
- TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg,
- ARM::DPRRegisterClass, ARM::DPRRegisterClass,
- DebugLoc());
- if (MO.isKill()) {
- MachineInstr *CopyMI = prior(MBBI);
- CopyMI->findRegisterUseOperand(VirtReg)->setIsKill();
- }
- MO.setIsKill();
- } else if (MO.isDef() && !MO.isDead()) {
- // Add a copy to VirtReg.
- TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(),
- ARM::DPRRegisterClass, ARM::DPRRegisterClass,
- DebugLoc());
- }
- }
+ llvm_unreachable("expected a REG_SEQUENCE");
}
return Modified;
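
Note: the isExtractSubreg tests above become COPY tests because sub-register extracts are now modeled (a hedged reading; the lowering change itself is outside this hunk) as COPY instructions whose source operand carries the sub-register index, rather than as a dedicated EXTRACT_SUBREG with the index in operand 2. Restated using only the calls that appear in the patch:

#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// A sub-register extract in the new representation:
//   %dst = COPY %src:subidx
// i.e. a COPY whose source operand has a nonzero sub-register index.
static bool isSubRegExtract(const MachineInstr *MI) {
  return MI->isCopy() && MI->getOperand(1).getSubReg() != 0;
}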
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index fae84d4..af630ac 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -33,64 +33,24 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- }
- } else if (DestRC == ARM::tGPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
- return true;
- }
- }
-
- return false;
-}
-
-bool Thumb1InstrInfo::
-canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default: break;
- case ARM::tMOVr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVgpr2gpr: {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- !isARMLowRegister(SrcReg))
- // tSpill cannot take a high register operand.
- return false;
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) &&
- !isARMLowRegister(DstReg))
- // tRestore cannot target a high register operand.
- return false;
- }
- return true;
- }
- }
-
- return false;
+void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool tDest = ARM::tGPRRegClass.contains(DestReg);
+ bool tSrc = ARM::tGPRRegClass.contains(SrcReg);
+ unsigned Opc = ARM::tMOVgpr2gpr;
+ if (tDest && tSrc)
+ Opc = ARM::tMOVr;
+ else if (tSrc)
+ Opc = ARM::tMOVtgpr2gpr;
+ else if (tDest)
+ Opc = ARM::tMOVgpr2tgpr;
+
+  assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
+         "Thumb1 can only copy GPR registers");
+  BuildMI(MBB, I, DL, get(Opc), DestReg)
+    .addReg(SrcReg, getKillRegState(KillSrc));
}
void Thumb1InstrInfo::
@@ -175,10 +135,10 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
isKill = false;
}
- if (isKill) {
+ if (isKill)
MBB.addLiveIn(Reg);
- MIB.addReg(Reg, RegState::Kill);
- }
+
+ MIB.addReg(Reg, getKillRegState(isKill));
}
return true;
}
@@ -221,46 +181,3 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-
-MachineInstr *Thumb1InstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = NULL;
- switch (Opc) {
- default: break;
- case ARM::tMOVr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVgpr2gpr: {
- if (OpNum == 0) { // move -> store
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- !isARMLowRegister(SrcReg))
- // tSpill cannot take a high register operand.
- break;
- NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0));
- } else { // move -> load
- unsigned DstReg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) &&
- !isARMLowRegister(DstReg))
- // tRestore cannot target a high register operand.
- break;
- bool isDead = MI->getOperand(0).isDead();
- NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
- .addReg(DstReg,
- RegState::Define | getDeadRegState(isDead))
- .addFrameIndex(FI).addImm(0));
- }
- break;
- }
- }
-
- return NewMI;
-}
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index c937296..555135a 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -46,12 +46,10 @@ public:
const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const;
- bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -64,20 +62,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
};
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 2f635fe..39b70b4 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -68,21 +68,6 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg);
}
-const TargetRegisterClass*
-Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const {
- if (isARMLowRegister(Reg))
- return ARM::tGPRRegisterClass;
- switch (Reg) {
- default:
- break;
- case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11:
- case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC:
- return ARM::GPRRegisterClass;
- }
-
- return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT);
-}
-
bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
const MachineFrameInfo *FFI = MF.getFrameInfo();
unsigned CFSize = FFI->getMaxCallFrameSize();
@@ -410,6 +395,8 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// before that instead and adjust the UseMI.
bool done = false;
for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) {
+ if (II->isDebugValue())
+ continue;
// If this instruction affects R12, adjust our restore point.
for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = II->getOperand(i);
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 4eca367..9a0308af 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -38,9 +38,6 @@ public:
unsigned PredReg = 0) const;
/// Code Generation virtual methods...
- const TargetRegisterClass *
- getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const;
-
bool hasReservedCallFrame(MachineFunction &MF) const;
void eliminateCallFramePseudoInstr(MachineFunction &MF,
@@ -51,7 +48,8 @@ public:
// could not be handled directly in MI.
int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int Offset,
- unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const;
+ unsigned MOVOpc, unsigned ADDriOpc,
+ unsigned SUBriOpc) const;
bool saveScavengerRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.cpp b/lib/Target/ARM/Thumb2HazardRecognizer.cpp
new file mode 100644
index 0000000..172908d
--- /dev/null
+++ b/lib/Target/ARM/Thumb2HazardRecognizer.cpp
@@ -0,0 +1,53 @@
+//===-- Thumb2HazardRecognizer.cpp - Thumb2 postra hazard recognizer ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "Thumb2HazardRecognizer.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+using namespace llvm;
+
+ScheduleHazardRecognizer::HazardType
+Thumb2HazardRecognizer::getHazardType(SUnit *SU) {
+ if (ITBlockSize) {
+ MachineInstr *MI = SU->getInstr();
+ if (!MI->isDebugValue() && MI != ITBlockMIs[ITBlockSize-1])
+ return Hazard;
+ }
+
+ return PostRAHazardRecognizer::getHazardType(SU);
+}
+
+void Thumb2HazardRecognizer::Reset() {
+ ITBlockSize = 0;
+ PostRAHazardRecognizer::Reset();
+}
+
+void Thumb2HazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ unsigned Opcode = MI->getOpcode();
+ if (ITBlockSize) {
+ --ITBlockSize;
+ } else if (Opcode == ARM::t2IT) {
+ unsigned Mask = MI->getOperand(1).getImm();
+ unsigned NumTZ = CountTrailingZeros_32(Mask);
+ assert(NumTZ <= 3 && "Invalid IT mask!");
+ ITBlockSize = 4 - NumTZ;
+ MachineBasicBlock::iterator I = MI;
+ for (unsigned i = 0; i < ITBlockSize; ++i) {
+ // Advance to the next instruction, skipping any dbg_value instructions.
+ do {
+ ++I;
+ } while (I->isDebugValue());
+ ITBlockMIs[ITBlockSize-1-i] = &*I;
+ }
+ }
+
+ PostRAHazardRecognizer::EmitInstruction(SU);
+}
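
Note: the recognizer stalls anything that is not the next expected member of a partially issued IT block. The block length is derived from the t2IT mask operand exactly as above; a small worked form of that arithmetic (a sketch, not part of the patch):

#include <cassert>
#include "llvm/Support/MathExtras.h"
using namespace llvm;

// The 4-bit IT mask always carries a terminating one bit; its position
// encodes how many instructions follow the t2IT. E.g. Mask = 0b0100
// gives NumTZ = 2, so the block holds 4 - 2 = 2 predicated instructions.
static unsigned itBlockSize(unsigned Mask) {
  unsigned NumTZ = CountTrailingZeros_32(Mask);
  assert(NumTZ <= 3 && "Invalid IT mask!");
  return 4 - NumTZ;
}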
diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h
new file mode 100644
index 0000000..4726658
--- /dev/null
+++ b/lib/Target/ARM/Thumb2HazardRecognizer.h
@@ -0,0 +1,40 @@
+//===-- Thumb2HazardRecognizer.h - Thumb2 Hazard Recognizers ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines hazard recognizers for scheduling Thumb2 functions on
+// ARM processors.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMB2HAZARDRECOGNIZER_H
+#define THUMB2HAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+
+namespace llvm {
+
+class MachineInstr;
+
+class Thumb2HazardRecognizer : public PostRAHazardRecognizer {
+ unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled.
+ MachineInstr *ITBlockMIs[4];
+
+public:
+ Thumb2HazardRecognizer(const InstrItineraryData &ItinData) :
+ PostRAHazardRecognizer(ItinData) {}
+
+ virtual HazardType getHazardType(SUnit *SU);
+ virtual void Reset();
+ virtual void EmitInstruction(SUnit *SU);
+};
+
+
+} // end namespace llvm
+
+#endif // THUMB2HAZARDRECOGNIZER_H
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index f36d4ef..cd15bbe 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -14,17 +14,23 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
-STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
namespace {
- struct Thumb2ITBlockPass : public MachineFunctionPass {
+ class Thumb2ITBlockPass : public MachineFunctionPass {
+ bool PreRegAlloc;
+
+ public:
static char ID;
Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
const Thumb2InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
ARMFunctionInfo *AFI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -34,61 +40,167 @@ namespace {
}
private:
- bool InsertITBlocks(MachineBasicBlock &MBB);
+ bool MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses);
+ bool InsertITInstructions(MachineBasicBlock &MBB);
};
char Thumb2ITBlockPass::ID = 0;
}
-static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
- unsigned Opc = MI->getOpcode();
- if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
- return ARMCC::AL;
- return llvm::getInstrPredicate(MI, PredReg);
+/// TrackDefUses - Track which registers are defined and used by
+/// instructions in the IT block. This also tracks "dependencies", i.e. uses
+/// in the IT block of registers that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses,
+ const TargetRegisterInfo *TRI) {
+ SmallVector<unsigned, 4> LocalDefs;
+ SmallVector<unsigned, 4> LocalUses;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+ continue;
+ if (MO.isUse())
+ LocalUses.push_back(Reg);
+ else
+ LocalDefs.push_back(Reg);
+ }
+
+ for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+ unsigned Reg = LocalUses[i];
+ Uses.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Uses.insert(*Subreg);
+ }
+
+ for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+ unsigned Reg = LocalDefs[i];
+ Defs.insert(Reg);
+ for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+ *Subreg; ++Subreg)
+ Defs.insert(*Subreg);
+ if (Reg == ARM::CPSR)
+ continue;
+ }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+ ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+ SmallSet<unsigned, 4> &Defs,
+ SmallSet<unsigned, 4> &Uses) {
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
+ "Sub-register indices still around?");
+    // llvm models selects as two-address instructions. That means a copy
+ // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+ // between selects we would end up creating multiple IT blocks.
+
+ // First check if it's safe to move it.
+ if (Uses.count(DstReg) || Defs.count(SrcReg))
+ return false;
+
+ // Then peek at the next instruction to see if it's predicated on CC or OCC.
+ // If not, then there is nothing to be gained by moving the copy.
+ MachineBasicBlock::iterator I = MI; ++I;
+ MachineBasicBlock::iterator E = MI->getParent()->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I != E) {
+ unsigned NPredReg = 0;
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+ if (NCC == CC || NCC == OCC)
+ return true;
+ }
+ }
+ return false;
}
-bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
bool Modified = false;
+ SmallSet<unsigned, 4> Defs;
+ SmallSet<unsigned, 4> Uses;
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineInstr *MI = &*MBBI;
DebugLoc dl = MI->getDebugLoc();
unsigned PredReg = 0;
- ARMCC::CondCodes CC = getPredicate(MI, PredReg);
-
+ ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
if (CC == ARMCC::AL) {
++MBBI;
continue;
}
+ Defs.clear();
+ Uses.clear();
+ TrackDefUses(MI, Defs, Uses, TRI);
+
// Insert an IT instruction.
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
.addImm(CC);
+
+ // Add implicit use of ITSTATE to IT block instructions.
+    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+
+ MachineInstr *LastITMI = MI;
+ MachineBasicBlock::iterator InsertPos = MIB;
++MBBI;
- // Finalize IT mask.
+ // Form IT block.
ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
unsigned Mask = 0, Pos = 3;
// Branches, including tricky ones like LDM_RET, need to end an IT
// block so check the instruction we just put in the block.
- while (MBBI != E && Pos &&
- (!MI->getDesc().isBranch() && !MI->getDesc().isReturn())) {
+ for (; MBBI != E && Pos &&
+ (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+ if (MBBI->isDebugValue())
+ continue;
+
MachineInstr *NMI = &*MBBI;
MI = NMI;
- DebugLoc ndl = NMI->getDebugLoc();
+
unsigned NPredReg = 0;
- ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
- if (NCC == CC || NCC == OCC)
+ ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+ if (NCC == CC || NCC == OCC) {
Mask |= (NCC & 1) << Pos;
- else
+ // Add implicit use of ITSTATE.
+        NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+ true/*isImp*/, false/*isKill*/));
+ LastITMI = NMI;
+ } else {
+ if (NCC == ARMCC::AL &&
+ MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+ --MBBI;
+ MBB.remove(NMI);
+ MBB.insert(InsertPos, NMI);
+ ++NumMovedInsts;
+ continue;
+ }
break;
+ }
+ TrackDefUses(NMI, Defs, Uses, TRI);
--Pos;
- ++MBBI;
}
+
+ // Finalize IT mask.
Mask |= (1 << Pos);
// Tag along (firstcond[0] << 4) with the mask.
Mask |= (CC & 1) << 4;
MIB.addImm(Mask);
+
+ // Last instruction in IT block kills ITSTATE.
+ LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
Modified = true;
++NumITs;
}
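
Note: a worked instance of the mask construction above for an ITTE block opening with a NE-predicated instruction (illustrative only, using the standard encodings ARMCC::NE == 1 and ARMCC::EQ == 0):

// CC = NE (1), OCC = EQ (0); Pos starts at 3, the terminating one bit
// lands at the final Pos, and firstcond[0] is tagged along at bit 4.
static unsigned encodeITTE_NE() {
  unsigned CC = 1, OCC = 0;    // ARMCC::NE, ARMCC::EQ
  unsigned Mask = 0, Pos = 3;
  Mask |= (CC & 1) << Pos--;   // second instruction: 'then'
  Mask |= (OCC & 1) << Pos--;  // third instruction: 'else'
  Mask |= 1 << Pos;            // terminator at Pos == 1
  Mask |= (CC & 1) << 4;       // firstcond[0]
  return Mask;                 // 0b11010 == 0x1A
}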
@@ -100,17 +212,21 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
AFI = Fn.getInfo<ARMFunctionInfo>();
TII = static_cast<const Thumb2InstrInfo*>(TM.getInstrInfo());
+ TRI = TM.getRegisterInfo();
if (!AFI->isThumbFunction())
return false;
bool Modified = false;
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
MachineBasicBlock &MBB = *MFI;
- Modified |= InsertITBlocks(MBB);
+ ++MFI;
+ Modified |= InsertITInstructions(MBB);
}
+ if (Modified)
+ AFI->setHasITBlocks(true);
+
return Modified;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 531d5e9..ee51727 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -17,15 +17,27 @@
#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
+#include "Thumb2HazardRecognizer.h"
+#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/SmallVector.h"
-#include "Thumb2InstrInfo.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<unsigned>
+IfCvtLimit("thumb2-ifcvt-limit", cl::Hidden,
+ cl::desc("Thumb2 if-conversion limit (default 3)"),
+ cl::init(3));
+
+static cl::opt<unsigned>
+IfCvtDiamondLimit("thumb2-ifcvt-diamond-limit", cl::Hidden,
+ cl::desc("Thumb2 diamond if-conversion limit (default 3)"),
+ cl::init(3));
+
Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
: ARMBaseInstrInfo(STI), RI(*this, STI) {
}
@@ -35,33 +47,99 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool
-Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC == ARM::GPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
- return true;
- }
- } else if (DestRC == ARM::tGPRRegisterClass) {
- if (SrcRC == ARM::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
- return true;
- } else if (SrcRC == ARM::tGPRRegisterClass) {
- BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
- return true;
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const {
+ MachineBasicBlock *MBB = Tail->getParent();
+ ARMFunctionInfo *AFI = MBB->getParent()->getInfo<ARMFunctionInfo>();
+ if (!AFI->hasITBlocks()) {
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+ return;
+ }
+
+ // If the first instruction of Tail is predicated, we may have to update
+ // the IT instruction.
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+ MachineBasicBlock::iterator MBBI = Tail;
+ if (CC != ARMCC::AL)
+ // Expecting at least the t2IT instruction before it.
+ --MBBI;
+
+ // Actually replace the tail.
+ TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+
+ // Fix up IT.
+ if (CC != ARMCC::AL) {
+ MachineBasicBlock::iterator E = MBB->begin();
+ unsigned Count = 4; // At most 4 instructions in an IT block.
+ while (Count && MBBI != E) {
+ if (MBBI->isDebugValue()) {
+ --MBBI;
+ continue;
+ }
+ if (MBBI->getOpcode() == ARM::t2IT) {
+ unsigned Mask = MBBI->getOperand(1).getImm();
+ if (Count == 4)
+ MBBI->eraseFromParent();
+ else {
+ unsigned MaskOn = 1 << Count;
+ unsigned MaskOff = ~(MaskOn - 1);
+ MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+ }
+ return;
+ }
+ --MBBI;
+ --Count;
}
+
+ // Control flow can reach here if the branch folding pass is run before
+ // the IT block formation pass.
}
+}
+
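A worked example may help with the mask rewrite above. Count starts at 4 and is decremented once per surviving predicated instruction walked over, so if two instructions of a four-instruction IT block survive the tail replacement, Count is 2 when the t2IT is found. The values below are hypothetical:

    // Sketch of the fix-up arithmetic with illustrative values.
    unsigned Mask = 0x15;             // 0b10101: 4-slot block, terminator in bit 0
    unsigned Count = 2;               // two predicated instructions survive
    unsigned MaskOn = 1u << Count;    // 0b00100: new terminating 1
    unsigned MaskOff = ~(MaskOn - 1); // clears the now-dead low slots
    unsigned NewMask = (Mask & MaskOff) | MaskOn; // 0b10100: 2-slot block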
+bool
+Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const {
+ unsigned PredReg = 0;
+ return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+}
+bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumInstrs) const {
+ return NumInstrs && NumInstrs <= IfCvtLimit;
+}
+
+bool Thumb2InstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+ MachineBasicBlock &FMBB, unsigned NumF) const {
+ // FIXME: Catch optimizations such as:
+ // r0 = movne
+ // r0 = moveq
+ return NumT && NumF &&
+ NumT <= (IfCvtDiamondLimit) && NumF <= (IfCvtDiamondLimit);
+}
+
+void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
// Handle SPR, DPR, and QPR copies.
- return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL);
+ if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
+ return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
+
+ bool tDest = ARM::tGPRRegClass.contains(DestReg);
+ bool tSrc = ARM::tGPRRegClass.contains(SrcReg);
+ unsigned Opc = ARM::tMOVgpr2gpr;
+ if (tDest && tSrc)
+ Opc = ARM::tMOVr;
+ else if (tSrc)
+ Opc = ARM::tMOVtgpr2gpr;
+ else if (tDest)
+ Opc = ARM::tMOVgpr2tgpr;
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
void Thumb2InstrInfo::
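A compact reference for the opcode selection in copyPhysReg above, assuming the usual meaning of the register classes (tGPR is the low registers r0-r7, GPR is r0-r15):

    // tDest  tSrc   opcode         (illustration only)
    // yes    yes -> tMOVr          low-to-low, 16-bit encoding
    // yes    no  -> tMOVgpr2tgpr   high-to-low
    // no     yes -> tMOVtgpr2gpr   low-to-high
    // no     no  -> tMOVgpr2gpr    high-to-high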
@@ -69,7 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -94,7 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
- if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+ RC == ARM::tcGPRRegisterClass) {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -113,6 +193,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
+ScheduleHazardRecognizer *Thumb2InstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+ return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II);
+}
+
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
@@ -131,14 +216,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
// Use a movw to materialize the 16-bit constant.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
.addImm(NumBytes)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg);
Fits = true;
} else if ((NumBytes & 0xffff) == 0) {
// Use a movt to materialize the 32-bit constant.
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
.addReg(DestReg)
.addImm(NumBytes >> 16)
- .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+ .addImm((unsigned)Pred).addReg(PredReg);
Fits = true;
}
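The two branches above split the materialization of an immediate: a value that fits in 16 bits becomes a single movw (t2MOVi16), and a value whose low half is zero becomes a single movt (t2MOVTi16) writing the high half. A hedged sketch of that decision, with a hypothetical helper name:

    // Sketch: can NumBytes be produced by one movw or one movt?
    bool fitsInMovwOrMovt(unsigned NumBytes) {
      if (NumBytes <= 0xffff)        // movw: 16-bit immediate
        return true;
      if ((NumBytes & 0xffff) == 0)  // movt: only the high half is nonzero
        return true;
      return false;
    }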
@@ -502,3 +587,54 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
Offset = (isSub) ? -Offset : Offset;
return Offset == 0;
}
+
+/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+/// two-address instruction inserted by the two-address pass.
+void
+Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
+ MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const {
+ if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr ||
+ SrcMI->getOperand(1).isKill())
+ return;
+
+ unsigned PredReg = 0;
+ ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+ if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+ return;
+
+ // Schedule the copy so it doesn't come between previous instructions
+ // and UseMI, which together can form an IT block.
+ unsigned SrcReg = SrcMI->getOperand(1).getReg();
+ ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+ MachineBasicBlock *MBB = UseMI->getParent();
+ MachineBasicBlock::iterator MBBI = SrcMI;
+ unsigned NumInsts = 0;
+ while (--MBBI != MBB->begin()) {
+ if (MBBI->isDebugValue())
+ continue;
+
+ MachineInstr *NMI = &*MBBI;
+ ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+ if (!(NCC == CC || NCC == OCC) ||
+ NMI->modifiesRegister(SrcReg, &TRI) ||
+ NMI->definesRegister(ARM::CPSR))
+ break;
+ if (++NumInsts == 4)
+ // Too many in a row!
+ return;
+ }
+
+ if (NumInsts) {
+ MBB->remove(SrcMI);
+ MBB->insert(++MBBI, SrcMI);
+ }
+}
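The effect of the hoist above is easiest to see on an instruction sequence; the registers are illustrative, not from the patch. Without the move, the unpredicated copy splits two instructions predicated on opposite conditions and prevents them from sharing one IT block:

    //   before                          after scheduleTwoAddrSource
    //   moveq r0, #0                    mov   r2, r3   <- copy hoisted
    //   mov   r2, r3   <- copy (AL)     moveq r0, #0
    //   movne r2, #1   <- UseMI         movne r2, #1   (one ITE block)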
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+ return ARMCC::AL;
+ return llvm::getInstrPredicate(MI, PredReg);
+}
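The distinction getITInstrPredicate draws matters because tBcc and t2Bcc carry a condition-code operand yet execute outside any IT block; reporting that condition would let callers pull a conditional branch into, or terminate, an IT block incorrectly. A minimal illustration with a hypothetical instruction:

    // For:   bne .LBB0_2                 ; t2Bcc with an ARMCC::NE operand
    // llvm::getInstrPredicate(MI)   == ARMCC::NE  (raw predicate operand)
    // llvm::getITInstrPredicate(MI) == ARMCC::AL  (not IT-predicated)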
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 2948770..3a9f8b1 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -20,7 +20,8 @@
#include "Thumb2RegisterInfo.h"
namespace llvm {
- class ARMSubtarget;
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
class Thumb2InstrInfo : public ARMBaseInstrInfo {
Thumb2RegisterInfo RI;
@@ -31,12 +32,21 @@ public:
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
- bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+ MachineBasicBlock *NewDest) const;
+
+ bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const;
+
+ bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
+ MachineBasicBlock &FMBB, unsigned NumFInstrs) const;
+
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -50,12 +60,27 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
+ /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+ /// two-address instruction inserted by the two-address pass.
+ void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
+ const TargetRegisterInfo &TRI) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+
+ ScheduleHazardRecognizer *
+ CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
};
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate, except that it returns AL for conditional
+/// branch instructions which are "predicated" but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
}
#endif // THUMB2INSTRUCTIONINFO_H
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 8fe2e42..ba392f3 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -451,11 +451,18 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
return false;
- const TargetInstrDesc &TID = MI->getDesc();
unsigned Reg0 = MI->getOperand(0).getReg();
unsigned Reg1 = MI->getOperand(1).getReg();
- if (Reg0 != Reg1)
- return false;
+ if (Reg0 != Reg1) {
+ // Try to commute the operands to make it a 2-address instruction.
+ unsigned CommOpIdx1, CommOpIdx2;
+ if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+ CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+ return false;
+ MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+ if (!CommutedMI)
+ return false;
+ }
if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
return false;
if (Entry.Imm2Limit) {
@@ -484,6 +491,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
bool HasCC = false;
bool CCDead = false;
+ const TargetInstrDesc &TID = MI->getDesc();
if (TID.hasOptionalDef()) {
unsigned NumOps = TID.getNumOperands();
HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
@@ -689,7 +697,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
goto ProcessNext;
}
- // Try to transform ro a 16-bit non-two-address instruction.
+ // Try to transform to a 16-bit non-two-address instruction.
if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) {
Modified = true;
MachineBasicBlock::iterator I = prior(NextMII);
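A concrete case the new commute logic in ReduceTo2Addr above catches, with illustrative registers: `add r0, r1, r0` is not two-address as written (the destination differs from the first source), but swapping the sources yields the tied-operand form, which can then shrink to a 16-bit encoding:

    // before commute:  t2ADDrr r0, r1, r0   ; dst != src1, not reducible
    // after  commute:  t2ADDrr r0, r0, r1   ; dst == src1, 2-address form
    // after  reduce :  tADDrr  r0, r1       ; 16-bit encoding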
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 1d85f12..ea78bf3 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -224,6 +224,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
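This signature change repeats across every target in the patch: ISD::OutputArg previously carried the outgoing argument's SDValue alongside its flags, and the value half now travels in the parallel OutVals array, indexed in lockstep with Outs. Schematically:

    // old:  SDValue Arg = Outs[i].Val;             // value lived in OutputArg
    // new:  SDValue Arg = OutVals[i];              // value in parallel array
    //       ISD::ArgFlagsTy Flags = Outs[i].Flags; // flags stay in Outs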
@@ -251,7 +252,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -425,7 +426,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
}
} else { //more args
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true, false);
+ int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -444,7 +445,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
- int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false);
+ int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true);
if (i == 0) FuncInfo->setVarArgsBase(FI);
SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
@@ -453,7 +454,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
- FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false);
+ FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true);
SDFI = DAG.getFrameIndex(FI, MVT::i64);
LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0,
false, false, 0));
@@ -470,6 +471,7 @@ SDValue
AlphaTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26,
@@ -483,7 +485,7 @@ AlphaTargetLowering::LowerReturn(SDValue Chain,
break;
//return SDValue(); // ret void is legal
case 1: {
- EVT ArgVT = Outs[0].Val.getValueType();
+ EVT ArgVT = Outs[0].VT;
unsigned ArgReg;
if (ArgVT.isInteger())
ArgReg = Alpha::R0;
@@ -492,13 +494,13 @@ AlphaTargetLowering::LowerReturn(SDValue Chain,
ArgReg = Alpha::F0;
}
Copy = DAG.getCopyToReg(Copy, dl, ArgReg,
- Outs[0].Val, Copy.getValue(1));
+ OutVals[0], Copy.getValue(1));
if (DAG.getMachineFunction().getRegInfo().liveout_empty())
DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg);
break;
}
case 2: {
- EVT ArgVT = Outs[0].Val.getValueType();
+ EVT ArgVT = Outs[0].VT;
unsigned ArgReg1, ArgReg2;
if (ArgVT.isInteger()) {
ArgReg1 = Alpha::R0;
@@ -509,13 +511,13 @@ AlphaTargetLowering::LowerReturn(SDValue Chain,
ArgReg2 = Alpha::F1;
}
Copy = DAG.getCopyToReg(Copy, dl, ArgReg1,
- Outs[0].Val, Copy.getValue(1));
+ OutVals[0], Copy.getValue(1));
if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1)
== DAG.getMachineFunction().getRegInfo().liveout_end())
DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1);
Copy = DAG.getCopyToReg(Copy, dl, ArgReg2,
- Outs[1].Val, Copy.getValue(1));
+ OutVals[1], Copy.getValue(1));
if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(),
DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2)
== DAG.getMachineFunction().getRegInfo().liveout_end())
@@ -539,7 +541,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain,
false, false, 0);
SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP,
DAG.getConstant(8, MVT::i64));
- SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1),
+ SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Base.getValue(1),
Tmp, NULL, 0, MVT::i32, false, false, 0);
DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset);
if (N->getValueType(0).isFloatingPoint())
@@ -643,10 +645,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
case ISD::GlobalAddress: {
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset());
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64,
+ GSDN->getOffset());
// FIXME there isn't really any debug info here
- // if (!GV->hasWeakLinkage() && !GV->isDeclaration() && !GV->hasLinkOnceLinkage()) {
+ // if (!GV->hasWeakLinkage() && !GV->isDeclaration()
+ // && !GV->hasLinkOnceLinkage()) {
if (GV->hasLocalLinkage()) {
SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA,
DAG.getGLOBAL_OFFSET_TABLE(MVT::i64));
@@ -702,7 +706,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
SDValue Result;
if (Op.getValueType() == MVT::i32)
- Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr,
+ Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Chain, DataPtr,
NULL, 0, MVT::i32, false, false, 0);
else
Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0,
@@ -722,7 +726,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op,
false, false, 0);
SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP,
DAG.getConstant(8, MVT::i64));
- Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result,
+ Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Result,
NP, NULL,0, MVT::i32, false, false, 0);
SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP,
DAG.getConstant(8, MVT::i64));
@@ -863,7 +867,10 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- sinkMBB->transferSuccessors(thisMBB);
+ sinkMBB->splice(sinkMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ thisMBB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
F->insert(It, llscMBB);
F->insert(It, sinkMBB);
@@ -912,7 +919,7 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
thisMBB->addSuccessor(llscMBB);
llscMBB->addSuccessor(llscMBB);
llscMBB->addSuccessor(sinkMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return sinkMBB;
}
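The splice/transferSuccessorsAndUpdatePHIs pair above is the standard way to split a block inside EmitInstrWithCustomInserter: everything after MI moves into sinkMBB, and PHI operands in the former successors are rewritten to name sinkMBB. The older transferSuccessors left those PHIs pointing at thisMBB, which is presumably why the pattern changed here. The idiom, restated with the names used above:

    // Move the tail of thisMBB (everything after MI) into sinkMBB, then
    // retarget successor edges and fix up PHIs in the successors.
    sinkMBB->splice(sinkMBB->begin(), thisMBB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    thisMBB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB);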
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index 7ee823a..46e0c7d 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -121,6 +121,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -129,6 +130,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
};
}
diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td
index d984556..6f4ebf2 100644
--- a/lib/Target/Alpha/AlphaInstrFormats.td
+++ b/lib/Target/Alpha/AlphaInstrFormats.td
@@ -182,7 +182,7 @@ class OForm4<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, Inst
bits<5> Rb;
bits<7> Function = fun;
-// let isTwoAddress = 1;
+// let Constraints = "$RFALSE = $RDEST";
let Inst{25-21} = Ra;
let Inst{20-16} = Rb;
let Inst{15-13} = 0;
@@ -223,7 +223,7 @@ class OForm4L<bits<6> opcode, bits<7> fun, string asmstr, list<dag> pattern, Ins
bits<8> LIT;
bits<7> Function = fun;
-// let isTwoAddress = 1;
+// let Constraints = "$RFALSE = $RDEST";
let Inst{25-21} = Ra;
let Inst{20-13} = LIT;
let Inst{12} = 1;
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 3aba363..ad625a2 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -110,9 +110,8 @@ static bool isAlphaIntCondCode(unsigned Opcode) {
unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
"Alpha branch conditions have two components!");
@@ -120,58 +119,47 @@ unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB,
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch
- BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB);
else // Conditional branch
if (isAlphaIntCondCode(Cond[0].getImm()))
- BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
else
- BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
if (isAlphaIntCondCode(Cond[0].getImm()))
- BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
else
- BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F))
+ BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB);
return 2;
}
-bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n";
- if (DestRC != SrcRC) {
- // Not yet supported!
- return false;
- }
-
- if (DestRC == Alpha::GPRCRegisterClass) {
+void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) {
BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
.addReg(SrcReg)
- .addReg(SrcReg);
- } else if (DestRC == Alpha::F4RCRegisterClass) {
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) {
BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg)
.addReg(SrcReg)
- .addReg(SrcReg);
- } else if (DestRC == Alpha::F8RCRegisterClass) {
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) {
BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg)
.addReg(SrcReg)
- .addReg(SrcReg);
+ .addReg(SrcReg, getKillRegState(KillSrc));
} else {
- // Attempt to copy register that is not GPR or FPR
- return false;
+ llvm_unreachable("Attempt to copy register that is not GPR or FPR");
}
-
- return true;
}
void
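Alpha has no dedicated register-to-register move, so the three cases above lean on idioms: `bis src, src, dst` (OR a register with itself) moves a GPR, and the copy-sign instructions `cpyss`/`cpyst` move single- and double-precision FP registers. Roughly, in illustrative assembly:

    // GPR:   bis   $1, $1, $2      ; $2 = $1 | $1 = $1
    // F4RC:  cpyss $f1, $f1, $f2   ; $f2 = copysign($f1, $f1) = $f1
    // F8RC:  cpyst $f1, $f1, $f2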
@@ -227,51 +215,6 @@ AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
llvm_unreachable("Unhandled register class");
}
-MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
- if (Ops.size() != 1) return NULL;
-
- // Make sure this is a reg-reg copy.
- unsigned Opc = MI->getOpcode();
-
- MachineInstr *NewMI = NULL;
- switch(Opc) {
- default:
- break;
- case Alpha::BISr:
- case Alpha::CPYSS:
- case Alpha::CPYST:
- if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) {
- if (Ops[0] == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- Opc = (Opc == Alpha::BISr) ? Alpha::STQ :
- ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT);
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addFrameIndex(FrameIndex)
- .addReg(Alpha::F31);
- } else { // load -> move
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- Opc = (Opc == Alpha::BISr) ? Alpha::LDQ :
- ((Opc == Alpha::CPYSS) ? Alpha::LDS : Alpha::LDT);
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(OutReg, RegState::Define | getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addFrameIndex(FrameIndex)
- .addReg(Alpha::F31);
- }
- }
- break;
- }
- return NewMI;
-}
-
static unsigned AlphaRevCondCode(unsigned Opcode) {
switch (Opcode) {
case Alpha::BEQ: return Alpha::BNE;
@@ -428,11 +371,8 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
- bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
- DebugLoc());
- assert(Ok && "Couldn't assign to global base register!");
- Ok = Ok; // Silence warning when assertions are turned off.
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(Alpha::R29);
RegInfo.addLiveIn(Alpha::R29);
AlphaFI->setGlobalBaseReg(GlobalBaseReg);
@@ -456,11 +396,8 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
- bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
- DebugLoc());
- assert(Ok && "Couldn't assign to global return address register!");
- Ok = Ok; // Silence warning when assertions are turned off.
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalRetAddr).addReg(Alpha::R26);
RegInfo.addLiveIn(Alpha::R26);
AlphaFI->setGlobalRetAddr(GlobalRetAddr);
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index 7d7365b..e20e832 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -42,14 +42,13 @@ public:
int &FrameIndex) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -62,18 +61,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index a47a29b..92de78a 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -680,18 +680,32 @@ def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC",
}
//conditional moves, floats
-let OutOperandList = (outs F4RC:$RDEST), InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
- isTwoAddress = 1 in {
-def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero
-def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero
-def FCMOVGTS : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if > zero
-def FCMOVLES : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if <= zero
-def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; // FCMOVE if < zero
-def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero
+let OutOperandList = (outs F4RC:$RDEST),
+ InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND),
+ Constraints = "$RTRUE = $RDEST" in {
+def FCMOVEQS : FPForm<0x17, 0x02A,
+ "fcmoveq $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if = zero
+def FCMOVGES : FPForm<0x17, 0x02D,
+ "fcmovge $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if >= zero
+def FCMOVGTS : FPForm<0x17, 0x02F,
+ "fcmovgt $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if > zero
+def FCMOVLES : FPForm<0x17, 0x02E,
+ "fcmovle $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if <= zero
+def FCMOVLTS : FPForm<0x17, 0x02C,
+ "fcmovlt $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; // FCMOVE if < zero
+def FCMOVNES : FPForm<0x17, 0x02B,
+ "fcmovne $RCOND,$RTRUE,$RDEST",
+ [], s_fcmov>; //FCMOVE if != zero
}
//conditional moves, doubles
-let OutOperandList = (outs F8RC:$RDEST), InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
- isTwoAddress = 1 in {
+let OutOperandList = (outs F8RC:$RDEST),
+ InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND),
+ Constraints = "$RTRUE = $RDEST" in {
def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>;
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index c083d8c..dc9d935 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -74,20 +74,6 @@ const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
return CalleeSavedRegs;
}
-const TargetRegisterClass* const*
-AlphaRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
- &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
- &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
- &Alpha::F8RCRegClass, &Alpha::F8RCRegClass,
- &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, 0
- };
- return CalleeSavedRegClasses;
-}
-
BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(Alpha::R15);
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index 720367a..f9fd87a 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -30,9 +30,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const* getCalleeSavedRegClasses(
- const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
index b4da96c..80ee107 100644
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -132,8 +132,8 @@ static void UpdateNodeOperand(SelectionDAG &DAG,
SDValue Val) {
SmallVector<SDValue, 8> ops(N->op_begin(), N->op_end());
ops[Num] = Val;
- SDValue New = DAG.UpdateNodeOperands(SDValue(N, 0), ops.data(), ops.size());
- DAG.ReplaceAllUsesWith(N, New.getNode());
+ SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size());
+ DAG.ReplaceAllUsesWith(N, New);
}
// After instruction selection, insert COPY_TO_REGCLASS nodes to help in
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index adf2118..6e828e1 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -143,7 +143,7 @@ SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op,
DebugLoc DL = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- Op = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op);
}
@@ -205,8 +205,7 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
} else {
assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
unsigned ObjSize = VA.getLocVT().getStoreSize();
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(),
- true, false);
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
false, false, 0));
@@ -220,6 +219,7 @@ SDValue
BlackfinTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to locations.
@@ -245,7 +245,7 @@ BlackfinTargetLowering::LowerReturn(SDValue Chain,
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue Opi = Outs[i].Val;
+ SDValue Opi = OutVals[i];
// Expand to i32 if necessary
switch (VA.getLocInfo()) {
@@ -278,6 +278,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -301,7 +302,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -357,7 +358,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
index a784248..6bebcc3 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -63,6 +63,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -71,6 +72,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
};
} // end namespace llvm
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index 73924b7..a74d42d 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -104,10 +104,8 @@ unsigned BlackfinInstrInfo::
InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc operand
- DebugLoc DL;
-
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -124,69 +122,73 @@ InsertBranch(MachineBasicBlock &MBB,
llvm_unreachable("Implement conditional branches!");
}
-static bool inClass(const TargetRegisterClass &Test,
- unsigned Reg,
- const TargetRegisterClass *RC) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
- return Test.contains(Reg);
- else
- return &Test==RC || Test.hasSubClass(RC);
-}
-
-bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg,
- unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (inClass(BF::ALLRegClass, DestReg, DestRC) &&
- inClass(BF::ALLRegClass, SrcReg, SrcRC)) {
- BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg);
- return true;
+void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (BF::ALLRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(BF::MOVE), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- if (inClass(BF::D16RegClass, DestReg, DestRC) &&
- inClass(BF::D16RegClass, SrcReg, SrcRC)) {
- BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg).addReg(SrcReg).addImm(0);
- return true;
+ if (BF::D16RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
}
- if (inClass(BF::AnyCCRegClass, SrcReg, SrcRC) &&
- inClass(BF::DRegClass, DestReg, DestRC)) {
- if (inClass(BF::NotCCRegClass, SrcReg, SrcRC)) {
- BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg).addReg(SrcReg);
+ if (BF::DRegClass.contains(DestReg)) {
+ if (SrcReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0);
- } else {
- BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg).addReg(SrcReg);
+ return;
+ }
+ if (SrcReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- return true;
}
- if (inClass(BF::AnyCCRegClass, DestReg, DestRC) &&
- inClass(BF::DRegClass, SrcReg, SrcRC)) {
- if (inClass(BF::NotCCRegClass, DestReg, DestRC))
- BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg).addReg(SrcReg);
- else
- BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg).addReg(SrcReg);
- return true;
+ if (BF::DRegClass.contains(SrcReg)) {
+ if (DestReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0);
+ return;
+ }
+ if (DestReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
}
- if (inClass(BF::NotCCRegClass, DestReg, DestRC) &&
- inClass(BF::JustCCRegClass, SrcReg, SrcRC)) {
- BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg).addReg(SrcReg);
- return true;
+
+ if (DestReg == BF::NCC && SrcReg == BF::CC) {
+ BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- if (inClass(BF::JustCCRegClass, DestReg, DestRC) &&
- inClass(BF::NotCCRegClass, SrcReg, SrcRC)) {
- BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg).addReg(SrcReg);
- return true;
+ if (DestReg == BF::CC && SrcReg == BF::NCC) {
+ BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- llvm_unreachable((std::string("Bad regclasses for reg-to-reg copy: ")+
- SrcRC->getName() + " -> " + DestRC->getName()).c_str());
- return false;
+ llvm_unreachable("Bad reg-to-reg copy");
+}
+
+static bool inClass(const TargetRegisterClass &Test,
+ unsigned Reg,
+ const TargetRegisterClass *RC) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Test.contains(Reg);
+ else
+ return &Test==RC || Test.hasSubClass(RC);
}
void
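The CC/NCC cases in copyPhysReg above shuttle a flag bit into and out of a data register. One subtlety, going by the opcode names (this reading is an inference, not stated in the patch): copying NCC into a D register takes two instructions because the flag is stored negated, so MOVENCC_z zero-extends the negated value and BITTGL toggles bit 0 to undo the negation. Schematically, with an illustrative register:

    // NCC -> R0:  MOVENCC_z R0, NCC  then  BITTGL R0, 0   (undo negation)
    // CC  -> R0:  MOVECC_zext R0, CC                      (zero-extend)
    // R0 -> NCC:  SETEQri_not NCC, R0, 0                  (NCC = (R0 == 0))
    // R0 -> CC:   MOVECC_nz CC, R0                        (CC  = (R0 != 0))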
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
index c1dcd58..6c35917 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -44,14 +44,13 @@ namespace llvm {
InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
-
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index 5cf350a..8034a7f 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -488,7 +488,7 @@ def MOVE: F1<(outs ALL:$dst), (ins ALL:$src),
"$dst = $src;",
[]>;
-let isTwoAddress = 1 in
+let Constraints = "$src1 = $dst" in
def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc),
"if $cc $dst = $src2;",
[(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>;
@@ -645,7 +645,7 @@ def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2),
// Table C-15. Bit Operations Instructions
//===----------------------------------------------------------------------===//
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2),
"bitclr($dst, $src2);",
[(set D:$dst, (and D:$src1, uimm5imask:$src2))]>;
@@ -691,7 +691,7 @@ multiclass SHIFT32<SDNode opnode, string ops> {
}
let Defs = [AZ, AN, V, VS],
- isTwoAddress = 1 in {
+ Constraints = "$src = $dst" in {
defm SRA : SHIFT32<sra, ">>>">;
defm SRL : SHIFT32<srl, ">>">;
defm SLL : SHIFT32<shl, "<<">;
@@ -748,7 +748,7 @@ def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2),
"$dst = $src1 + $src2;",
[(set D16:$dst, (add D16:$src1, D16:$src2))]>;
-let isTwoAddress = 1 in
+let Constraints = "$src1 = $dst" in
def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2),
"$dst += $src2;",
[(set D:$dst, (add D:$src1, imm7:$src2))]>;
@@ -775,7 +775,7 @@ def NEG: F1<(outs D:$dst), (ins D:$src),
def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2),
"$dst = $src1 + $src2;", []>;
-let isTwoAddress = 1 in
+let Constraints = "$src1 = $dst" in
def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2),
"$dst += $src2;", []>;
@@ -802,7 +802,7 @@ def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2),
}
-let isTwoAddress = 1 in
+let Constraints = "$src1 = $dst" in
def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2),
"$dst *= $src2;",
[(set D:$dst, (mul D:$src1, D:$src2))]>;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 5153ace..06e95de 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -48,17 +48,6 @@ BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CalleeSavedRegs;
}
-const TargetRegisterClass* const *BlackfinRegisterInfo::
-getCalleeSavedRegClasses(const MachineFunction *MF) const {
- using namespace BF;
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &PRegClass,
- &DRegClass, &DRegClass, &DRegClass, &DRegClass,
- &PRegClass, &PRegClass, &PRegClass,
- 0 };
- return CalleeSavedRegClasses;
-}
-
BitVector
BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
using namespace BF;
@@ -86,25 +75,6 @@ BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-const TargetRegisterClass*
-BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
- assert(isPhysicalRegister(reg) && "reg must be a physical register");
-
- // Pick the smallest register class of the right type that contains
- // this physreg.
- const TargetRegisterClass* BestRC = 0;
- for (regclass_iterator I = regclass_begin(), E = regclass_end();
- I != E; ++I) {
- const TargetRegisterClass* RC = *I;
- if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
- (!BestRC || RC->getNumRegs() < BestRC->getNumRegs()))
- BestRC = RC;
- }
-
- assert(BestRC && "Couldn't find the register class");
- return BestRC;
-}
-
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 03c5450..ead0b4a 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -33,9 +33,6 @@ namespace llvm {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
// getSubReg implemented by tablegen
@@ -44,9 +41,6 @@ namespace llvm {
return &BF::PRegClass;
}
- const TargetRegisterClass *getPhysicalRegisterRegClass(unsigned reg,
- EVT VT) const;
-
bool hasFP(const MachineFunction &MF) const;
// bool hasReservedCallFrame(MachineFunction &MF) const;
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 55b8aaa..e8d8474 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -264,7 +264,7 @@ namespace {
//
static const AllocaInst *isDirectAlloca(const Value *V) {
const AllocaInst *AI = dyn_cast<AllocaInst>(V);
- if (!AI) return false;
+ if (!AI) return 0;
if (AI->isArrayAllocation())
return 0; // FIXME: we can also inline fixed size array allocas!
if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock())
@@ -2889,7 +2889,7 @@ void CWriter::visitCallInst(CallInst &I) {
bool hasByVal = I.hasByValArgument();
bool isStructRet = I.hasStructRetAttr();
if (isStructRet) {
- writeOperandDeref(I.getOperand(1));
+ writeOperandDeref(I.getArgOperand(0));
Out << " = ";
}
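This rename recurs through the remaining CBackend hunks: a CallInst's raw operand list also contains the callee, so indexing arguments through getOperand hard-codes the callee's position and is off-by-one relative to the argument list. getArgOperand(i) indexes the call's arguments directly, wherever the callee operand sits. Schematically:

    // old assumption:  I.getOperand(0) == callee,
    //                  I.getOperand(1) == first argument
    // new accessor:    I.getArgOperand(0) == first argument,
    //                  independent of the callee operand's position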
@@ -2944,8 +2944,8 @@ void CWriter::visitCallInst(CallInst &I) {
}
unsigned NumDeclaredParams = FTy->getNumParams();
-
- CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end();
+ CallSite CS(&I);
+ CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
unsigned ArgNo = 0;
if (isStructRet) { // Skip struct return argument.
++AI;
@@ -2999,7 +2999,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
Out << "0; ";
Out << "va_start(*(va_list*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", ";
// Output the last argument to the enclosing function.
if (I.getParent()->getParent()->arg_empty())
@@ -3009,9 +3009,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
Out << ')';
return true;
case Intrinsic::vaend:
- if (!isa<ConstantPointerNull>(I.getOperand(1))) {
+ if (!isa<ConstantPointerNull>(I.getArgOperand(0))) {
Out << "0; va_end(*(va_list*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ')';
} else {
Out << "va_end(*(va_list*)0)";
@@ -3020,47 +3020,47 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
case Intrinsic::vacopy:
Out << "0; ";
Out << "va_copy(*(va_list*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", *(va_list*)";
- writeOperand(I.getOperand(2));
+ writeOperand(I.getArgOperand(1));
Out << ')';
return true;
case Intrinsic::returnaddress:
Out << "__builtin_return_address(";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ')';
return true;
case Intrinsic::frameaddress:
Out << "__builtin_frame_address(";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ')';
return true;
case Intrinsic::powi:
Out << "__builtin_powi(";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", ";
- writeOperand(I.getOperand(2));
+ writeOperand(I.getArgOperand(1));
Out << ')';
return true;
case Intrinsic::setjmp:
Out << "setjmp(*(jmp_buf*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ')';
return true;
case Intrinsic::longjmp:
Out << "longjmp(*(jmp_buf*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", ";
- writeOperand(I.getOperand(2));
+ writeOperand(I.getArgOperand(1));
Out << ')';
return true;
case Intrinsic::prefetch:
Out << "LLVM_PREFETCH((const void *)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", ";
- writeOperand(I.getOperand(2));
+ writeOperand(I.getArgOperand(1));
Out << ", ";
- writeOperand(I.getOperand(3));
+ writeOperand(I.getArgOperand(2));
Out << ")";
return true;
case Intrinsic::stacksave:
@@ -3077,7 +3077,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
printType(Out, I.getType());
Out << ')';
// Multiple GCC builtins multiplex onto this intrinsic.
- switch (cast<ConstantInt>(I.getOperand(3))->getZExtValue()) {
+ switch (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue()) {
default: llvm_unreachable("Invalid llvm.x86.sse.cmp!");
case 0: Out << "__builtin_ia32_cmpeq"; break;
case 1: Out << "__builtin_ia32_cmplt"; break;
@@ -3098,9 +3098,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
Out << 'd';
Out << "(";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ", ";
- writeOperand(I.getOperand(2));
+ writeOperand(I.getArgOperand(1));
Out << ")";
return true;
case Intrinsic::ppc_altivec_lvsl:
@@ -3108,7 +3108,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
printType(Out, I.getType());
Out << ')';
Out << "__builtin_altivec_lvsl(0, (void*)";
- writeOperand(I.getOperand(1));
+ writeOperand(I.getArgOperand(0));
Out << ")";
return true;
}
@@ -3221,7 +3221,7 @@ void CWriter::visitInlineAsm(CallInst &CI) {
DestVal = ResultVals[ValueCount].first;
DestValNo = ResultVals[ValueCount].second;
} else
- DestVal = CI.getOperand(ValueCount-ResultVals.size()+1);
+ DestVal = CI.getArgOperand(ValueCount-ResultVals.size());
if (I->isEarlyClobber)
C = "&"+C;
@@ -3255,7 +3255,7 @@ void CWriter::visitInlineAsm(CallInst &CI) {
}
assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
- Value *SrcVal = CI.getOperand(ValueCount-ResultVals.size()+1);
+ Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
Out << "\"" << C << "\"(";
if (!I->isIndirect)
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
index 10dc837..ec2f663 100644
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -34,76 +34,19 @@ def RetCC_SPU : CallingConv<[
//===----------------------------------------------------------------------===//
// CellSPU Argument Calling Conventions
-// (note: this isn't used, but presumably should be at some point when other
-// targets do.)
//===----------------------------------------------------------------------===//
-/*
-def CC_SPU : CallingConv<[
- CCIfType<[i8], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[i16], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[i64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[f64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
- CCIfType<[v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
- CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
- R12, R13, R14, R15, R16, R17, R18, R19, R20,
- R21, R22, R23, R24, R25, R26, R27, R28, R29,
- R30, R31, R32, R33, R34, R35, R36, R37, R38,
- R39, R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55, R56,
- R57, R58, R59, R60, R61, R62, R63, R64, R65,
- R66, R67, R68, R69, R70, R71, R72, R73, R74,
- R75, R76, R77, R78, R79]>>,
-
+def CCC_SPU : CallingConv<[
+ CCIfType<[i8, i16, i32, i64, i128, f32, f64,
+ v16i8, v8i16, v4i32, v4f32, v2i64, v2f64],
+ CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11,
+ R12, R13, R14, R15, R16, R17, R18, R19, R20,
+ R21, R22, R23, R24, R25, R26, R27, R28, R29,
+ R30, R31, R32, R33, R34, R35, R36, R37, R38,
+ R39, R40, R41, R42, R43, R44, R45, R46, R47,
+ R48, R49, R50, R51, R52, R53, R54, R55, R56,
+ R57, R58, R59, R60, R61, R62, R63, R64, R65,
+ R66, R67, R68, R69, R70, R71, R72, R73, R74,
+ R75, R76, R77, R78, R79]>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
@@ -112,4 +55,3 @@ def CC_SPU : CallingConv<[
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCAssignToStack<16, 16>>
]>;
-*/
diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h
index e8ca333..f511acd 100644
--- a/lib/Target/CellSPU/SPUFrameInfo.h
+++ b/lib/Target/CellSPU/SPUFrameInfo.h
@@ -53,10 +53,6 @@ namespace llvm {
static int minStackSize() {
return (2 * stackSlotSize());
}
- //! Frame size required to spill all registers plus frame info
- static int fullSpillSize() {
- return (SPURegisterInfo::getNumArgRegs() * stackSlotSize());
- }
//! Convert frame index to stack offset
static int FItoStackOffset(int frame_index) {
return frame_index * stackSlotSize();
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9afdb2b..9b8c2dd 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -275,7 +275,6 @@ namespace {
SDNode *emitBuildVector(SDNode *bvNode) {
EVT vecVT = bvNode->getValueType(0);
- EVT eltVT = vecVT.getVectorElementType();
DebugLoc dl = bvNode->getDebugLoc();
// Check to see if this vector can be represented as a CellSPU immediate
@@ -606,18 +605,14 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
Base = CurDAG->getTargetConstant(0, N.getValueType());
Index = N;
return true;
- } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) {
+  } else if (Opc == ISD::Register
+             || Opc == ISD::CopyFromReg
+             || Opc == ISD::UNDEF) {
unsigned OpOpc = Op->getOpcode();
if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) {
// Direct load/store without getelementptr
- SDValue Addr, Offs;
-
- // Get the register from CopyFromReg
- if (Opc == ISD::CopyFromReg)
- Addr = N.getOperand(1);
- else
- Addr = N; // Register
+ SDValue Offs;
Offs = ((OpOpc == ISD::STORE) ? Op->getOperand(3) : Op->getOperand(2));
@@ -626,7 +621,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base,
Offs = CurDAG->getTargetConstant(0, Offs.getValueType());
Base = Offs;
- Index = Addr;
+ Index = N;
return true;
}
} else {
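
The predicate now treats an UNDEF base like a plain register and reuses N itself as the index, rather than digging the address back out of a CopyFromReg. For context, a d-form access is a base register plus a small scaled displacement; anything else must be selected as x-form (register + register). A hedged sketch of the fit test, assuming the 10-bit signed immediate scaled by 16 that the quadword loads encode (fitsDForm is a made-up helper; the SPU ISA document is authoritative):

#include <cstdint>
#include <cstdio>

// True when `offset` can ride in a d-form displacement field.
static bool fitsDForm(int64_t offset) {
  return (offset & 0xF) == 0 && offset >= -8192 && offset <= 8176;
}

int main() {
  printf("%d %d %d\n", fitsDForm(0), fitsDForm(48), fitsDForm(8));  // 1 1 0
  return 0;
}
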
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 081e8d0..ece19b9 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -953,7 +953,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ PtrVT, GSDN->getOffset());
const TargetMachine &TM = DAG.getTarget();
SDValue Zero = DAG.getConstant(0, PtrVT);
// FIXME there is no actual debug info here
@@ -1013,22 +1014,26 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
MachineRegisterInfo &RegInfo = MF.getRegInfo();
SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
- const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
- const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
-
unsigned ArgOffset = SPUFrameInfo::minStackSize();
unsigned ArgRegIdx = 0;
unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
+
// Add DAG nodes to load the arguments or copy them out of registers.
for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
SDValue ArgVal;
+ CCValAssign &VA = ArgLocs[ArgNo];
- if (ArgRegIdx < NumArgRegs) {
+ if (VA.isRegLoc()) {
const TargetRegisterClass *ArgRegClass;
switch (ObjectVT.getSimpleVT().SimpleTy) {
@@ -1067,14 +1072,14 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
}
unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
- RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
+ RegInfo.addLiveIn(VA.getLocReg(), VReg);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++ArgRegIdx;
} else {
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type
// or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
ArgOffset += StackSlotSize;
@@ -1087,16 +1092,31 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// vararg handling:
if (isVarArg) {
- // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
+ // FIXME: we should be able to query the argument registers from
+ // tablegen generated code.
+ static const unsigned ArgRegs[] = {
+ SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
+ SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
+ SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
+ SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
+ SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
+ SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
+ SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
+ SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
+ SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
+ SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
+ SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
+ };
+    // size of the ArgRegs array above (R3..R79, i.e. 77 registers)
+    unsigned NumArgRegs = 77;
+
// We will spill (79-3)+1 registers to the stack
SmallVector<SDValue, 79-3+1> MemOps;
// Create the frame slot
-
for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(StackSlotSize, ArgOffset,
- true, false));
+ MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
@@ -1135,6 +1155,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -1144,8 +1165,15 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
unsigned NumOps = Outs.size();
unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
- const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
- const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ // FIXME: allow for other calling conventions
+ CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);
+
+ const unsigned NumArgRegs = ArgLocs.size();
+
// Handy pointer type
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -1165,8 +1193,9 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// And the arguments passed on the stack
SmallVector<SDValue, 8> MemOpChains;
- for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = Outs[i].Val;
+ for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
+ SDValue Arg = OutVals[ArgRegIdx];
+ CCValAssign &VA = ArgLocs[ArgRegIdx];
// PtrOff will be used to store the current argument to the stack if a
// register cannot be found for it.
@@ -1180,24 +1209,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case MVT::i32:
case MVT::i64:
case MVT::i128:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
- false, false, 0));
- ArgOffset += StackSlotSize;
- }
- break;
case MVT::f32:
case MVT::f64:
- if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
- } else {
- MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
- false, false, 0));
- ArgOffset += StackSlotSize;
- }
- break;
case MVT::v2i64:
case MVT::v2f64:
case MVT::v4f32:
@@ -1205,7 +1218,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case MVT::v8i16:
case MVT::v16i8:
if (ArgRegIdx != NumArgRegs) {
- RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
false, false, 0));
@@ -1249,7 +1262,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const GlobalValue *GV = G->getGlobal();
EVT CalleeVT = Callee.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
- SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);
if (!ST->usingLargeMem()) {
// Turn calls to targets that are defined (i.e., have bodies) into BRSL
@@ -1355,6 +1368,7 @@ SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
@@ -1376,7 +1390,7 @@ SPUTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
Flag = Chain.getValue(1);
}
@@ -1746,15 +1760,20 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned V0Elt = 0;
bool monotonic = true;
bool rotate = true;
+ EVT maskVT; // which of the c?d instructions to use
if (EltVT == MVT::i8) {
V2EltIdx0 = 16;
+ maskVT = MVT::v16i8;
} else if (EltVT == MVT::i16) {
V2EltIdx0 = 8;
+ maskVT = MVT::v8i16;
} else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
V2EltIdx0 = 4;
+ maskVT = MVT::v4i32;
} else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
V2EltIdx0 = 2;
+ maskVT = MVT::v2i64;
} else
llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
@@ -1786,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
} else {
rotate = false;
}
- } else if (PrevElt == 0) {
+ } else if (i == 0) {
// First time through, need to keep track of previous element
PrevElt = SrcElt;
} else {
@@ -1798,18 +1817,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (EltsFromV2 == 1 && monotonic) {
// Compute mask and shuffle
- MachineFunction &MF = DAG.getMachineFunction();
- MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- // Initialize temporary register to 0
- SDValue InitTempReg =
- DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
- // Copy register's contents as index in SHUFFLE_MASK:
- SDValue ShufMaskOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
- DAG.getTargetConstant(V2Elt, MVT::i32),
- DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
+
+      // As SHUFFLE_MASK becomes a c?d instruction, feed it an address.
+      // R1 ($sp) is used here only because its low bits are known to be zero.
+ SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
+ DAG.getRegister(SPU::R1, PtrVT),
+ DAG.getConstant(V2Elt, MVT::i32));
+ SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
+ maskVT, Pointer);
+
// Use shuffle mask in SHUFB synthetic instruction:
return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
ShufMaskOp);
@@ -2056,14 +2073,19 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
- ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
- assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+  // Use index 0 when the lane to insert into is 'undef'.
+  int64_t Idx = 0;
+ if (IdxOp.getOpcode() != ISD::UNDEF) {
+ ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
+ assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
+ Idx = (CN->getSExtValue());
+ }
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
// Use $sp ($1) because it's always 16-byte aligned and it's available:
SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
DAG.getRegister(SPU::R1, PtrVT),
- DAG.getConstant(CN->getSExtValue(), PtrVT));
+ DAG.getConstant(Idx, PtrVT));
SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
SDValue result =
@@ -2862,7 +2884,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
case SPUISD::IndirectAddr: {
if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (CN != 0 && CN->getZExtValue() == 0) {
+ if (CN != 0 && CN->isNullValue()) {
// (SPUindirect (SPUaform <addr>, 0), 0) ->
// (SPUaform <addr>, 0)
@@ -3056,12 +3078,10 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
// Default, for the time being, to the base class handler
- TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
- Ops, DAG);
+ TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}
/// isLegalAddressImmediate - Return true if the integer value can be used
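
Two hunks above lean on the same trick: SHUFFLE_MASK now lowers to one of the c?d instructions (cbd/chd/cwd/cdd, selected via the new maskVT), fed an address formed off R1 precisely because the stack pointer's low bits are zero, so the immediate alone picks the insertion slot. A standalone simulation of the word-sized case, assuming the usual SHUFB control semantics where 0x00..0x0F selects bytes of the first operand and 0x10..0x1F bytes of the second (cwd here is our model, not the hardware definition):

#include <cstdint>
#include <cstdio>

// "Controls for word insertion": identity over the second operand, with
// the first operand's preferred word spliced in at the addressed slot.
static void cwd(uint32_t addr, uint8_t mask[16]) {
  for (int i = 0; i < 16; ++i)
    mask[i] = (uint8_t)(0x10 + i);  // pass the second operand through
  unsigned slot = addr & 0xC;       // word-aligned low bits pick the slot
  for (int i = 0; i < 4; ++i)
    mask[slot + i] = (uint8_t)i;    // bytes 0..3: op1's preferred word
}

int main() {
  uint8_t m[16];
  cwd(8, m);  // e.g. $sp + 8: insert into word 2
  for (int i = 0; i < 16; ++i)
    printf("%02x ", m[i]);
  printf("\n");
  return 0;
}
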
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h
index 9ebd442..6d3c90b 100644
--- a/lib/Target/CellSPU/SPUISelLowering.h
+++ b/lib/Target/CellSPU/SPUISelLowering.h
@@ -134,7 +134,6 @@ namespace llvm {
EVT VT) const;
void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -160,6 +159,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -168,6 +168,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
};
}
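
The extra OutVals parameter mirrors a target-independent change in this revision: ISD::OutputArg no longer carries the SDValue itself (the old Outs[i].Val), so flags and values now travel in parallel vectors. A toy rendering of the shape, with plain stand-in types for the LLVM ones:

#include <cstddef>
#include <cstdio>
#include <vector>

struct OutputArgStub { bool IsSExt; };  // describes how a value is passed
typedef int SDValueStub;                // stand-in for the value itself

static void lowerReturn(const std::vector<OutputArgStub> &Outs,
                        const std::vector<SDValueStub> &OutVals) {
  for (std::size_t i = 0, e = Outs.size(); i != e; ++i)
    // Outs[i] says how to pass it; OutVals[i] is what to pass.
    std::printf("ret %zu: sext=%d value=%d\n", i, (int)Outs[i].IsSExt,
                OutVals[i]);
}

int main() {
  lowerReturn({{true}, {false}}, {7, 9});
  return 0;
}
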
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 4c53c98..69aa088 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -164,11 +164,9 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI,
MI.getOperand(0).isReg() &&
MI.getOperand(1).isReg() &&
"invalid SPU OR<type>_<vec> or LR instruction!");
- if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
sourceReg = MI.getOperand(1).getReg();
destReg = MI.getOperand(0).getReg();
return true;
- }
break;
}
case SPU::ORv16i8:
@@ -251,40 +249,18 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return 0;
}
-bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const
+void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const
{
// We support cross register class moves for our aliases, such as R3 in any
// reg class to any other reg class containing R3. This is required because
// we instruction select bitconvert i64 -> f64 as a noop for example, so our
// types have no specific meaning.
- if (DestRC == SPU::R8CRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::R16CRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRr16), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::R32CRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRr32), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::R32FPRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRf32), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::R64CRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRr64), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::R64FPRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRf64), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::GPRCRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRr128), DestReg).addReg(SrcReg);
- } else if (DestRC == SPU::VECREGRegisterClass) {
- BuildMI(MBB, MI, DL, get(SPU::LRv16i8), DestReg).addReg(SrcReg);
- } else {
- // Attempt to copy unknown/unsupported register class!
- return false;
- }
-
- return true;
+ BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
void
@@ -356,88 +332,6 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx);
}
-//! Return true if the specified load or store can be folded
-bool
-SPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- // Make sure this is a reg-reg copy.
- unsigned Opc = MI->getOpcode();
-
- switch (Opc) {
- case SPU::ORv16i8:
- case SPU::ORv8i16:
- case SPU::ORv4i32:
- case SPU::ORv2i64:
- case SPU::ORr8:
- case SPU::ORr16:
- case SPU::ORr32:
- case SPU::ORr64:
- case SPU::ORf32:
- case SPU::ORf64:
- if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg())
- return true;
- break;
- }
-
- return false;
-}
-
-/// foldMemoryOperand - SPU, like PPC, can only fold spills into
-/// copy instructions, turning them into load/store instructions.
-MachineInstr *
-SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const
-{
- if (Ops.size() != 1) return 0;
-
- unsigned OpNum = Ops[0];
- unsigned Opc = MI->getOpcode();
- MachineInstr *NewMI = 0;
-
- switch (Opc) {
- case SPU::ORv16i8:
- case SPU::ORv8i16:
- case SPU::ORv4i32:
- case SPU::ORv2i64:
- case SPU::ORr8:
- case SPU::ORr16:
- case SPU::ORr32:
- case SPU::ORr64:
- case SPU::ORf32:
- case SPU::ORf64:
- if (OpNum == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- if (FrameIndex < SPUFrameInfo::maxFrameOffset()) {
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(),
- get(SPU::STQDr32));
-
- MIB.addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef));
- NewMI = addFrameReference(MIB, FrameIndex);
- }
- } else { // move -> load
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc));
-
- MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead) |
- getUndefRegState(isUndef));
- Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset())
- ? SPU::STQDr32 : SPU::STQXr32;
- NewMI = addFrameReference(MIB, FrameIndex);
- break;
- }
- }
-
- return NewMI;
-}
-
//! Branch analysis
/*!
\note This code was kiped from PPC. There may be more branch analysis for
@@ -554,9 +448,8 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
@@ -566,14 +459,14 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (FBB == 0) {
if (Cond.empty()) {
// Unconditional branch
- MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(SPU::BR));
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(SPU::BR));
MIB.addMBB(TBB);
DEBUG(errs() << "Inserted one-way uncond branch: ");
DEBUG((*MIB).dump());
} else {
// Conditional branch
- MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm()));
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
MIB.addReg(Cond[1].getReg()).addMBB(TBB);
DEBUG(errs() << "Inserted one-way cond branch: ");
@@ -581,8 +474,8 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
return 1;
} else {
- MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm()));
- MachineInstrBuilder MIB2 = BuildMI(&MBB, dl, get(SPU::BR));
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm()));
+ MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR));
// Two-way Conditional Branch.
MIB.addReg(Cond[1].getReg()).addMBB(TBB);
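
copyRegToReg's per-class dispatch collapses into a single LRr128 because every SPU register class, from r8 up to the vector classes, is just a differently typed view of the same 128-bit registers, so copying all 128 bits is always correct. A toy model of that invariant (Reg128 and copyPhysReg here are illustrative, not the backend's types):

#include <cassert>
#include <cstdint>

// All SPU classes name the same 128-bit physical registers; narrower
// values simply live in the preferred slot of the quadword.
struct Reg128 { uint64_t hi, lo; };

// One move suffices for every class: copy the whole quadword.
static Reg128 copyPhysReg(const Reg128 &src) { return src; }

int main() {
  Reg128 r3 = {0x0123456789abcdefULL, 0xfedcba9876543210ULL};
  Reg128 r4 = copyPhysReg(r3);  // valid whether r3 held an i8 or a v4f32
  assert(r4.hi == r3.hi && r4.lo == r3.lo);
  return 0;
}
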
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index 6dabd7c..fbb1733 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -23,19 +23,6 @@ namespace llvm {
class SPUInstrInfo : public TargetInstrInfoImpl {
SPUTargetMachine &TM;
const SPURegisterInfo RI;
- protected:
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
public:
explicit SPUInstrInfo(SPUTargetMachine &tm);
@@ -56,12 +43,10 @@ namespace llvm {
unsigned isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
//! Store a register to a stack slot, based on its register class.
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -77,11 +62,6 @@ namespace llvm {
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- //! Return true if the specified load or store can be folded
- virtual
- bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
//! Reverses a branch's condition, returning false on success.
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
@@ -94,8 +74,9 @@ namespace llvm {
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
};
}
diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td
index 846c7ed..647da30 100644
--- a/lib/Target/CellSPU/SPUNodes.td
+++ b/lib/Target/CellSPU/SPUNodes.td
@@ -21,7 +21,7 @@ def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>;
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq,
[SDNPHasChain, SDNPOutFlag]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq,
- [SDNPHasChain, SDNPOutFlag]>;
+ [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
//===----------------------------------------------------------------------===//
// Operand constraints:
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index d8937ec..f7cfa42 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -191,33 +191,6 @@ SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
{
}
-// SPU's 128-bit registers used for argument passing:
-static const unsigned SPU_ArgRegs[] = {
- SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9,
- SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
- SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
- SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
- SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
- SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
- SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
- SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
- SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
- SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
- SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
-};
-
-const unsigned *
-SPURegisterInfo::getArgRegs()
-{
- return SPU_ArgRegs;
-}
-
-unsigned
-SPURegisterInfo::getNumArgRegs()
-{
- return sizeof(SPU_ArgRegs) / sizeof(SPU_ArgRegs[0]);
-}
-
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
const TargetRegisterClass *
@@ -251,36 +224,6 @@ SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const
return SPU_CalleeSaveRegs;
}
-const TargetRegisterClass* const*
-SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
-{
- // Cell ABI Calling Convention
- static const TargetRegisterClass * const SPU_CalleeSaveRegClasses[] = {
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass,
- &SPU::GPRCRegClass, /* environment pointer */
- &SPU::GPRCRegClass, /* stack pointer */
- &SPU::GPRCRegClass, /* link register */
- 0 /* end */
- };
-
- return SPU_CalleeSaveRegClasses;
-}
-
/*!
R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is
generally unused) are the Cell's reserved registers
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 0a70318..7a6ae6d 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -49,10 +49,6 @@ namespace llvm {
//! Return the array of callee-saved registers
virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const;
- //! Return the register class array of the callee-saved registers
- virtual const TargetRegisterClass* const *
- getCalleeSavedRegClasses(const MachineFunction *MF) const;
-
//! Allow for scavenging, so we can get scratch registers when needed.
virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
{ return true; }
@@ -90,15 +86,6 @@ namespace llvm {
// New methods added:
//------------------------------------------------------------------------
- //! Return the array of argument passing registers
- /*!
- \note The size of this array is returned by getArgRegsSize().
- */
- static const unsigned *getArgRegs();
-
- //! Return the size of the argument passing register array
- static unsigned getNumArgRegs();
-
//! Get DWARF debugging register number
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 45a0c84..145568a 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -99,11 +99,12 @@ namespace {
ValueSet DefinedValues;
ForwardRefMap ForwardRefs;
bool is_inline;
+ unsigned indent_level;
public:
static char ID;
explicit CppWriter(formatted_raw_ostream &o) :
- ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false) {}
+    ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0) {}
virtual const char *getPassName() const { return "C++ backend"; }
@@ -120,6 +121,11 @@ namespace {
void error(const std::string& msg);
+
+ formatted_raw_ostream& nl(formatted_raw_ostream &Out, int delta = 0);
+ inline void in() { indent_level++; }
+  inline void out() { if (indent_level > 0) indent_level--; }
+
private:
void printLinkageType(GlobalValue::LinkageTypes LT);
void printVisibilityType(GlobalValue::VisibilityTypes VisTypes);
@@ -153,1857 +159,1856 @@ namespace {
void printModuleBody();
};
+} // end anonymous namespace.
+
+formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) {
+ Out << '\n';
+ if (delta >= 0 || indent_level >= unsigned(-delta))
+ indent_level += delta;
+ Out.indent(indent_level);
+ return Out;
+}
+
+static inline void sanitize(std::string &str) {
+ for (size_t i = 0; i < str.length(); ++i)
+ if (!isalnum(str[i]) && str[i] != '_')
+ str[i] = '_';
+}
- static unsigned indent_level = 0;
- inline formatted_raw_ostream& nl(formatted_raw_ostream& Out, int delta = 0) {
- Out << "\n";
- if (delta >= 0 || indent_level >= unsigned(-delta))
- indent_level += delta;
- for (unsigned i = 0; i < indent_level; ++i)
- Out << " ";
- return Out;
+static std::string getTypePrefix(const Type *Ty) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "void_";
+ case Type::IntegerTyID:
+ return "int" + utostr(cast<IntegerType>(Ty)->getBitWidth()) + "_";
+ case Type::FloatTyID: return "float_";
+ case Type::DoubleTyID: return "double_";
+ case Type::LabelTyID: return "label_";
+ case Type::FunctionTyID: return "func_";
+ case Type::StructTyID: return "struct_";
+ case Type::ArrayTyID: return "array_";
+ case Type::PointerTyID: return "ptr_";
+ case Type::VectorTyID: return "packed_";
+ case Type::OpaqueTyID: return "opaque_";
+ default: return "other_";
}
+ return "unknown_";
+}
- inline void in() { indent_level++; }
- inline void out() { if (indent_level >0) indent_level--; }
+// Looks up the type in the symbol table and returns a pointer to its name or
+// a null pointer if it wasn't found. Note that this isn't the same as the
+// Module::getTypeName function, which will return an empty string, not a null
+// pointer if the name is not found.
+static const std::string *
+findTypeName(const TypeSymbolTable& ST, const Type* Ty) {
+ TypeSymbolTable::const_iterator TI = ST.begin();
+ TypeSymbolTable::const_iterator TE = ST.end();
+ for (;TI != TE; ++TI)
+ if (TI->second == Ty)
+ return &(TI->first);
+ return 0;
+}
- inline void
- sanitize(std::string& str) {
- for (size_t i = 0; i < str.length(); ++i)
- if (!isalnum(str[i]) && str[i] != '_')
- str[i] = '_';
- }
+void CppWriter::error(const std::string& msg) {
+ report_fatal_error(msg);
+}
- inline std::string
- getTypePrefix(const Type* Ty ) {
- switch (Ty->getTypeID()) {
- case Type::VoidTyID: return "void_";
- case Type::IntegerTyID:
- return std::string("int") + utostr(cast<IntegerType>(Ty)->getBitWidth()) +
- "_";
- case Type::FloatTyID: return "float_";
- case Type::DoubleTyID: return "double_";
- case Type::LabelTyID: return "label_";
- case Type::FunctionTyID: return "func_";
- case Type::StructTyID: return "struct_";
- case Type::ArrayTyID: return "array_";
- case Type::PointerTyID: return "ptr_";
- case Type::VectorTyID: return "packed_";
- case Type::OpaqueTyID: return "opaque_";
- default: return "other_";
- }
- return "unknown_";
- }
-
- // Looks up the type in the symbol table and returns a pointer to its name or
- // a null pointer if it wasn't found. Note that this isn't the same as the
- // Mode::getTypeName function which will return an empty string, not a null
- // pointer if the name is not found.
- inline const std::string*
- findTypeName(const TypeSymbolTable& ST, const Type* Ty) {
- TypeSymbolTable::const_iterator TI = ST.begin();
- TypeSymbolTable::const_iterator TE = ST.end();
- for (;TI != TE; ++TI)
- if (TI->second == Ty)
- return &(TI->first);
- return 0;
- }
-
- void CppWriter::error(const std::string& msg) {
- report_fatal_error(msg);
- }
-
- // printCFP - Print a floating point constant .. very carefully :)
- // This makes sure that conversion to/from floating yields the same binary
- // result so that we don't lose precision.
- void CppWriter::printCFP(const ConstantFP *CFP) {
- bool ignored;
- APFloat APF = APFloat(CFP->getValueAPF()); // copy
- if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
- Out << "ConstantFP::get(mod->getContext(), ";
- Out << "APFloat(";
+// printCFP - Print a floating point constant .. very carefully :)
+// This makes sure that conversion to/from floating yields the same binary
+// result so that we don't lose precision.
+void CppWriter::printCFP(const ConstantFP *CFP) {
+ bool ignored;
+ APFloat APF = APFloat(CFP->getValueAPF()); // copy
+ if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
+ APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+ Out << "ConstantFP::get(mod->getContext(), ";
+ Out << "APFloat(";
#if HAVE_PRINTF_A
- char Buffer[100];
- sprintf(Buffer, "%A", APF.convertToDouble());
- if ((!strncmp(Buffer, "0x", 2) ||
- !strncmp(Buffer, "-0x", 3) ||
- !strncmp(Buffer, "+0x", 3)) &&
- APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
- if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
- Out << "BitsToDouble(" << Buffer << ")";
- else
- Out << "BitsToFloat((float)" << Buffer << ")";
- Out << ")";
- } else {
+ char Buffer[100];
+ sprintf(Buffer, "%A", APF.convertToDouble());
+ if ((!strncmp(Buffer, "0x", 2) ||
+ !strncmp(Buffer, "-0x", 3) ||
+ !strncmp(Buffer, "+0x", 3)) &&
+ APF.bitwiseIsEqual(APFloat(atof(Buffer)))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(" << Buffer << ")";
+ else
+ Out << "BitsToFloat((float)" << Buffer << ")";
+ Out << ")";
+ } else {
#endif
- std::string StrVal = ftostr(CFP->getValueAPF());
-
- while (StrVal[0] == ' ')
- StrVal.erase(StrVal.begin());
-
- // Check to make sure that the stringized number is not some string like
- // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex.
- if (((StrVal[0] >= '0' && StrVal[0] <= '9') ||
- ((StrVal[0] == '-' || StrVal[0] == '+') &&
- (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
- (CFP->isExactlyValue(atof(StrVal.c_str())))) {
- if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
- Out << StrVal;
- else
- Out << StrVal << "f";
- } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
- Out << "BitsToDouble(0x"
- << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
- << "ULL) /* " << StrVal << " */";
+ std::string StrVal = ftostr(CFP->getValueAPF());
+
+ while (StrVal[0] == ' ')
+ StrVal.erase(StrVal.begin());
+
+ // Check to make sure that the stringized number is not some string like
+ // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex.
+ if (((StrVal[0] >= '0' && StrVal[0] <= '9') ||
+ ((StrVal[0] == '-' || StrVal[0] == '+') &&
+ (StrVal[1] >= '0' && StrVal[1] <= '9'))) &&
+ (CFP->isExactlyValue(atof(StrVal.c_str())))) {
+ if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << StrVal;
else
- Out << "BitsToFloat(0x"
- << utohexstr((uint32_t)CFP->getValueAPF().
- bitcastToAPInt().getZExtValue())
- << "U) /* " << StrVal << " */";
- Out << ")";
+ Out << StrVal << "f";
+ } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext()))
+ Out << "BitsToDouble(0x"
+ << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue())
+ << "ULL) /* " << StrVal << " */";
+ else
+ Out << "BitsToFloat(0x"
+ << utohexstr((uint32_t)CFP->getValueAPF().
+ bitcastToAPInt().getZExtValue())
+ << "U) /* " << StrVal << " */";
+ Out << ")";
#if HAVE_PRINTF_A
- }
+ }
#endif
- Out << ")";
+ Out << ")";
+}
+
+void CppWriter::printCallingConv(CallingConv::ID cc) {
+ // Print the calling convention.
+ switch (cc) {
+ case CallingConv::C: Out << "CallingConv::C"; break;
+ case CallingConv::Fast: Out << "CallingConv::Fast"; break;
+ case CallingConv::Cold: Out << "CallingConv::Cold"; break;
+ case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break;
+ default: Out << cc; break;
}
+}
- void CppWriter::printCallingConv(CallingConv::ID cc){
- // Print the calling convention.
- switch (cc) {
- case CallingConv::C: Out << "CallingConv::C"; break;
- case CallingConv::Fast: Out << "CallingConv::Fast"; break;
- case CallingConv::Cold: Out << "CallingConv::Cold"; break;
- case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break;
- default: Out << cc; break;
- }
+void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
+ switch (LT) {
+ case GlobalValue::InternalLinkage:
+ Out << "GlobalValue::InternalLinkage"; break;
+ case GlobalValue::PrivateLinkage:
+ Out << "GlobalValue::PrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateLinkage:
+ Out << "GlobalValue::LinkerPrivateLinkage"; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "GlobalValue::LinkerPrivateWeakLinkage"; break;
+ case GlobalValue::AvailableExternallyLinkage:
+ Out << "GlobalValue::AvailableExternallyLinkage "; break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ Out << "GlobalValue::LinkOnceAnyLinkage "; break;
+ case GlobalValue::LinkOnceODRLinkage:
+ Out << "GlobalValue::LinkOnceODRLinkage "; break;
+ case GlobalValue::WeakAnyLinkage:
+ Out << "GlobalValue::WeakAnyLinkage"; break;
+ case GlobalValue::WeakODRLinkage:
+ Out << "GlobalValue::WeakODRLinkage"; break;
+ case GlobalValue::AppendingLinkage:
+ Out << "GlobalValue::AppendingLinkage"; break;
+ case GlobalValue::ExternalLinkage:
+ Out << "GlobalValue::ExternalLinkage"; break;
+ case GlobalValue::DLLImportLinkage:
+ Out << "GlobalValue::DLLImportLinkage"; break;
+ case GlobalValue::DLLExportLinkage:
+ Out << "GlobalValue::DLLExportLinkage"; break;
+ case GlobalValue::ExternalWeakLinkage:
+ Out << "GlobalValue::ExternalWeakLinkage"; break;
+ case GlobalValue::CommonLinkage:
+ Out << "GlobalValue::CommonLinkage"; break;
}
+}
- void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) {
- switch (LT) {
- case GlobalValue::InternalLinkage:
- Out << "GlobalValue::InternalLinkage"; break;
- case GlobalValue::PrivateLinkage:
- Out << "GlobalValue::PrivateLinkage"; break;
- case GlobalValue::LinkerPrivateLinkage:
- Out << "GlobalValue::LinkerPrivateLinkage"; break;
- case GlobalValue::AvailableExternallyLinkage:
- Out << "GlobalValue::AvailableExternallyLinkage "; break;
- case GlobalValue::LinkOnceAnyLinkage:
- Out << "GlobalValue::LinkOnceAnyLinkage "; break;
- case GlobalValue::LinkOnceODRLinkage:
- Out << "GlobalValue::LinkOnceODRLinkage "; break;
- case GlobalValue::WeakAnyLinkage:
- Out << "GlobalValue::WeakAnyLinkage"; break;
- case GlobalValue::WeakODRLinkage:
- Out << "GlobalValue::WeakODRLinkage"; break;
- case GlobalValue::AppendingLinkage:
- Out << "GlobalValue::AppendingLinkage"; break;
- case GlobalValue::ExternalLinkage:
- Out << "GlobalValue::ExternalLinkage"; break;
- case GlobalValue::DLLImportLinkage:
- Out << "GlobalValue::DLLImportLinkage"; break;
- case GlobalValue::DLLExportLinkage:
- Out << "GlobalValue::DLLExportLinkage"; break;
- case GlobalValue::ExternalWeakLinkage:
- Out << "GlobalValue::ExternalWeakLinkage"; break;
- case GlobalValue::CommonLinkage:
- Out << "GlobalValue::CommonLinkage"; break;
- }
+void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
+ switch (VisType) {
+ default: llvm_unreachable("Unknown GVar visibility");
+ case GlobalValue::DefaultVisibility:
+ Out << "GlobalValue::DefaultVisibility";
+ break;
+ case GlobalValue::HiddenVisibility:
+ Out << "GlobalValue::HiddenVisibility";
+ break;
+ case GlobalValue::ProtectedVisibility:
+ Out << "GlobalValue::ProtectedVisibility";
+ break;
}
+}
- void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) {
- switch (VisType) {
- default: llvm_unreachable("Unknown GVar visibility");
- case GlobalValue::DefaultVisibility:
- Out << "GlobalValue::DefaultVisibility";
- break;
- case GlobalValue::HiddenVisibility:
- Out << "GlobalValue::HiddenVisibility";
- break;
- case GlobalValue::ProtectedVisibility:
- Out << "GlobalValue::ProtectedVisibility";
- break;
+// printEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+void CppWriter::printEscapedString(const std::string &Str) {
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ unsigned char C = Str[i];
+ if (isprint(C) && C != '"' && C != '\\') {
+ Out << C;
+ } else {
+ Out << "\\x"
+ << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+ << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
}
}
+}
- // printEscapedString - Print each character of the specified string, escaping
- // it if it is not printable or if it is an escape char.
- void CppWriter::printEscapedString(const std::string &Str) {
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- unsigned char C = Str[i];
- if (isprint(C) && C != '"' && C != '\\') {
- Out << C;
- } else {
- Out << "\\x"
- << (char) ((C/16 < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
- << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
- }
+std::string CppWriter::getCppName(const Type* Ty) {
+ // First, handle the primitive types .. easy
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+ switch (Ty->getTypeID()) {
+ case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
+ case Type::IntegerTyID: {
+ unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+ return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
+ }
+ case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
+ case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
+ case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
+ case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
+ default:
+ error("Invalid primitive type");
+ break;
}
+ // shouldn't be returned, but make it sensible
+ return "Type::getVoidTy(mod->getContext())";
}
- std::string CppWriter::getCppName(const Type* Ty) {
- // First, handle the primitive types .. easy
- if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
- switch (Ty->getTypeID()) {
- case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
- case Type::IntegerTyID: {
- unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
- return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
- }
- case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
- case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
- case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
- case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
- default:
- error("Invalid primitive type");
- break;
- }
- // shouldn't be returned, but make it sensible
- return "Type::getVoidTy(mod->getContext())";
- }
+ // Now, see if we've seen the type before and return that
+ TypeMap::iterator I = TypeNames.find(Ty);
+ if (I != TypeNames.end())
+ return I->second;
+
+ // Okay, let's build a new name for this type. Start with a prefix
+ const char* prefix = 0;
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: prefix = "FuncTy_"; break;
+ case Type::StructTyID: prefix = "StructTy_"; break;
+ case Type::ArrayTyID: prefix = "ArrayTy_"; break;
+ case Type::PointerTyID: prefix = "PointerTy_"; break;
+ case Type::OpaqueTyID: prefix = "OpaqueTy_"; break;
+ case Type::VectorTyID: prefix = "VectorTy_"; break;
+ default: prefix = "OtherTy_"; break; // prevent breakage
+ }
- // Now, see if we've seen the type before and return that
- TypeMap::iterator I = TypeNames.find(Ty);
- if (I != TypeNames.end())
- return I->second;
+ // See if the type has a name in the symboltable and build accordingly
+ const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty);
+ std::string name;
+ if (tName)
+ name = std::string(prefix) + *tName;
+ else
+ name = std::string(prefix) + utostr(uniqueNum++);
+ sanitize(name);
+
+ // Save the name
+ return TypeNames[Ty] = name;
+}
- // Okay, let's build a new name for this type. Start with a prefix
- const char* prefix = 0;
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: prefix = "FuncTy_"; break;
- case Type::StructTyID: prefix = "StructTy_"; break;
- case Type::ArrayTyID: prefix = "ArrayTy_"; break;
- case Type::PointerTyID: prefix = "PointerTy_"; break;
- case Type::OpaqueTyID: prefix = "OpaqueTy_"; break;
- case Type::VectorTyID: prefix = "VectorTy_"; break;
- default: prefix = "OtherTy_"; break; // prevent breakage
- }
+void CppWriter::printCppName(const Type* Ty) {
+ printEscapedString(getCppName(Ty));
+}
- // See if the type has a name in the symboltable and build accordingly
- const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty);
- std::string name;
- if (tName)
- name = std::string(prefix) + *tName;
- else
- name = std::string(prefix) + utostr(uniqueNum++);
- sanitize(name);
-
- // Save the name
- return TypeNames[Ty] = name;
- }
-
- void CppWriter::printCppName(const Type* Ty) {
- printEscapedString(getCppName(Ty));
- }
-
- std::string CppWriter::getCppName(const Value* val) {
- std::string name;
- ValueMap::iterator I = ValueNames.find(val);
- if (I != ValueNames.end() && I->first == val)
- return I->second;
-
- if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
- name = std::string("gvar_") +
- getTypePrefix(GV->getType()->getElementType());
- } else if (isa<Function>(val)) {
- name = std::string("func_");
- } else if (const Constant* C = dyn_cast<Constant>(val)) {
- name = std::string("const_") + getTypePrefix(C->getType());
- } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
- if (is_inline) {
- unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
- Function::const_arg_iterator(Arg)) + 1;
- name = std::string("arg_") + utostr(argNum);
- NameSet::iterator NI = UsedNames.find(name);
- if (NI != UsedNames.end())
- name += std::string("_") + utostr(uniqueNum++);
- UsedNames.insert(name);
- return ValueNames[val] = name;
- } else {
- name = getTypePrefix(val->getType());
- }
+std::string CppWriter::getCppName(const Value* val) {
+ std::string name;
+ ValueMap::iterator I = ValueNames.find(val);
+ if (I != ValueNames.end() && I->first == val)
+ return I->second;
+
+ if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
+ name = std::string("gvar_") +
+ getTypePrefix(GV->getType()->getElementType());
+ } else if (isa<Function>(val)) {
+ name = std::string("func_");
+ } else if (const Constant* C = dyn_cast<Constant>(val)) {
+ name = std::string("const_") + getTypePrefix(C->getType());
+ } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
+ if (is_inline) {
+ unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
+ Function::const_arg_iterator(Arg)) + 1;
+ name = std::string("arg_") + utostr(argNum);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
} else {
name = getTypePrefix(val->getType());
}
- if (val->hasName())
- name += val->getName();
- else
- name += utostr(uniqueNum++);
- sanitize(name);
- NameSet::iterator NI = UsedNames.find(name);
- if (NI != UsedNames.end())
- name += std::string("_") + utostr(uniqueNum++);
- UsedNames.insert(name);
- return ValueNames[val] = name;
+ } else {
+ name = getTypePrefix(val->getType());
}
+ if (val->hasName())
+ name += val->getName();
+ else
+ name += utostr(uniqueNum++);
+ sanitize(name);
+ NameSet::iterator NI = UsedNames.find(name);
+ if (NI != UsedNames.end())
+ name += std::string("_") + utostr(uniqueNum++);
+ UsedNames.insert(name);
+ return ValueNames[val] = name;
+}
- void CppWriter::printCppName(const Value* val) {
- printEscapedString(getCppName(val));
- }
+void CppWriter::printCppName(const Value* val) {
+ printEscapedString(getCppName(val));
+}
- void CppWriter::printAttributes(const AttrListPtr &PAL,
- const std::string &name) {
- Out << "AttrListPtr " << name << "_PAL;";
- nl(Out);
- if (!PAL.isEmpty()) {
- Out << '{'; in(); nl(Out);
- Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
- Out << "AttributeWithIndex PAWI;"; nl(Out);
- for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
- unsigned index = PAL.getSlot(i).Index;
- Attributes attrs = PAL.getSlot(i).Attrs;
- Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
+void CppWriter::printAttributes(const AttrListPtr &PAL,
+ const std::string &name) {
+ Out << "AttrListPtr " << name << "_PAL;";
+ nl(Out);
+ if (!PAL.isEmpty()) {
+ Out << '{'; in(); nl(Out);
+ Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
+ Out << "AttributeWithIndex PAWI;"; nl(Out);
+ for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
+ unsigned index = PAL.getSlot(i).Index;
+ Attributes attrs = PAL.getSlot(i).Attrs;
+ Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
#define HANDLE_ATTR(X) \
- if (attrs & Attribute::X) \
- Out << " | Attribute::" #X; \
- attrs &= ~Attribute::X;
-
- HANDLE_ATTR(SExt);
- HANDLE_ATTR(ZExt);
- HANDLE_ATTR(NoReturn);
- HANDLE_ATTR(InReg);
- HANDLE_ATTR(StructRet);
- HANDLE_ATTR(NoUnwind);
- HANDLE_ATTR(NoAlias);
- HANDLE_ATTR(ByVal);
- HANDLE_ATTR(Nest);
- HANDLE_ATTR(ReadNone);
- HANDLE_ATTR(ReadOnly);
- HANDLE_ATTR(InlineHint);
- HANDLE_ATTR(NoInline);
- HANDLE_ATTR(AlwaysInline);
- HANDLE_ATTR(OptimizeForSize);
- HANDLE_ATTR(StackProtect);
- HANDLE_ATTR(StackProtectReq);
- HANDLE_ATTR(NoCapture);
+ if (attrs & Attribute::X) \
+ Out << " | Attribute::" #X; \
+ attrs &= ~Attribute::X;
+
+ HANDLE_ATTR(SExt);
+ HANDLE_ATTR(ZExt);
+ HANDLE_ATTR(NoReturn);
+ HANDLE_ATTR(InReg);
+ HANDLE_ATTR(StructRet);
+ HANDLE_ATTR(NoUnwind);
+ HANDLE_ATTR(NoAlias);
+ HANDLE_ATTR(ByVal);
+ HANDLE_ATTR(Nest);
+ HANDLE_ATTR(ReadNone);
+ HANDLE_ATTR(ReadOnly);
+ HANDLE_ATTR(InlineHint);
+ HANDLE_ATTR(NoInline);
+ HANDLE_ATTR(AlwaysInline);
+ HANDLE_ATTR(OptimizeForSize);
+ HANDLE_ATTR(StackProtect);
+ HANDLE_ATTR(StackProtectReq);
+ HANDLE_ATTR(NoCapture);
#undef HANDLE_ATTR
- assert(attrs == 0 && "Unhandled attribute!");
- Out << ";";
- nl(Out);
- Out << "Attrs.push_back(PAWI);";
- nl(Out);
- }
- Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+ assert(attrs == 0 && "Unhandled attribute!");
+ Out << ";";
+ nl(Out);
+ Out << "Attrs.push_back(PAWI);";
nl(Out);
- out(); nl(Out);
- Out << '}'; nl(Out);
}
+ Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+ nl(Out);
+ out(); nl(Out);
+ Out << '}'; nl(Out);
}
+}
- bool CppWriter::printTypeInternal(const Type* Ty) {
- // We don't print definitions for primitive types
- if (Ty->isPrimitiveType() || Ty->isIntegerTy())
- return false;
-
- // If we already defined this type, we don't need to define it again.
- if (DefinedTypes.find(Ty) != DefinedTypes.end())
- return false;
-
- // Everything below needs the name for the type so get it now.
- std::string typeName(getCppName(Ty));
-
- // Search the type stack for recursion. If we find it, then generate this
- // as an OpaqueType, but make sure not to do this multiple times because
- // the type could appear in multiple places on the stack. Once the opaque
- // definition is issued, it must not be re-issued. Consequently we have to
- // check the UnresolvedTypes list as well.
- TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(),
- Ty);
- if (TI != TypeStack.end()) {
- TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
- if (I == UnresolvedTypes.end()) {
- Out << "PATypeHolder " << typeName;
- Out << "_fwd = OpaqueType::get(mod->getContext());";
- nl(Out);
- UnresolvedTypes[Ty] = typeName;
- }
- return true;
- }
+bool CppWriter::printTypeInternal(const Type* Ty) {
+ // We don't print definitions for primitive types
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy())
+ return false;
- // We're going to print a derived type which, by definition, contains other
- // types. So, push this one we're printing onto the type stack to assist with
- // recursive definitions.
- TypeStack.push_back(Ty);
+ // If we already defined this type, we don't need to define it again.
+ if (DefinedTypes.find(Ty) != DefinedTypes.end())
+ return false;
- // Print the type definition
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: {
- const FunctionType* FT = cast<FunctionType>(Ty);
- Out << "std::vector<const Type*>" << typeName << "_args;";
+ // Everything below needs the name for the type so get it now.
+ std::string typeName(getCppName(Ty));
+
+ // Search the type stack for recursion. If we find it, then generate this
+ // as an OpaqueType, but make sure not to do this multiple times because
+ // the type could appear in multiple places on the stack. Once the opaque
+ // definition is issued, it must not be re-issued. Consequently we have to
+ // check the UnresolvedTypes list as well.
+ TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(),
+ Ty);
+ if (TI != TypeStack.end()) {
+ TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
+ if (I == UnresolvedTypes.end()) {
+ Out << "PATypeHolder " << typeName;
+ Out << "_fwd = OpaqueType::get(mod->getContext());";
nl(Out);
- FunctionType::param_iterator PI = FT->param_begin();
- FunctionType::param_iterator PE = FT->param_end();
- for (; PI != PE; ++PI) {
- const Type* argTy = static_cast<const Type*>(*PI);
- bool isForward = printTypeInternal(argTy);
- std::string argName(getCppName(argTy));
- Out << typeName << "_args.push_back(" << argName;
- if (isForward)
- Out << "_fwd";
- Out << ");";
- nl(Out);
- }
- bool isForward = printTypeInternal(FT->getReturnType());
- std::string retTypeName(getCppName(FT->getReturnType()));
- Out << "FunctionType* " << typeName << " = FunctionType::get(";
- in(); nl(Out) << "/*Result=*/" << retTypeName;
+ UnresolvedTypes[Ty] = typeName;
+ }
+ return true;
+ }
+
+ // We're going to print a derived type which, by definition, contains other
+ // types. So, push this one we're printing onto the type stack to assist with
+ // recursive definitions.
+ TypeStack.push_back(Ty);
+
+ // Print the type definition
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: {
+ const FunctionType* FT = cast<FunctionType>(Ty);
+ Out << "std::vector<const Type*>" << typeName << "_args;";
+ nl(Out);
+ FunctionType::param_iterator PI = FT->param_begin();
+ FunctionType::param_iterator PE = FT->param_end();
+ for (; PI != PE; ++PI) {
+ const Type* argTy = static_cast<const Type*>(*PI);
+ bool isForward = printTypeInternal(argTy);
+ std::string argName(getCppName(argTy));
+ Out << typeName << "_args.push_back(" << argName;
if (isForward)
Out << "_fwd";
- Out << ",";
- nl(Out) << "/*Params=*/" << typeName << "_args,";
- nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
- out();
- nl(Out);
- break;
- }
- case Type::StructTyID: {
- const StructType* ST = cast<StructType>(Ty);
- Out << "std::vector<const Type*>" << typeName << "_fields;";
- nl(Out);
- StructType::element_iterator EI = ST->element_begin();
- StructType::element_iterator EE = ST->element_end();
- for (; EI != EE; ++EI) {
- const Type* fieldTy = static_cast<const Type*>(*EI);
- bool isForward = printTypeInternal(fieldTy);
- std::string fieldName(getCppName(fieldTy));
- Out << typeName << "_fields.push_back(" << fieldName;
- if (isForward)
- Out << "_fwd";
- Out << ");";
- nl(Out);
- }
- Out << "StructType* " << typeName << " = StructType::get("
- << "mod->getContext(), "
- << typeName << "_fields, /*isPacked=*/"
- << (ST->isPacked() ? "true" : "false") << ");";
- nl(Out);
- break;
- }
- case Type::ArrayTyID: {
- const ArrayType* AT = cast<ArrayType>(Ty);
- const Type* ET = AT->getElementType();
- bool isForward = printTypeInternal(ET);
- std::string elemName(getCppName(ET));
- Out << "ArrayType* " << typeName << " = ArrayType::get("
- << elemName << (isForward ? "_fwd" : "")
- << ", " << utostr(AT->getNumElements()) << ");";
- nl(Out);
- break;
- }
- case Type::PointerTyID: {
- const PointerType* PT = cast<PointerType>(Ty);
- const Type* ET = PT->getElementType();
- bool isForward = printTypeInternal(ET);
- std::string elemName(getCppName(ET));
- Out << "PointerType* " << typeName << " = PointerType::get("
- << elemName << (isForward ? "_fwd" : "")
- << ", " << utostr(PT->getAddressSpace()) << ");";
- nl(Out);
- break;
- }
- case Type::VectorTyID: {
- const VectorType* PT = cast<VectorType>(Ty);
- const Type* ET = PT->getElementType();
- bool isForward = printTypeInternal(ET);
- std::string elemName(getCppName(ET));
- Out << "VectorType* " << typeName << " = VectorType::get("
- << elemName << (isForward ? "_fwd" : "")
- << ", " << utostr(PT->getNumElements()) << ");";
- nl(Out);
- break;
- }
- case Type::OpaqueTyID: {
- Out << "OpaqueType* " << typeName;
- Out << " = OpaqueType::get(mod->getContext());";
+ Out << ");";
nl(Out);
- break;
- }
- default:
- error("Invalid TypeID");
}
-
- // If the type had a name, make sure we recreate it.
- const std::string* progTypeName =
- findTypeName(TheModule->getTypeSymbolTable(),Ty);
- if (progTypeName) {
- Out << "mod->addTypeName(\"" << *progTypeName << "\", "
- << typeName << ");";
+ bool isForward = printTypeInternal(FT->getReturnType());
+ std::string retTypeName(getCppName(FT->getReturnType()));
+ Out << "FunctionType* " << typeName << " = FunctionType::get(";
+ in(); nl(Out) << "/*Result=*/" << retTypeName;
+ if (isForward)
+ Out << "_fwd";
+ Out << ",";
+ nl(Out) << "/*Params=*/" << typeName << "_args,";
+ nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
+ out();
+ nl(Out);
+ break;
+ }
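+  // A rough sketch of what this case emits for a type such as
+  // "i32 (i32)", assuming getCppName yields the hypothetical names
+  // FuncTy_0 and IntTy_1:
+  //   std::vector<const Type*>FuncTy_0_args;
+  //   FuncTy_0_args.push_back(IntTy_1);
+  //   FunctionType* FuncTy_0 = FunctionType::get(
+  //    /*Result=*/IntTy_1,
+  //    /*Params=*/FuncTy_0_args,
+  //    /*isVarArg=*/false);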
+ case Type::StructTyID: {
+ const StructType* ST = cast<StructType>(Ty);
+ Out << "std::vector<const Type*>" << typeName << "_fields;";
+ nl(Out);
+ StructType::element_iterator EI = ST->element_begin();
+ StructType::element_iterator EE = ST->element_end();
+ for (; EI != EE; ++EI) {
+ const Type* fieldTy = static_cast<const Type*>(*EI);
+ bool isForward = printTypeInternal(fieldTy);
+ std::string fieldName(getCppName(fieldTy));
+ Out << typeName << "_fields.push_back(" << fieldName;
+ if (isForward)
+ Out << "_fwd";
+ Out << ");";
nl(Out);
}
+ Out << "StructType* " << typeName << " = StructType::get("
+ << "mod->getContext(), "
+ << typeName << "_fields, /*isPacked=*/"
+ << (ST->isPacked() ? "true" : "false") << ");";
+ nl(Out);
+ break;
+ }
+ case Type::ArrayTyID: {
+ const ArrayType* AT = cast<ArrayType>(Ty);
+ const Type* ET = AT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "ArrayType* " << typeName << " = ArrayType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(AT->getNumElements()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::PointerTyID: {
+ const PointerType* PT = cast<PointerType>(Ty);
+ const Type* ET = PT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "PointerType* " << typeName << " = PointerType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(PT->getAddressSpace()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::VectorTyID: {
+ const VectorType* PT = cast<VectorType>(Ty);
+ const Type* ET = PT->getElementType();
+ bool isForward = printTypeInternal(ET);
+ std::string elemName(getCppName(ET));
+ Out << "VectorType* " << typeName << " = VectorType::get("
+ << elemName << (isForward ? "_fwd" : "")
+ << ", " << utostr(PT->getNumElements()) << ");";
+ nl(Out);
+ break;
+ }
+ case Type::OpaqueTyID: {
+ Out << "OpaqueType* " << typeName;
+ Out << " = OpaqueType::get(mod->getContext());";
+ nl(Out);
+ break;
+ }
+ default:
+ error("Invalid TypeID");
+ }
- // Pop us off the type stack
- TypeStack.pop_back();
+ // If the type had a name, make sure we recreate it.
+ const std::string* progTypeName =
+ findTypeName(TheModule->getTypeSymbolTable(),Ty);
+ if (progTypeName) {
+ Out << "mod->addTypeName(\"" << *progTypeName << "\", "
+ << typeName << ");";
+ nl(Out);
+ }
- // Indicate that this type is now defined.
- DefinedTypes.insert(Ty);
+ // Pop us off the type stack
+ TypeStack.pop_back();
- // Early resolve as many unresolved types as possible. Search the unresolved
- // types map for the type we just printed. Now that its definition is complete
- // we can resolve any previous references to it. This prevents a cascade of
- // unresolved types.
- TypeMap::iterator I = UnresolvedTypes.find(Ty);
- if (I != UnresolvedTypes.end()) {
- Out << "cast<OpaqueType>(" << I->second
- << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
- nl(Out);
- Out << I->second << " = cast<";
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: Out << "FunctionType"; break;
- case Type::ArrayTyID: Out << "ArrayType"; break;
- case Type::StructTyID: Out << "StructType"; break;
- case Type::VectorTyID: Out << "VectorType"; break;
- case Type::PointerTyID: Out << "PointerType"; break;
- case Type::OpaqueTyID: Out << "OpaqueType"; break;
- default: Out << "NoSuchDerivedType"; break;
- }
- Out << ">(" << I->second << "_fwd.get());";
- nl(Out); nl(Out);
- UnresolvedTypes.erase(I);
- }
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
-  // Finally, separate the type definition from the others with a newline.
+ // Early resolve as many unresolved types as possible. Search the unresolved
+ // types map for the type we just printed. Now that its definition is complete
+ // we can resolve any previous references to it. This prevents a cascade of
+ // unresolved types.
+ TypeMap::iterator I = UnresolvedTypes.find(Ty);
+ if (I != UnresolvedTypes.end()) {
+ Out << "cast<OpaqueType>(" << I->second
+ << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
nl(Out);
-
-  // This was not a recursive type, so no forward reference was needed.
- return false;
+ Out << I->second << " = cast<";
+ switch (Ty->getTypeID()) {
+ case Type::FunctionTyID: Out << "FunctionType"; break;
+ case Type::ArrayTyID: Out << "ArrayType"; break;
+ case Type::StructTyID: Out << "StructType"; break;
+ case Type::VectorTyID: Out << "VectorType"; break;
+ case Type::PointerTyID: Out << "PointerType"; break;
+ case Type::OpaqueTyID: Out << "OpaqueType"; break;
+ default: Out << "NoSuchDerivedType"; break;
+ }
+ Out << ">(" << I->second << "_fwd.get());";
+ nl(Out); nl(Out);
+ UnresolvedTypes.erase(I);
}
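+  // For a recursive type (e.g. a struct containing a pointer to itself) the
+  // inner visit leaves an opaque forward-reference holder behind, and the
+  // block above then emits roughly (StructTy_0 being a hypothetical name):
+  //   cast<OpaqueType>(StructTy_0_fwd.get())->refineAbstractTypeTo(StructTy_0);
+  //   StructTy_0 = cast<StructType>(StructTy_0_fwd.get());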
-  // Prints a type definition. The printTypeInternal helper returns true if
-  // it could not resolve some contained type and had to emit a forward
-  // reference.
- void CppWriter::printType(const Type* Ty) {
- assert(TypeStack.empty());
- TypeStack.clear();
- printTypeInternal(Ty);
- assert(TypeStack.empty());
- }
-
- void CppWriter::printTypes(const Module* M) {
- // Walk the symbol table and print out all its types
- const TypeSymbolTable& symtab = M->getTypeSymbolTable();
- for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
- TI != TE; ++TI) {
-
- // For primitive types and types already defined, just add a name
- TypeMap::const_iterator TNI = TypeNames.find(TI->second);
- if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() ||
- TNI != TypeNames.end()) {
- Out << "mod->addTypeName(\"";
- printEscapedString(TI->first);
- Out << "\", " << getCppName(TI->second) << ");";
- nl(Out);
- // For everything else, define the type
- } else {
- printType(TI->second);
- }
- }
+  // Finally, separate the type definition from the others with a newline.
+ nl(Out);
- // Add all of the global variables to the value table...
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- if (I->hasInitializer())
- printType(I->getInitializer()->getType());
- printType(I->getType());
+  // This was not a recursive type, so no forward reference was needed.
+ return false;
+}
+
+// Prints a type definition. The printTypeInternal helper returns true if
+// it could not resolve some contained type and had to emit a forward
+// reference.
+void CppWriter::printType(const Type* Ty) {
+ assert(TypeStack.empty());
+ TypeStack.clear();
+ printTypeInternal(Ty);
+ assert(TypeStack.empty());
+}
+
+void CppWriter::printTypes(const Module* M) {
+ // Walk the symbol table and print out all its types
+ const TypeSymbolTable& symtab = M->getTypeSymbolTable();
+ for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
+ TI != TE; ++TI) {
+
+ // For primitive types and types already defined, just add a name
+ TypeMap::const_iterator TNI = TypeNames.find(TI->second);
+ if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() ||
+ TNI != TypeNames.end()) {
+ Out << "mod->addTypeName(\"";
+ printEscapedString(TI->first);
+ Out << "\", " << getCppName(TI->second) << ");";
+ nl(Out);
+ // For everything else, define the type
+ } else {
+ printType(TI->second);
}
+ }
- // Add all the functions to the table
- for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
- FI != FE; ++FI) {
- printType(FI->getReturnType());
- printType(FI->getFunctionType());
- // Add all the function arguments
- for (Function::const_arg_iterator AI = FI->arg_begin(),
- AE = FI->arg_end(); AI != AE; ++AI) {
- printType(AI->getType());
- }
+ // Add all of the global variables to the value table...
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ if (I->hasInitializer())
+ printType(I->getInitializer()->getType());
+ printType(I->getType());
+ }
- // Add all of the basic blocks and instructions
- for (Function::const_iterator BB = FI->begin(),
- E = FI->end(); BB != E; ++BB) {
- printType(BB->getType());
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
- ++I) {
- printType(I->getType());
- for (unsigned i = 0; i < I->getNumOperands(); ++i)
- printType(I->getOperand(i)->getType());
- }
+ // Add all the functions to the table
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ printType(FI->getReturnType());
+ printType(FI->getFunctionType());
+ // Add all the function arguments
+ for (Function::const_arg_iterator AI = FI->arg_begin(),
+ AE = FI->arg_end(); AI != AE; ++AI) {
+ printType(AI->getType());
+ }
+
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ printType(BB->getType());
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ printType(I->getType());
+ for (unsigned i = 0; i < I->getNumOperands(); ++i)
+ printType(I->getOperand(i)->getType());
}
}
}
+}
- // printConstant - Print out a constant pool entry...
- void CppWriter::printConstant(const Constant *CV) {
- // First, if the constant is actually a GlobalValue (variable or function)
-  // or it's already in the constant list, then we've printed it already and
-  // can just return.
- if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
- return;
+// printConstant - Print out a constant pool entry...
+void CppWriter::printConstant(const Constant *CV) {
+ // First, if the constant is actually a GlobalValue (variable or function)
+  // or it's already in the constant list, then we've printed it already and
+  // can just return.
+ if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
+ return;
- std::string constName(getCppName(CV));
- std::string typeName(getCppName(CV->getType()));
+ std::string constName(getCppName(CV));
+ std::string typeName(getCppName(CV->getType()));
- if (isa<GlobalValue>(CV)) {
- // Skip variables and functions, we emit them elsewhere
- return;
- }
+ if (isa<GlobalValue>(CV)) {
+ // Skip variables and functions, we emit them elsewhere
+ return;
+ }
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
- std::string constValue = CI->getValue().toString(10, true);
- Out << "ConstantInt* " << constName
- << " = ConstantInt::get(mod->getContext(), APInt("
- << cast<IntegerType>(CI->getType())->getBitWidth()
- << ", StringRef(\"" << constValue << "\"), 10));";
- } else if (isa<ConstantAggregateZero>(CV)) {
- Out << "ConstantAggregateZero* " << constName
- << " = ConstantAggregateZero::get(" << typeName << ");";
- } else if (isa<ConstantPointerNull>(CV)) {
- Out << "ConstantPointerNull* " << constName
- << " = ConstantPointerNull::get(" << typeName << ");";
- } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- Out << "ConstantFP* " << constName << " = ";
- printCFP(CFP);
- Out << ";";
- } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
- if (CA->isString() &&
- CA->getType()->getElementType() ==
- Type::getInt8Ty(CA->getContext())) {
- Out << "Constant* " << constName <<
- " = ConstantArray::get(mod->getContext(), \"";
- std::string tmp = CA->getAsString();
- bool nullTerminate = false;
- if (tmp[tmp.length()-1] == 0) {
- tmp.erase(tmp.length()-1);
- nullTerminate = true;
- }
- printEscapedString(tmp);
- // Determine if we want null termination or not.
- if (nullTerminate)
- Out << "\", true"; // Indicate that the null terminator should be
- // added.
- else
- Out << "\", false";// No null terminator
- Out << ");";
- } else {
- Out << "std::vector<Constant*> " << constName << "_elems;";
- nl(Out);
- unsigned N = CA->getNumOperands();
- for (unsigned i = 0; i < N; ++i) {
- printConstant(CA->getOperand(i)); // recurse to print operands
- Out << constName << "_elems.push_back("
- << getCppName(CA->getOperand(i)) << ");";
- nl(Out);
- }
- Out << "Constant* " << constName << " = ConstantArray::get("
- << typeName << ", " << constName << "_elems);";
- }
- } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
- Out << "std::vector<Constant*> " << constName << "_fields;";
- nl(Out);
- unsigned N = CS->getNumOperands();
- for (unsigned i = 0; i < N; i++) {
- printConstant(CS->getOperand(i));
- Out << constName << "_fields.push_back("
- << getCppName(CS->getOperand(i)) << ");";
- nl(Out);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+ std::string constValue = CI->getValue().toString(10, true);
+ Out << "ConstantInt* " << constName
+ << " = ConstantInt::get(mod->getContext(), APInt("
+ << cast<IntegerType>(CI->getType())->getBitWidth()
+ << ", StringRef(\"" << constValue << "\"), 10));";
+ } else if (isa<ConstantAggregateZero>(CV)) {
+ Out << "ConstantAggregateZero* " << constName
+ << " = ConstantAggregateZero::get(" << typeName << ");";
+ } else if (isa<ConstantPointerNull>(CV)) {
+ Out << "ConstantPointerNull* " << constName
+ << " = ConstantPointerNull::get(" << typeName << ");";
+ } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+ Out << "ConstantFP* " << constName << " = ";
+ printCFP(CFP);
+ Out << ";";
+ } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+ if (CA->isString() &&
+ CA->getType()->getElementType() ==
+ Type::getInt8Ty(CA->getContext())) {
+ Out << "Constant* " << constName <<
+ " = ConstantArray::get(mod->getContext(), \"";
+ std::string tmp = CA->getAsString();
+ bool nullTerminate = false;
+ if (tmp[tmp.length()-1] == 0) {
+ tmp.erase(tmp.length()-1);
+ nullTerminate = true;
}
- Out << "Constant* " << constName << " = ConstantStruct::get("
- << typeName << ", " << constName << "_fields);";
- } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ printEscapedString(tmp);
+ // Determine if we want null termination or not.
+ if (nullTerminate)
+ Out << "\", true"; // Indicate that the null terminator should be
+ // added.
+ else
+ Out << "\", false";// No null terminator
+ Out << ");";
+ } else {
Out << "std::vector<Constant*> " << constName << "_elems;";
nl(Out);
- unsigned N = CP->getNumOperands();
+ unsigned N = CA->getNumOperands();
for (unsigned i = 0; i < N; ++i) {
- printConstant(CP->getOperand(i));
+ printConstant(CA->getOperand(i)); // recurse to print operands
Out << constName << "_elems.push_back("
- << getCppName(CP->getOperand(i)) << ");";
+ << getCppName(CA->getOperand(i)) << ");";
nl(Out);
}
- Out << "Constant* " << constName << " = ConstantVector::get("
+ Out << "Constant* " << constName << " = ConstantArray::get("
<< typeName << ", " << constName << "_elems);";
- } else if (isa<UndefValue>(CV)) {
- Out << "UndefValue* " << constName << " = UndefValue::get("
- << typeName << ");";
- } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
- if (CE->getOpcode() == Instruction::GetElementPtr) {
- Out << "std::vector<Constant*> " << constName << "_indices;";
+ }
+ } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_fields;";
+ nl(Out);
+ unsigned N = CS->getNumOperands();
+ for (unsigned i = 0; i < N; i++) {
+ printConstant(CS->getOperand(i));
+ Out << constName << "_fields.push_back("
+ << getCppName(CS->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantStruct::get("
+ << typeName << ", " << constName << "_fields);";
+ } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+ Out << "std::vector<Constant*> " << constName << "_elems;";
+ nl(Out);
+ unsigned N = CP->getNumOperands();
+ for (unsigned i = 0; i < N; ++i) {
+ printConstant(CP->getOperand(i));
+ Out << constName << "_elems.push_back("
+ << getCppName(CP->getOperand(i)) << ");";
+ nl(Out);
+ }
+ Out << "Constant* " << constName << " = ConstantVector::get("
+ << typeName << ", " << constName << "_elems);";
+ } else if (isa<UndefValue>(CV)) {
+ Out << "UndefValue* " << constName << " = UndefValue::get("
+ << typeName << ");";
+ } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+ if (CE->getOpcode() == Instruction::GetElementPtr) {
+ Out << "std::vector<Constant*> " << constName << "_indices;";
+ nl(Out);
+ printConstant(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
+ printConstant(CE->getOperand(i));
+ Out << constName << "_indices.push_back("
+ << getCppName(CE->getOperand(i)) << ");";
nl(Out);
- printConstant(CE->getOperand(0));
- for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
- printConstant(CE->getOperand(i));
- Out << constName << "_indices.push_back("
- << getCppName(CE->getOperand(i)) << ");";
- nl(Out);
- }
- Out << "Constant* " << constName
- << " = ConstantExpr::getGetElementPtr("
- << getCppName(CE->getOperand(0)) << ", "
- << "&" << constName << "_indices[0], "
- << constName << "_indices.size()"
- << ");";
- } else if (CE->isCast()) {
- printConstant(CE->getOperand(0));
- Out << "Constant* " << constName << " = ConstantExpr::getCast(";
- switch (CE->getOpcode()) {
- default: llvm_unreachable("Invalid cast opcode");
- case Instruction::Trunc: Out << "Instruction::Trunc"; break;
- case Instruction::ZExt: Out << "Instruction::ZExt"; break;
- case Instruction::SExt: Out << "Instruction::SExt"; break;
- case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
- case Instruction::FPExt: Out << "Instruction::FPExt"; break;
- case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
- case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
- case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
- case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
- case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
- case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
- case Instruction::BitCast: Out << "Instruction::BitCast"; break;
- }
- Out << ", " << getCppName(CE->getOperand(0)) << ", "
- << getCppName(CE->getType()) << ");";
- } else {
- unsigned N = CE->getNumOperands();
- for (unsigned i = 0; i < N; ++i ) {
- printConstant(CE->getOperand(i));
+ }
+ Out << "Constant* " << constName
+ << " = ConstantExpr::getGetElementPtr("
+ << getCppName(CE->getOperand(0)) << ", "
+ << "&" << constName << "_indices[0], "
+ << constName << "_indices.size()"
+ << ");";
+ } else if (CE->isCast()) {
+ printConstant(CE->getOperand(0));
+ Out << "Constant* " << constName << " = ConstantExpr::getCast(";
+ switch (CE->getOpcode()) {
+ default: llvm_unreachable("Invalid cast opcode");
+ case Instruction::Trunc: Out << "Instruction::Trunc"; break;
+ case Instruction::ZExt: Out << "Instruction::ZExt"; break;
+ case Instruction::SExt: Out << "Instruction::SExt"; break;
+ case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
+ case Instruction::FPExt: Out << "Instruction::FPExt"; break;
+ case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
+ case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
+ case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
+ case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
+ case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
+ case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
+ case Instruction::BitCast: Out << "Instruction::BitCast"; break;
+ }
+ Out << ", " << getCppName(CE->getOperand(0)) << ", "
+ << getCppName(CE->getType()) << ");";
+ } else {
+ unsigned N = CE->getNumOperands();
+ for (unsigned i = 0; i < N; ++i ) {
+ printConstant(CE->getOperand(i));
+ }
+ Out << "Constant* " << constName << " = ConstantExpr::";
+ switch (CE->getOpcode()) {
+ case Instruction::Add: Out << "getAdd("; break;
+ case Instruction::FAdd: Out << "getFAdd("; break;
+ case Instruction::Sub: Out << "getSub("; break;
+ case Instruction::FSub: Out << "getFSub("; break;
+ case Instruction::Mul: Out << "getMul("; break;
+ case Instruction::FMul: Out << "getFMul("; break;
+ case Instruction::UDiv: Out << "getUDiv("; break;
+ case Instruction::SDiv: Out << "getSDiv("; break;
+ case Instruction::FDiv: Out << "getFDiv("; break;
+ case Instruction::URem: Out << "getURem("; break;
+ case Instruction::SRem: Out << "getSRem("; break;
+ case Instruction::FRem: Out << "getFRem("; break;
+ case Instruction::And: Out << "getAnd("; break;
+ case Instruction::Or: Out << "getOr("; break;
+ case Instruction::Xor: Out << "getXor("; break;
+ case Instruction::ICmp:
+ Out << "getICmp(ICmpInst::ICMP_";
+ switch (CE->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "NE"; break;
+ case ICmpInst::ICMP_SLT: Out << "SLT"; break;
+ case ICmpInst::ICMP_ULT: Out << "ULT"; break;
+ case ICmpInst::ICMP_SGT: Out << "SGT"; break;
+ case ICmpInst::ICMP_UGT: Out << "UGT"; break;
+ case ICmpInst::ICMP_SLE: Out << "SLE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ULE"; break;
+ case ICmpInst::ICMP_SGE: Out << "SGE"; break;
+ case ICmpInst::ICMP_UGE: Out << "UGE"; break;
+ default: error("Invalid ICmp Predicate");
}
- Out << "Constant* " << constName << " = ConstantExpr::";
- switch (CE->getOpcode()) {
- case Instruction::Add: Out << "getAdd("; break;
- case Instruction::FAdd: Out << "getFAdd("; break;
- case Instruction::Sub: Out << "getSub("; break;
- case Instruction::FSub: Out << "getFSub("; break;
- case Instruction::Mul: Out << "getMul("; break;
- case Instruction::FMul: Out << "getFMul("; break;
- case Instruction::UDiv: Out << "getUDiv("; break;
- case Instruction::SDiv: Out << "getSDiv("; break;
- case Instruction::FDiv: Out << "getFDiv("; break;
- case Instruction::URem: Out << "getURem("; break;
- case Instruction::SRem: Out << "getSRem("; break;
- case Instruction::FRem: Out << "getFRem("; break;
- case Instruction::And: Out << "getAnd("; break;
- case Instruction::Or: Out << "getOr("; break;
- case Instruction::Xor: Out << "getXor("; break;
- case Instruction::ICmp:
- Out << "getICmp(ICmpInst::ICMP_";
- switch (CE->getPredicate()) {
- case ICmpInst::ICMP_EQ: Out << "EQ"; break;
- case ICmpInst::ICMP_NE: Out << "NE"; break;
- case ICmpInst::ICMP_SLT: Out << "SLT"; break;
- case ICmpInst::ICMP_ULT: Out << "ULT"; break;
- case ICmpInst::ICMP_SGT: Out << "SGT"; break;
- case ICmpInst::ICMP_UGT: Out << "UGT"; break;
- case ICmpInst::ICMP_SLE: Out << "SLE"; break;
- case ICmpInst::ICMP_ULE: Out << "ULE"; break;
- case ICmpInst::ICMP_SGE: Out << "SGE"; break;
- case ICmpInst::ICMP_UGE: Out << "UGE"; break;
- default: error("Invalid ICmp Predicate");
- }
- break;
- case Instruction::FCmp:
- Out << "getFCmp(FCmpInst::FCMP_";
- switch (CE->getPredicate()) {
- case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
- case FCmpInst::FCMP_ORD: Out << "ORD"; break;
- case FCmpInst::FCMP_UNO: Out << "UNO"; break;
- case FCmpInst::FCMP_OEQ: Out << "OEQ"; break;
- case FCmpInst::FCMP_UEQ: Out << "UEQ"; break;
- case FCmpInst::FCMP_ONE: Out << "ONE"; break;
- case FCmpInst::FCMP_UNE: Out << "UNE"; break;
- case FCmpInst::FCMP_OLT: Out << "OLT"; break;
- case FCmpInst::FCMP_ULT: Out << "ULT"; break;
- case FCmpInst::FCMP_OGT: Out << "OGT"; break;
- case FCmpInst::FCMP_UGT: Out << "UGT"; break;
- case FCmpInst::FCMP_OLE: Out << "OLE"; break;
- case FCmpInst::FCMP_ULE: Out << "ULE"; break;
- case FCmpInst::FCMP_OGE: Out << "OGE"; break;
- case FCmpInst::FCMP_UGE: Out << "UGE"; break;
- case FCmpInst::FCMP_TRUE: Out << "TRUE"; break;
- default: error("Invalid FCmp Predicate");
- }
- break;
- case Instruction::Shl: Out << "getShl("; break;
- case Instruction::LShr: Out << "getLShr("; break;
- case Instruction::AShr: Out << "getAShr("; break;
- case Instruction::Select: Out << "getSelect("; break;
- case Instruction::ExtractElement: Out << "getExtractElement("; break;
- case Instruction::InsertElement: Out << "getInsertElement("; break;
- case Instruction::ShuffleVector: Out << "getShuffleVector("; break;
- default:
- error("Invalid constant expression");
- break;
+ break;
+ case Instruction::FCmp:
+ Out << "getFCmp(FCmpInst::FCMP_";
+ switch (CE->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
+ case FCmpInst::FCMP_ORD: Out << "ORD"; break;
+ case FCmpInst::FCMP_UNO: Out << "UNO"; break;
+ case FCmpInst::FCMP_OEQ: Out << "OEQ"; break;
+ case FCmpInst::FCMP_UEQ: Out << "UEQ"; break;
+ case FCmpInst::FCMP_ONE: Out << "ONE"; break;
+ case FCmpInst::FCMP_UNE: Out << "UNE"; break;
+ case FCmpInst::FCMP_OLT: Out << "OLT"; break;
+ case FCmpInst::FCMP_ULT: Out << "ULT"; break;
+ case FCmpInst::FCMP_OGT: Out << "OGT"; break;
+ case FCmpInst::FCMP_UGT: Out << "UGT"; break;
+ case FCmpInst::FCMP_OLE: Out << "OLE"; break;
+ case FCmpInst::FCMP_ULE: Out << "ULE"; break;
+ case FCmpInst::FCMP_OGE: Out << "OGE"; break;
+ case FCmpInst::FCMP_UGE: Out << "UGE"; break;
+ case FCmpInst::FCMP_TRUE: Out << "TRUE"; break;
+ default: error("Invalid FCmp Predicate");
}
- Out << getCppName(CE->getOperand(0));
- for (unsigned i = 1; i < CE->getNumOperands(); ++i)
- Out << ", " << getCppName(CE->getOperand(i));
- Out << ");";
+ break;
+ case Instruction::Shl: Out << "getShl("; break;
+ case Instruction::LShr: Out << "getLShr("; break;
+ case Instruction::AShr: Out << "getAShr("; break;
+ case Instruction::Select: Out << "getSelect("; break;
+ case Instruction::ExtractElement: Out << "getExtractElement("; break;
+ case Instruction::InsertElement: Out << "getInsertElement("; break;
+ case Instruction::ShuffleVector: Out << "getShuffleVector("; break;
+ default:
+ error("Invalid constant expression");
+ break;
}
- } else {
- error("Bad Constant");
- Out << "Constant* " << constName << " = 0; ";
+ Out << getCppName(CE->getOperand(0));
+ for (unsigned i = 1; i < CE->getNumOperands(); ++i)
+ Out << ", " << getCppName(CE->getOperand(i));
+ Out << ");";
}
- nl(Out);
+ } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+ Out << "Constant* " << constName << " = ";
+ Out << "BlockAddress::get(" << getOpName(BA->getBasicBlock()) << ");";
+ } else {
+ error("Bad Constant");
+ Out << "Constant* " << constName << " = 0; ";
}
+ nl(Out);
+}
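+// For example, the ConstantInt branch above turns the IR constant "i32 42"
+// into roughly the following, with const_int32_42 as a hypothetical
+// getCppName result:
+//   ConstantInt* const_int32_42 = ConstantInt::get(mod->getContext(),
+//     APInt(32, StringRef("42"), 10));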
- void CppWriter::printConstants(const Module* M) {
- // Traverse all the global variables looking for constant initializers
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I)
- if (I->hasInitializer())
- printConstant(I->getInitializer());
-
- // Traverse the LLVM functions looking for constants
- for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
- FI != FE; ++FI) {
- // Add all of the basic blocks and instructions
- for (Function::const_iterator BB = FI->begin(),
- E = FI->end(); BB != E; ++BB) {
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
- ++I) {
- for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
- printConstant(C);
- }
+void CppWriter::printConstants(const Module* M) {
+ // Traverse all the global variables looking for constant initializers
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I)
+ if (I->hasInitializer())
+ printConstant(I->getInitializer());
+
+ // Traverse the LLVM functions looking for constants
+ for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+ FI != FE; ++FI) {
+ // Add all of the basic blocks and instructions
+ for (Function::const_iterator BB = FI->begin(),
+ E = FI->end(); BB != E; ++BB) {
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+ ++I) {
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ if (Constant* C = dyn_cast<Constant>(I->getOperand(i))) {
+ printConstant(C);
}
}
}
}
}
+}
- void CppWriter::printVariableUses(const GlobalVariable *GV) {
- nl(Out) << "// Type Definitions";
- nl(Out);
- printType(GV->getType());
- if (GV->hasInitializer()) {
- Constant *Init = GV->getInitializer();
- printType(Init->getType());
- if (Function *F = dyn_cast<Function>(Init)) {
- nl(Out)<< "/ Function Declarations"; nl(Out);
- printFunctionHead(F);
- } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
- nl(Out) << "// Global Variable Declarations"; nl(Out);
- printVariableHead(gv);
-
- nl(Out) << "// Global Variable Definitions"; nl(Out);
- printVariableBody(gv);
- } else {
- nl(Out) << "// Constant Definitions"; nl(Out);
- printConstant(Init);
- }
+void CppWriter::printVariableUses(const GlobalVariable *GV) {
+ nl(Out) << "// Type Definitions";
+ nl(Out);
+ printType(GV->getType());
+ if (GV->hasInitializer()) {
+ Constant *Init = GV->getInitializer();
+ printType(Init->getType());
+ if (Function *F = dyn_cast<Function>(Init)) {
+ nl(Out)<< "/ Function Declarations"; nl(Out);
+ printFunctionHead(F);
+ } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ printVariableHead(gv);
+
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ printVariableBody(gv);
+ } else {
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstant(Init);
}
}
+}
- void CppWriter::printVariableHead(const GlobalVariable *GV) {
- nl(Out) << "GlobalVariable* " << getCppName(GV);
- if (is_inline) {
- Out << " = mod->getGlobalVariable(mod->getContext(), ";
- printEscapedString(GV->getName());
- Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
- nl(Out) << "if (!" << getCppName(GV) << ") {";
- in(); nl(Out) << getCppName(GV);
- }
- Out << " = new GlobalVariable(/*Module=*/*mod, ";
- nl(Out) << "/*Type=*/";
- printCppName(GV->getType()->getElementType());
- Out << ",";
- nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
- Out << ",";
- nl(Out) << "/*Linkage=*/";
- printLinkageType(GV->getLinkage());
- Out << ",";
- nl(Out) << "/*Initializer=*/0, ";
- if (GV->hasInitializer()) {
- Out << "// has initializer, specified below";
- }
- nl(Out) << "/*Name=*/\"";
+void CppWriter::printVariableHead(const GlobalVariable *GV) {
+ nl(Out) << "GlobalVariable* " << getCppName(GV);
+ if (is_inline) {
+ Out << " = mod->getGlobalVariable(mod->getContext(), ";
printEscapedString(GV->getName());
+ Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
+ nl(Out) << "if (!" << getCppName(GV) << ") {";
+ in(); nl(Out) << getCppName(GV);
+ }
+ Out << " = new GlobalVariable(/*Module=*/*mod, ";
+ nl(Out) << "/*Type=*/";
+ printCppName(GV->getType()->getElementType());
+ Out << ",";
+ nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
+ Out << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(GV->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Initializer=*/0, ";
+ if (GV->hasInitializer()) {
+ Out << "// has initializer, specified below";
+ }
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(GV->getName());
+ Out << "\");";
+ nl(Out);
+
+ if (GV->hasSection()) {
+ printCppName(GV);
+ Out << "->setSection(\"";
+ printEscapedString(GV->getSection());
Out << "\");";
nl(Out);
-
- if (GV->hasSection()) {
- printCppName(GV);
- Out << "->setSection(\"";
- printEscapedString(GV->getSection());
- Out << "\");";
- nl(Out);
- }
- if (GV->getAlignment()) {
- printCppName(GV);
- Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
- nl(Out);
- }
- if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
- printCppName(GV);
- Out << "->setVisibility(";
- printVisibilityType(GV->getVisibility());
- Out << ");";
- nl(Out);
- }
- if (GV->isThreadLocal()) {
- printCppName(GV);
- Out << "->setThreadLocal(true);";
- nl(Out);
- }
- if (is_inline) {
- out(); Out << "}"; nl(Out);
- }
}
-
- void CppWriter::printVariableBody(const GlobalVariable *GV) {
- if (GV->hasInitializer()) {
- printCppName(GV);
- Out << "->setInitializer(";
- Out << getCppName(GV->getInitializer()) << ");";
- nl(Out);
- }
+ if (GV->getAlignment()) {
+ printCppName(GV);
+ Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
+ nl(Out);
}
+ if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
+ printCppName(GV);
+ Out << "->setVisibility(";
+ printVisibilityType(GV->getVisibility());
+ Out << ");";
+ nl(Out);
+ }
+ if (GV->isThreadLocal()) {
+ printCppName(GV);
+ Out << "->setThreadLocal(true);";
+ nl(Out);
+ }
+ if (is_inline) {
+ out(); Out << "}"; nl(Out);
+ }
+}
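+// Illustrative output for a global like "@x = global i32 0, align 4", with
+// gvar_int32_x and IntTy_0 as hypothetical getCppName results and
+// printLinkageType presumably spelling out the linkage enum:
+//   GlobalVariable* gvar_int32_x = new GlobalVariable(/*Module=*/*mod,
+//   /*Type=*/IntTy_0,
+//   /*isConstant=*/false,
+//   /*Linkage=*/GlobalValue::ExternalLinkage,
+//   /*Initializer=*/0, // has initializer, specified below
+//   /*Name=*/"x");
+//   gvar_int32_x->setAlignment(4);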
- std::string CppWriter::getOpName(Value* V) {
- if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
- return getCppName(V);
-
-  // See if it's already in the map of forward references; if so, just return
-  // the name we already set up for it.
- ForwardRefMap::const_iterator I = ForwardRefs.find(V);
- if (I != ForwardRefs.end())
- return I->second;
-
- // This is a new forward reference. Generate a unique name for it
- std::string result(std::string("fwdref_") + utostr(uniqueNum++));
-
- // Yes, this is a hack. An Argument is the smallest instantiable value that
- // we can make as a placeholder for the real value. We'll replace these
- // Argument instances later.
- Out << "Argument* " << result << " = new Argument("
- << getCppName(V->getType()) << ");";
+void CppWriter::printVariableBody(const GlobalVariable *GV) {
+ if (GV->hasInitializer()) {
+ printCppName(GV);
+ Out << "->setInitializer(";
+ Out << getCppName(GV->getInitializer()) << ");";
nl(Out);
- ForwardRefs[V] = result;
- return result;
}
+}
- // printInstruction - This member is called for each Instruction in a function.
- void CppWriter::printInstruction(const Instruction *I,
- const std::string& bbname) {
- std::string iName(getCppName(I));
+std::string CppWriter::getOpName(Value* V) {
+ if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
+ return getCppName(V);
- // Before we emit this instruction, we need to take care of generating any
- // forward references. So, we get the names of all the operands in advance
- const unsigned Ops(I->getNumOperands());
- std::string* opNames = new std::string[Ops];
- for (unsigned i = 0; i < Ops; i++) {
- opNames[i] = getOpName(I->getOperand(i));
- }
+  // See if it's already in the map of forward references; if so, just return
+  // the name we already set up for it.
+ ForwardRefMap::const_iterator I = ForwardRefs.find(V);
+ if (I != ForwardRefs.end())
+ return I->second;
- switch (I->getOpcode()) {
- default:
- error("Invalid instruction");
- break;
+ // This is a new forward reference. Generate a unique name for it
+ std::string result(std::string("fwdref_") + utostr(uniqueNum++));
- case Instruction::Ret: {
- const ReturnInst* ret = cast<ReturnInst>(I);
- Out << "ReturnInst::Create(mod->getContext(), "
- << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
- break;
+ // Yes, this is a hack. An Argument is the smallest instantiable value that
+ // we can make as a placeholder for the real value. We'll replace these
+ // Argument instances later.
+ Out << "Argument* " << result << " = new Argument("
+ << getCppName(V->getType()) << ");";
+ nl(Out);
+ ForwardRefs[V] = result;
+ return result;
+}
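+// Minimal sketch of the placeholder emitted for an operand whose defining
+// instruction has not been printed yet (fwdref_0 and PointerTy_2 are
+// hypothetical names):
+//   Argument* fwdref_0 = new Argument(PointerTy_2);
+// The ForwardRefs entry is presumably patched up with replaceAllUsesWith
+// once the real value has been emitted.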
+
+// printInstruction - This member is called for each Instruction in a function.
+void CppWriter::printInstruction(const Instruction *I,
+ const std::string& bbname) {
+ std::string iName(getCppName(I));
+
+ // Before we emit this instruction, we need to take care of generating any
+ // forward references. So, we get the names of all the operands in advance
+ const unsigned Ops(I->getNumOperands());
+ std::string* opNames = new std::string[Ops];
+ for (unsigned i = 0; i < Ops; i++)
+ opNames[i] = getOpName(I->getOperand(i));
+
+ switch (I->getOpcode()) {
+ default:
+ error("Invalid instruction");
+ break;
+
+ case Instruction::Ret: {
+ const ReturnInst* ret = cast<ReturnInst>(I);
+ Out << "ReturnInst::Create(mod->getContext(), "
+ << (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
+ break;
+ }
+ case Instruction::Br: {
+ const BranchInst* br = cast<BranchInst>(I);
+ Out << "BranchInst::Create(" ;
+ if (br->getNumOperands() == 3) {
+ Out << opNames[2] << ", "
+ << opNames[1] << ", "
+ << opNames[0] << ", ";
+
+ } else if (br->getNumOperands() == 1) {
+ Out << opNames[0] << ", ";
+ } else {
+ error("Branch with 2 operands?");
}
- case Instruction::Br: {
- const BranchInst* br = cast<BranchInst>(I);
- Out << "BranchInst::Create(" ;
- if (br->getNumOperands() == 3 ) {
- Out << opNames[2] << ", "
- << opNames[1] << ", "
- << opNames[0] << ", ";
-
- } else if (br->getNumOperands() == 1) {
- Out << opNames[0] << ", ";
- } else {
- error("Branch with 2 operands?");
- }
- Out << bbname << ");";
- break;
+ Out << bbname << ");";
+ break;
+ }
+ case Instruction::Switch: {
+ const SwitchInst *SI = cast<SwitchInst>(I);
+ Out << "SwitchInst* " << iName << " = SwitchInst::Create("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << SI->getNumCases() << ", " << bbname << ");";
+ nl(Out);
+ for (unsigned i = 2; i != SI->getNumOperands(); i += 2) {
+ Out << iName << "->addCase("
+ << opNames[i] << ", "
+ << opNames[i+1] << ");";
+ nl(Out);
}
- case Instruction::Switch: {
- const SwitchInst *SI = cast<SwitchInst>(I);
- Out << "SwitchInst* " << iName << " = SwitchInst::Create("
- << opNames[0] << ", "
- << opNames[1] << ", "
- << SI->getNumCases() << ", " << bbname << ");";
+ break;
+ }
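+  // E.g. a two-case switch emits roughly (all names hypothetical):
+  //   SwitchInst* void_7 = SwitchInst::Create(int32_c, label_default, 2, label_bb);
+  //   void_7->addCase(const_int32_0, label_case0);
+  //   void_7->addCase(const_int32_1, label_case1);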
+ case Instruction::IndirectBr: {
+ const IndirectBrInst *IBI = cast<IndirectBrInst>(I);
+ Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create("
+ << opNames[0] << ", " << IBI->getNumDestinations() << ");";
+ nl(Out);
+ for (unsigned i = 1; i != IBI->getNumOperands(); ++i) {
+ Out << iName << "->addDestination(" << opNames[i] << ");";
nl(Out);
- for (unsigned i = 2; i != SI->getNumOperands(); i += 2) {
- Out << iName << "->addCase("
- << opNames[i] << ", "
- << opNames[i+1] << ");";
- nl(Out);
- }
- break;
}
- case Instruction::IndirectBr: {
- const IndirectBrInst *IBI = cast<IndirectBrInst>(I);
- Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create("
- << opNames[0] << ", " << IBI->getNumDestinations() << ");";
+ break;
+ }
+ case Instruction::Invoke: {
+ const InvokeInst* inv = cast<InvokeInst>(I);
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < inv->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back("
+ << getOpName(inv->getArgOperand(i)) << ");";
nl(Out);
- for (unsigned i = 1; i != IBI->getNumOperands(); ++i) {
- Out << iName << "->addDestination(" << opNames[i] << ");";
- nl(Out);
- }
- break;
}
- case Instruction::Invoke: {
- const InvokeInst* inv = cast<InvokeInst>(I);
- Out << "std::vector<Value*> " << iName << "_params;";
+ // FIXME: This shouldn't use magic numbers -3, -2, and -1.
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create("
+ << getOpName(inv->getCalledFunction()) << ", "
+ << getOpName(inv->getNormalDest()) << ", "
+ << getOpName(inv->getUnwindDest()) << ", "
+ << iName << "_params.begin(), "
+ << iName << "_params.end(), \"";
+ printEscapedString(inv->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(inv->getCallingConv());
+ Out << ");";
+ printAttributes(inv->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
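+  // Sketch of the generated calls for "invoke void @f(i32 %a) to label %ok
+  // unwind label %err", with all emitted names hypothetical and
+  // printCallingConv presumably spelling out the convention:
+  //   std::vector<Value*> void_9_params;
+  //   void_9_params.push_back(int32_a);
+  //   InvokeInst *void_9 = InvokeInst::Create(func_f, label_ok, label_err,
+  //     void_9_params.begin(), void_9_params.end(), "", label_bb);
+  //   void_9->setCallingConv(CallingConv::C);
+  //   void_9->setAttributes(void_9_PAL);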
+ case Instruction::Unwind: {
+ Out << "new UnwindInst("
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Unreachable: {
+ Out << "new UnreachableInst("
+ << "mod->getContext(), "
+ << bbname << ");";
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:{
+ Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
+ switch (I->getOpcode()) {
+ case Instruction::Add: Out << "Instruction::Add"; break;
+ case Instruction::FAdd: Out << "Instruction::FAdd"; break;
+ case Instruction::Sub: Out << "Instruction::Sub"; break;
+ case Instruction::FSub: Out << "Instruction::FSub"; break;
+ case Instruction::Mul: Out << "Instruction::Mul"; break;
+ case Instruction::FMul: Out << "Instruction::FMul"; break;
+ case Instruction::UDiv:Out << "Instruction::UDiv"; break;
+ case Instruction::SDiv:Out << "Instruction::SDiv"; break;
+ case Instruction::FDiv:Out << "Instruction::FDiv"; break;
+ case Instruction::URem:Out << "Instruction::URem"; break;
+ case Instruction::SRem:Out << "Instruction::SRem"; break;
+ case Instruction::FRem:Out << "Instruction::FRem"; break;
+ case Instruction::And: Out << "Instruction::And"; break;
+ case Instruction::Or: Out << "Instruction::Or"; break;
+ case Instruction::Xor: Out << "Instruction::Xor"; break;
+ case Instruction::Shl: Out << "Instruction::Shl"; break;
+ case Instruction::LShr:Out << "Instruction::LShr"; break;
+ case Instruction::AShr:Out << "Instruction::AShr"; break;
+ default: Out << "Instruction::BadOpCode"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::FCmp: {
+ Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", ";
+ switch (cast<FCmpInst>(I)->getPredicate()) {
+ case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
+ case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
+ case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break;
+ case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break;
+ case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break;
+ case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break;
+ case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break;
+ case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break;
+ case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break;
+ case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break;
+ case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break;
+ case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break;
+ case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break;
+ case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
+ case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
+ case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
+ default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::ICmp: {
+ Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
+ switch (cast<ICmpInst>(I)->getPredicate()) {
+ case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
+ case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
+ case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
+ case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
+ case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
+ case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
+ case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
+ case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
+ case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
+ case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
+ default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
+ }
+ Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+ printEscapedString(I->getName());
+ Out << "\");";
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst* allocaI = cast<AllocaInst>(I);
+ Out << "AllocaInst* " << iName << " = new AllocaInst("
+ << getCppName(allocaI->getAllocatedType()) << ", ";
+ if (allocaI->isArrayAllocation())
+ Out << opNames[0] << ", ";
+ Out << "\"";
+ printEscapedString(allocaI->getName());
+ Out << "\", " << bbname << ");";
+ if (allocaI->getAlignment())
+ nl(Out) << iName << "->setAlignment("
+ << allocaI->getAlignment() << ");";
+ break;
+ }
+ case Instruction::Load: {
+ const LoadInst* load = cast<LoadInst>(I);
+ Out << "LoadInst* " << iName << " = new LoadInst("
+ << opNames[0] << ", \"";
+ printEscapedString(load->getName());
+ Out << "\", " << (load->isVolatile() ? "true" : "false" )
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::Store: {
+ const StoreInst* store = cast<StoreInst>(I);
+ Out << " new StoreInst("
+ << opNames[0] << ", "
+ << opNames[1] << ", "
+ << (store->isVolatile() ? "true" : "false")
+ << ", " << bbname << ");";
+ break;
+ }
+ case Instruction::GetElementPtr: {
+ const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
+ if (gep->getNumOperands() <= 2) {
+ Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0];
+ if (gep->getNumOperands() == 2)
+ Out << ", " << opNames[1];
+ } else {
+ Out << "std::vector<Value*> " << iName << "_indices;";
nl(Out);
- for (unsigned i = 0; i < inv->getNumOperands() - 3; ++i) {
- Out << iName << "_params.push_back("
+ for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
+ Out << iName << "_indices.push_back("
<< opNames[i] << ");";
nl(Out);
}
- Out << "InvokeInst *" << iName << " = InvokeInst::Create("
- << opNames[Ops - 3] << ", "
- << opNames[Ops - 2] << ", "
- << opNames[Ops - 1] << ", "
- << iName << "_params.begin(), " << iName << "_params.end(), \"";
- printEscapedString(inv->getName());
- Out << "\", " << bbname << ");";
- nl(Out) << iName << "->setCallingConv(";
- printCallingConv(inv->getCallingConv());
- Out << ");";
- printAttributes(inv->getAttributes(), iName);
- Out << iName << "->setAttributes(" << iName << "_PAL);";
+ Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
+ << opNames[0] << ", " << iName << "_indices.begin(), "
+ << iName << "_indices.end()";
+ }
+ Out << ", \"";
+ printEscapedString(gep->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::PHI: {
+ const PHINode* phi = cast<PHINode>(I);
+
+ Out << "PHINode* " << iName << " = PHINode::Create("
+ << getCppName(phi->getType()) << ", \"";
+ printEscapedString(phi->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->reserveOperandSpace("
+ << phi->getNumIncomingValues()
+ << ");";
+ nl(Out);
+ for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
+ Out << iName << "->addIncoming("
+ << opNames[i] << ", " << opNames[i+1] << ");";
nl(Out);
- break;
- }
- case Instruction::Unwind: {
- Out << "new UnwindInst("
- << bbname << ");";
- break;
- }
- case Instruction::Unreachable: {
- Out << "new UnreachableInst("
- << "mod->getContext(), "
- << bbname << ");";
- break;
- }
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:{
- Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
- switch (I->getOpcode()) {
- case Instruction::Add: Out << "Instruction::Add"; break;
- case Instruction::FAdd: Out << "Instruction::FAdd"; break;
- case Instruction::Sub: Out << "Instruction::Sub"; break;
- case Instruction::FSub: Out << "Instruction::FSub"; break;
- case Instruction::Mul: Out << "Instruction::Mul"; break;
- case Instruction::FMul: Out << "Instruction::FMul"; break;
- case Instruction::UDiv:Out << "Instruction::UDiv"; break;
- case Instruction::SDiv:Out << "Instruction::SDiv"; break;
- case Instruction::FDiv:Out << "Instruction::FDiv"; break;
- case Instruction::URem:Out << "Instruction::URem"; break;
- case Instruction::SRem:Out << "Instruction::SRem"; break;
- case Instruction::FRem:Out << "Instruction::FRem"; break;
- case Instruction::And: Out << "Instruction::And"; break;
- case Instruction::Or: Out << "Instruction::Or"; break;
- case Instruction::Xor: Out << "Instruction::Xor"; break;
- case Instruction::Shl: Out << "Instruction::Shl"; break;
- case Instruction::LShr:Out << "Instruction::LShr"; break;
- case Instruction::AShr:Out << "Instruction::AShr"; break;
- default: Out << "Instruction::BadOpCode"; break;
- }
- Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
- printEscapedString(I->getName());
- Out << "\", " << bbname << ");";
- break;
}
- case Instruction::FCmp: {
- Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", ";
- switch (cast<FCmpInst>(I)->getPredicate()) {
- case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break;
- case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break;
- case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break;
- case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break;
- case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break;
- case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break;
- case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break;
- case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break;
- case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break;
- case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break;
- case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break;
- case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break;
- case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break;
- case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
- case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
- case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
- default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
- }
- Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
- printEscapedString(I->getName());
- Out << "\");";
- break;
- }
- case Instruction::ICmp: {
- Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
- switch (cast<ICmpInst>(I)->getPredicate()) {
- case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
- case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
- case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
- case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
- case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
- case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
- case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
- case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
- case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
- case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
- default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
- }
- Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
- printEscapedString(I->getName());
- Out << "\");";
- break;
- }
- case Instruction::Alloca: {
- const AllocaInst* allocaI = cast<AllocaInst>(I);
- Out << "AllocaInst* " << iName << " = new AllocaInst("
- << getCppName(allocaI->getAllocatedType()) << ", ";
- if (allocaI->isArrayAllocation())
- Out << opNames[0] << ", ";
- Out << "\"";
- printEscapedString(allocaI->getName());
- Out << "\", " << bbname << ");";
- if (allocaI->getAlignment())
- nl(Out) << iName << "->setAlignment("
- << allocaI->getAlignment() << ");";
- break;
- }
- case Instruction::Load:{
- const LoadInst* load = cast<LoadInst>(I);
- Out << "LoadInst* " << iName << " = new LoadInst("
- << opNames[0] << ", \"";
- printEscapedString(load->getName());
- Out << "\", " << (load->isVolatile() ? "true" : "false" )
- << ", " << bbname << ");";
- break;
- }
- case Instruction::Store: {
- const StoreInst* store = cast<StoreInst>(I);
- Out << " new StoreInst("
- << opNames[0] << ", "
- << opNames[1] << ", "
- << (store->isVolatile() ? "true" : "false")
- << ", " << bbname << ");";
- break;
- }
- case Instruction::GetElementPtr: {
- const GetElementPtrInst* gep = cast<GetElementPtrInst>(I);
- if (gep->getNumOperands() <= 2) {
- Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create("
- << opNames[0];
- if (gep->getNumOperands() == 2)
- Out << ", " << opNames[1];
- } else {
- Out << "std::vector<Value*> " << iName << "_indices;";
- nl(Out);
- for (unsigned i = 1; i < gep->getNumOperands(); ++i ) {
- Out << iName << "_indices.push_back("
- << opNames[i] << ");";
- nl(Out);
- }
- Out << "Instruction* " << iName << " = GetElementPtrInst::Create("
- << opNames[0] << ", " << iName << "_indices.begin(), "
- << iName << "_indices.end()";
- }
- Out << ", \"";
- printEscapedString(gep->getName());
- Out << "\", " << bbname << ");";
- break;
- }
- case Instruction::PHI: {
- const PHINode* phi = cast<PHINode>(I);
-
- Out << "PHINode* " << iName << " = PHINode::Create("
- << getCppName(phi->getType()) << ", \"";
- printEscapedString(phi->getName());
- Out << "\", " << bbname << ");";
- nl(Out) << iName << "->reserveOperandSpace("
- << phi->getNumIncomingValues()
- << ");";
+ break;
+ }
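+  // E.g. a two-predecessor phi emits roughly (names hypothetical):
+  //   PHINode* int32_p = PHINode::Create(IntTy_0, "p", label_bb);
+  //   int32_p->reserveOperandSpace(2);
+  //   int32_p->addIncoming(int32_a, label_entry);
+  //   int32_p->addIncoming(int32_b, label_loop);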
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::BitCast: {
+ const CastInst* cst = cast<CastInst>(I);
+ Out << "CastInst* " << iName << " = new ";
+ switch (I->getOpcode()) {
+ case Instruction::Trunc: Out << "TruncInst"; break;
+ case Instruction::ZExt: Out << "ZExtInst"; break;
+ case Instruction::SExt: Out << "SExtInst"; break;
+ case Instruction::FPTrunc: Out << "FPTruncInst"; break;
+ case Instruction::FPExt: Out << "FPExtInst"; break;
+ case Instruction::FPToUI: Out << "FPToUIInst"; break;
+ case Instruction::FPToSI: Out << "FPToSIInst"; break;
+ case Instruction::UIToFP: Out << "UIToFPInst"; break;
+ case Instruction::SIToFP: Out << "SIToFPInst"; break;
+ case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
+ case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
+ case Instruction::BitCast: Out << "BitCastInst"; break;
+ default: assert(!"Unreachable"); break;
+ }
+ Out << "(" << opNames[0] << ", "
+ << getCppName(cst->getType()) << ", \"";
+ printEscapedString(cst->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
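+  // E.g. "%p = bitcast i8* %q to i32*" becomes roughly (names hypothetical):
+  //   CastInst* ptr_p = new BitCastInst(ptr_q, PointerTy_0, "p", label_bb);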
+ case Instruction::Call: {
+ const CallInst* call = cast<CallInst>(I);
+ if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
+ Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
+ << getCppName(ila->getFunctionType()) << ", \""
+ << ila->getAsmString() << "\", \""
+ << ila->getConstraintString() << "\","
+ << (ila->hasSideEffects() ? "true" : "false") << ");";
nl(Out);
- for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
- Out << iName << "->addIncoming("
- << opNames[i] << ", " << opNames[i+1] << ");";
- nl(Out);
- }
- break;
- }
- case Instruction::Trunc:
- case Instruction::ZExt:
- case Instruction::SExt:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::PtrToInt:
- case Instruction::IntToPtr:
- case Instruction::BitCast: {
- const CastInst* cst = cast<CastInst>(I);
- Out << "CastInst* " << iName << " = new ";
- switch (I->getOpcode()) {
- case Instruction::Trunc: Out << "TruncInst"; break;
- case Instruction::ZExt: Out << "ZExtInst"; break;
- case Instruction::SExt: Out << "SExtInst"; break;
- case Instruction::FPTrunc: Out << "FPTruncInst"; break;
- case Instruction::FPExt: Out << "FPExtInst"; break;
- case Instruction::FPToUI: Out << "FPToUIInst"; break;
- case Instruction::FPToSI: Out << "FPToSIInst"; break;
- case Instruction::UIToFP: Out << "UIToFPInst"; break;
- case Instruction::SIToFP: Out << "SIToFPInst"; break;
- case Instruction::PtrToInt: Out << "PtrToIntInst"; break;
- case Instruction::IntToPtr: Out << "IntToPtrInst"; break;
- case Instruction::BitCast: Out << "BitCastInst"; break;
- default: assert(!"Unreachable"); break;
- }
- Out << "(" << opNames[0] << ", "
- << getCppName(cst->getType()) << ", \"";
- printEscapedString(cst->getName());
- Out << "\", " << bbname << ");";
- break;
}
- case Instruction::Call:{
- const CallInst* call = cast<CallInst>(I);
- if (const InlineAsm* ila = dyn_cast<InlineAsm>(call->getCalledValue())) {
- Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get("
- << getCppName(ila->getFunctionType()) << ", \""
- << ila->getAsmString() << "\", \""
- << ila->getConstraintString() << "\","
- << (ila->hasSideEffects() ? "true" : "false") << ");";
- nl(Out);
- }
- if (call->getNumOperands() > 2) {
- Out << "std::vector<Value*> " << iName << "_params;";
+ if (call->getNumArgOperands() > 1) {
+ Out << "std::vector<Value*> " << iName << "_params;";
+ nl(Out);
+ for (unsigned i = 0; i < call->getNumArgOperands(); ++i) {
+ Out << iName << "_params.push_back(" << opNames[i] << ");";
nl(Out);
- for (unsigned i = 1; i < call->getNumOperands(); ++i) {
- Out << iName << "_params.push_back(" << opNames[i] << ");";
- nl(Out);
- }
- Out << "CallInst* " << iName << " = CallInst::Create("
- << opNames[0] << ", " << iName << "_params.begin(), "
- << iName << "_params.end(), \"";
- } else if (call->getNumOperands() == 2) {
- Out << "CallInst* " << iName << " = CallInst::Create("
- << opNames[0] << ", " << opNames[1] << ", \"";
- } else {
- Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[0]
- << ", \"";
}
- printEscapedString(call->getName());
- Out << "\", " << bbname << ");";
- nl(Out) << iName << "->setCallingConv(";
- printCallingConv(call->getCallingConv());
- Out << ");";
- nl(Out) << iName << "->setTailCall("
- << (call->isTailCall() ? "true":"false");
- Out << ");";
- printAttributes(call->getAttributes(), iName);
- Out << iName << "->setAttributes(" << iName << "_PAL);";
- nl(Out);
- break;
- }
- case Instruction::Select: {
- const SelectInst* sel = cast<SelectInst>(I);
- Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create(";
- Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \"";
- printEscapedString(sel->getName());
- Out << "\", " << bbname << ");";
- break;
- }
- case Instruction::UserOp1:
- /// FALL THROUGH
- case Instruction::UserOp2: {
- /// FIXME: What should be done here?
- break;
- }
- case Instruction::VAArg: {
- const VAArgInst* va = cast<VAArgInst>(I);
- Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
- << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
- printEscapedString(va->getName());
- Out << "\", " << bbname << ");";
- break;
- }
- case Instruction::ExtractElement: {
- const ExtractElementInst* eei = cast<ExtractElementInst>(I);
- Out << "ExtractElementInst* " << getCppName(eei)
- << " = new ExtractElementInst(" << opNames[0]
- << ", " << opNames[1] << ", \"";
- printEscapedString(eei->getName());
- Out << "\", " << bbname << ");";
- break;
- }
- case Instruction::InsertElement: {
- const InsertElementInst* iei = cast<InsertElementInst>(I);
- Out << "InsertElementInst* " << getCppName(iei)
- << " = InsertElementInst::Create(" << opNames[0]
- << ", " << opNames[1] << ", " << opNames[2] << ", \"";
- printEscapedString(iei->getName());
- Out << "\", " << bbname << ");";
- break;
- }
- case Instruction::ShuffleVector: {
- const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
- Out << "ShuffleVectorInst* " << getCppName(svi)
- << " = new ShuffleVectorInst(" << opNames[0]
- << ", " << opNames[1] << ", " << opNames[2] << ", \"";
- printEscapedString(svi->getName());
- Out << "\", " << bbname << ");";
- break;
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", " << iName << "_params.begin(), "
+ << iName << "_params.end(), \"";
+ } else if (call->getNumArgOperands() == 1) {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \"";
+ } else {
+ Out << "CallInst* " << iName << " = CallInst::Create("
+ << opNames[call->getNumArgOperands()] << ", \"";
}
- case Instruction::ExtractValue: {
- const ExtractValueInst *evi = cast<ExtractValueInst>(I);
- Out << "std::vector<unsigned> " << iName << "_indices;";
+ printEscapedString(call->getName());
+ Out << "\", " << bbname << ");";
+ nl(Out) << iName << "->setCallingConv(";
+ printCallingConv(call->getCallingConv());
+ Out << ");";
+ nl(Out) << iName << "->setTailCall("
+ << (call->isTailCall() ? "true" : "false");
+ Out << ");";
+ nl(Out);
+ printAttributes(call->getAttributes(), iName);
+ Out << iName << "->setAttributes(" << iName << "_PAL);";
+ nl(Out);
+ break;
+ }
+ case Instruction::Select: {
+ const SelectInst* sel = cast<SelectInst>(I);
+ Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create(";
+ Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(sel->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::UserOp1:
+ /// FALL THROUGH
+ case Instruction::UserOp2: {
+ /// FIXME: What should be done here?
+ break;
+ }
+ case Instruction::VAArg: {
+ const VAArgInst* va = cast<VAArgInst>(I);
+ Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
+ << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
+ printEscapedString(va->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractElement: {
+ const ExtractElementInst* eei = cast<ExtractElementInst>(I);
+ Out << "ExtractElementInst* " << getCppName(eei)
+ << " = new ExtractElementInst(" << opNames[0]
+ << ", " << opNames[1] << ", \"";
+ printEscapedString(eei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertElement: {
+ const InsertElementInst* iei = cast<InsertElementInst>(I);
+ Out << "InsertElementInst* " << getCppName(iei)
+ << " = InsertElementInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(iei->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
+ Out << "ShuffleVectorInst* " << getCppName(svi)
+ << " = new ShuffleVectorInst(" << opNames[0]
+ << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+ printEscapedString(svi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::ExtractValue: {
+ const ExtractValueInst *evi = cast<ExtractValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << evi->idx_begin()[i] << ");";
nl(Out);
- for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
- Out << iName << "_indices.push_back("
- << evi->idx_begin()[i] << ");";
- nl(Out);
- }
- Out << "ExtractValueInst* " << getCppName(evi)
- << " = ExtractValueInst::Create(" << opNames[0]
- << ", "
- << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
- printEscapedString(evi->getName());
- Out << "\", " << bbname << ");";
- break;
}
- case Instruction::InsertValue: {
- const InsertValueInst *ivi = cast<InsertValueInst>(I);
- Out << "std::vector<unsigned> " << iName << "_indices;";
+ Out << "ExtractValueInst* " << getCppName(evi)
+ << " = ExtractValueInst::Create(" << opNames[0]
+ << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(evi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
+ case Instruction::InsertValue: {
+ const InsertValueInst *ivi = cast<InsertValueInst>(I);
+ Out << "std::vector<unsigned> " << iName << "_indices;";
+ nl(Out);
+ for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
+ Out << iName << "_indices.push_back("
+ << ivi->idx_begin()[i] << ");";
nl(Out);
- for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
- Out << iName << "_indices.push_back("
- << ivi->idx_begin()[i] << ");";
- nl(Out);
- }
- Out << "InsertValueInst* " << getCppName(ivi)
- << " = InsertValueInst::Create(" << opNames[0]
- << ", " << opNames[1] << ", "
- << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
- printEscapedString(ivi->getName());
- Out << "\", " << bbname << ");";
- break;
}
+ Out << "InsertValueInst* " << getCppName(ivi)
+ << " = InsertValueInst::Create(" << opNames[0]
+ << ", " << opNames[1] << ", "
+ << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+ printEscapedString(ivi->getName());
+ Out << "\", " << bbname << ");";
+ break;
+ }
}
DefinedValues.insert(I);
nl(Out);
delete [] opNames;
}
- // Print out the types, constants and declarations needed by one function
- void CppWriter::printFunctionUses(const Function* F) {
- nl(Out) << "// Type Definitions"; nl(Out);
- if (!is_inline) {
- // Print the function's return type
- printType(F->getReturnType());
+// Print out the types, constants and declarations needed by one function
+void CppWriter::printFunctionUses(const Function* F) {
+ nl(Out) << "// Type Definitions"; nl(Out);
+ if (!is_inline) {
+ // Print the function's return type
+ printType(F->getReturnType());
- // Print the function's function type
- printType(F->getFunctionType());
+ // Print the function's function type
+ printType(F->getFunctionType());
- // Print the types of each of the function's arguments
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI) {
- printType(AI->getType());
- }
+ // Print the types of each of the function's arguments
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ printType(AI->getType());
}
+ }
- // Print type definitions for every type referenced by an instruction and
- // make a note of any global values or constants that are referenced
- SmallPtrSet<GlobalValue*,64> gvs;
- SmallPtrSet<Constant*,64> consts;
- for (Function::const_iterator BB = F->begin(), BE = F->end();
- BB != BE; ++BB){
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- // Print the type of the instruction itself
- printType(I->getType());
+ // Print type definitions for every type referenced by an instruction and
+ // make a note of any global values or constants that are referenced
+ SmallPtrSet<GlobalValue*,64> gvs;
+ SmallPtrSet<Constant*,64> consts;
+ for (Function::const_iterator BB = F->begin(), BE = F->end();
+ BB != BE; ++BB){
+ for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+ I != E; ++I) {
+ // Print the type of the instruction itself
+ printType(I->getType());
- // Print the type of each of the instruction's operands
- for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- Value* operand = I->getOperand(i);
- printType(operand->getType());
-
- // If the operand references a GVal or Constant, make a note of it
- if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
- gvs.insert(GV);
- if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- if (GVar->hasInitializer())
- consts.insert(GVar->getInitializer());
- } else if (Constant* C = dyn_cast<Constant>(operand))
- consts.insert(C);
- }
+ // Print the type of each of the instruction's operands
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ Value* operand = I->getOperand(i);
+ printType(operand->getType());
+
+ // If the operand references a GVal or Constant, make a note of it
+ if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+ gvs.insert(GV);
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+ if (GVar->hasInitializer())
+ consts.insert(GVar->getInitializer());
+ } else if (Constant* C = dyn_cast<Constant>(operand))
+ consts.insert(C);
}
}
+ }
- // Print the function declarations for any functions encountered
- nl(Out) << "// Function Declarations"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (Function* Fun = dyn_cast<Function>(*I)) {
- if (!is_inline || Fun != F)
- printFunctionHead(Fun);
- }
+ // Print the function declarations for any functions encountered
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (Function* Fun = dyn_cast<Function>(*I)) {
+ if (!is_inline || Fun != F)
+ printFunctionHead(Fun);
}
+ }
- // Print the global variable declarations for any variables encountered
- nl(Out) << "// Global Variable Declarations"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
- printVariableHead(F);
- }
+ // Print the global variable declarations for any variables encountered
+ nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+ printVariableHead(F);
+ }
- // Print the constants found
- nl(Out) << "// Constant Definitions"; nl(Out);
- for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
- E = consts.end(); I != E; ++I) {
- printConstant(*I);
- }
+  // Print the constants found
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
+ E = consts.end(); I != E; ++I) {
+ printConstant(*I);
+ }
- // Process the global variables definitions now that all the constants have
- // been emitted. These definitions just couple the gvars with their constant
- // initializers.
- nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
- I != E; ++I) {
- if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
- printVariableBody(GV);
- }
+  // Process the global variable definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+ I != E; ++I) {
+ if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+ printVariableBody(GV);
}
+}
- void CppWriter::printFunctionHead(const Function* F) {
- nl(Out) << "Function* " << getCppName(F);
- if (is_inline) {
- Out << " = mod->getFunction(\"";
- printEscapedString(F->getName());
- Out << "\", " << getCppName(F->getFunctionType()) << ");";
- nl(Out) << "if (!" << getCppName(F) << ") {";
- nl(Out) << getCppName(F);
- }
- Out<< " = Function::Create(";
- nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
- nl(Out) << "/*Linkage=*/";
- printLinkageType(F->getLinkage());
- Out << ",";
- nl(Out) << "/*Name=*/\"";
+void CppWriter::printFunctionHead(const Function* F) {
+ nl(Out) << "Function* " << getCppName(F);
+ if (is_inline) {
+ Out << " = mod->getFunction(\"";
printEscapedString(F->getName());
- Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
- nl(Out,-1);
+ Out << "\", " << getCppName(F->getFunctionType()) << ");";
+ nl(Out) << "if (!" << getCppName(F) << ") {";
+ nl(Out) << getCppName(F);
+ }
+ Out<< " = Function::Create(";
+ nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
+ nl(Out) << "/*Linkage=*/";
+ printLinkageType(F->getLinkage());
+ Out << ",";
+ nl(Out) << "/*Name=*/\"";
+ printEscapedString(F->getName());
+ Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
+ nl(Out,-1);
+ printCppName(F);
+ Out << "->setCallingConv(";
+ printCallingConv(F->getCallingConv());
+ Out << ");";
+ nl(Out);
+ if (F->hasSection()) {
+ printCppName(F);
+ Out << "->setSection(\"" << F->getSection() << "\");";
+ nl(Out);
+ }
+ if (F->getAlignment()) {
+ printCppName(F);
+ Out << "->setAlignment(" << F->getAlignment() << ");";
+ nl(Out);
+ }
+ if (F->getVisibility() != GlobalValue::DefaultVisibility) {
printCppName(F);
- Out << "->setCallingConv(";
- printCallingConv(F->getCallingConv());
+ Out << "->setVisibility(";
+ printVisibilityType(F->getVisibility());
Out << ");";
nl(Out);
- if (F->hasSection()) {
- printCppName(F);
- Out << "->setSection(\"" << F->getSection() << "\");";
- nl(Out);
- }
- if (F->getAlignment()) {
- printCppName(F);
- Out << "->setAlignment(" << F->getAlignment() << ");";
- nl(Out);
- }
- if (F->getVisibility() != GlobalValue::DefaultVisibility) {
- printCppName(F);
- Out << "->setVisibility(";
- printVisibilityType(F->getVisibility());
- Out << ");";
- nl(Out);
- }
- if (F->hasGC()) {
- printCppName(F);
- Out << "->setGC(\"" << F->getGC() << "\");";
- nl(Out);
- }
- if (is_inline) {
- Out << "}";
- nl(Out);
- }
- printAttributes(F->getAttributes(), getCppName(F));
+ }
+ if (F->hasGC()) {
printCppName(F);
- Out << "->setAttributes(" << getCppName(F) << "_PAL);";
+ Out << "->setGC(\"" << F->getGC() << "\");";
nl(Out);
}
+ if (is_inline) {
+ Out << "}";
+ nl(Out);
+ }
+ printAttributes(F->getAttributes(), getCppName(F));
+ printCppName(F);
+ Out << "->setAttributes(" << getCppName(F) << "_PAL);";
+ nl(Out);
+}
- void CppWriter::printFunctionBody(const Function *F) {
- if (F->isDeclaration())
- return; // external functions have no bodies.
-
- // Clear the DefinedValues and ForwardRefs maps because we can't have
- // cross-function forward refs
- ForwardRefs.clear();
- DefinedValues.clear();
+void CppWriter::printFunctionBody(const Function *F) {
+ if (F->isDeclaration())
+ return; // external functions have no bodies.
- // Create all the argument values
- if (!is_inline) {
- if (!F->arg_empty()) {
- Out << "Function::arg_iterator args = " << getCppName(F)
- << "->arg_begin();";
- nl(Out);
- }
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI) {
- Out << "Value* " << getCppName(AI) << " = args++;";
- nl(Out);
- if (AI->hasName()) {
- Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");";
- nl(Out);
- }
- }
- }
+ // Clear the DefinedValues and ForwardRefs maps because we can't have
+ // cross-function forward refs
+ ForwardRefs.clear();
+ DefinedValues.clear();
- // Create all the basic blocks
- nl(Out);
- for (Function::const_iterator BI = F->begin(), BE = F->end();
- BI != BE; ++BI) {
- std::string bbname(getCppName(BI));
- Out << "BasicBlock* " << bbname <<
- " = BasicBlock::Create(mod->getContext(), \"";
- if (BI->hasName())
- printEscapedString(BI->getName());
- Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ // Create all the argument values
+ if (!is_inline) {
+ if (!F->arg_empty()) {
+ Out << "Function::arg_iterator args = " << getCppName(F)
+ << "->arg_begin();";
nl(Out);
}
-
- // Output all of its basic blocks... for the function
- for (Function::const_iterator BI = F->begin(), BE = F->end();
- BI != BE; ++BI) {
- std::string bbname(getCppName(BI));
- nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << "Value* " << getCppName(AI) << " = args++;";
nl(Out);
-
- // Output all of the instructions in the basic block...
- for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
- I != E; ++I) {
- printInstruction(I,bbname);
+ if (AI->hasName()) {
+ Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");";
+ nl(Out);
}
}
+ }
- // Loop over the ForwardRefs and resolve them now that all instructions
- // are generated.
- if (!ForwardRefs.empty()) {
- nl(Out) << "// Resolve Forward References";
- nl(Out);
- }
+ // Create all the basic blocks
+ nl(Out);
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ Out << "BasicBlock* " << bbname <<
+ " = BasicBlock::Create(mod->getContext(), \"";
+ if (BI->hasName())
+ printEscapedString(BI->getName());
+ Out << "\"," << getCppName(BI->getParent()) << ",0);";
+ nl(Out);
+ }
- while (!ForwardRefs.empty()) {
- ForwardRefMap::iterator I = ForwardRefs.begin();
- Out << I->second << "->replaceAllUsesWith("
- << getCppName(I->first) << "); delete " << I->second << ";";
- nl(Out);
- ForwardRefs.erase(I);
+  // Output all of the function's basic blocks.
+ for (Function::const_iterator BI = F->begin(), BE = F->end();
+ BI != BE; ++BI) {
+ std::string bbname(getCppName(BI));
+ nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+ nl(Out);
+
+ // Output all of the instructions in the basic block...
+ for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
+ I != E; ++I) {
+ printInstruction(I,bbname);
}
}
- void CppWriter::printInline(const std::string& fname,
- const std::string& func) {
- const Function* F = TheModule->getFunction(func);
- if (!F) {
- error(std::string("Function '") + func + "' not found in input module");
- return;
- }
- if (F->isDeclaration()) {
- error(std::string("Function '") + func + "' is external!");
- return;
- }
- nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
- << getCppName(F);
- unsigned arg_count = 1;
- for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- AI != AE; ++AI) {
- Out << ", Value* arg_" << arg_count;
- }
- Out << ") {";
+ // Loop over the ForwardRefs and resolve them now that all instructions
+ // are generated.
+ if (!ForwardRefs.empty()) {
+ nl(Out) << "// Resolve Forward References";
nl(Out);
- is_inline = true;
- printFunctionUses(F);
- printFunctionBody(F);
- is_inline = false;
- Out << "return " << getCppName(F->begin()) << ";";
- nl(Out) << "}";
+ }
+
+ while (!ForwardRefs.empty()) {
+ ForwardRefMap::iterator I = ForwardRefs.begin();
+ Out << I->second << "->replaceAllUsesWith("
+ << getCppName(I->first) << "); delete " << I->second << ";";
nl(Out);
+ ForwardRefs.erase(I);
}
+}
- void CppWriter::printModuleBody() {
- // Print out all the type definitions
- nl(Out) << "// Type Definitions"; nl(Out);
- printTypes(TheModule);
-
- // Functions can call each other and global variables can reference them so
- // define all the functions first before emitting their function bodies.
- nl(Out) << "// Function Declarations"; nl(Out);
- for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
- I != E; ++I)
- printFunctionHead(I);
-
- // Process the global variables declarations. We can't initialze them until
- // after the constants are printed so just print a header for each global
- nl(Out) << "// Global Variable Declarations\n"; nl(Out);
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- printVariableHead(I);
- }
+void CppWriter::printInline(const std::string& fname,
+ const std::string& func) {
+ const Function* F = TheModule->getFunction(func);
+ if (!F) {
+ error(std::string("Function '") + func + "' not found in input module");
+ return;
+ }
+ if (F->isDeclaration()) {
+ error(std::string("Function '") + func + "' is external!");
+ return;
+ }
+ nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
+ << getCppName(F);
+ unsigned arg_count = 1;
+ for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+ AI != AE; ++AI) {
+ Out << ", Value* arg_" << arg_count;
+ }
+ Out << ") {";
+ nl(Out);
+ is_inline = true;
+ printFunctionUses(F);
+ printFunctionBody(F);
+ is_inline = false;
+ Out << "return " << getCppName(F->begin()) << ";";
+ nl(Out) << "}";
+ nl(Out);
+}
- // Print out all the constants definitions. Constants don't recurse except
- // through GlobalValues. All GlobalValues have been declared at this point
- // so we can proceed to generate the constants.
- nl(Out) << "// Constant Definitions"; nl(Out);
- printConstants(TheModule);
-
- // Process the global variables definitions now that all the constants have
- // been emitted. These definitions just couple the gvars with their constant
- // initializers.
- nl(Out) << "// Global Variable Definitions"; nl(Out);
- for (Module::const_global_iterator I = TheModule->global_begin(),
- E = TheModule->global_end(); I != E; ++I) {
- printVariableBody(I);
- }
+void CppWriter::printModuleBody() {
+ // Print out all the type definitions
+ nl(Out) << "// Type Definitions"; nl(Out);
+ printTypes(TheModule);
+
+ // Functions can call each other and global variables can reference them so
+ // define all the functions first before emitting their function bodies.
+ nl(Out) << "// Function Declarations"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I)
+ printFunctionHead(I);
+
+  // Process the global variable declarations. We can't initialize them until
+  // after the constants are printed, so just print a header for each global.
+  nl(Out) << "// Global Variable Declarations"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableHead(I);
+ }
- // Finally, we can safely put out all of the function bodies.
- nl(Out) << "// Function Definitions"; nl(Out);
- for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
- I != E; ++I) {
- if (!I->isDeclaration()) {
- nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
- << ")";
- nl(Out) << "{";
- nl(Out,1);
- printFunctionBody(I);
- nl(Out,-1) << "}";
- nl(Out);
- }
- }
+  // Print out all the constant definitions. Constants don't recurse except
+ // through GlobalValues. All GlobalValues have been declared at this point
+ // so we can proceed to generate the constants.
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstants(TheModule);
+
+  // Process the global variable definitions now that all the constants have
+ // been emitted. These definitions just couple the gvars with their constant
+ // initializers.
+ nl(Out) << "// Global Variable Definitions"; nl(Out);
+ for (Module::const_global_iterator I = TheModule->global_begin(),
+ E = TheModule->global_end(); I != E; ++I) {
+ printVariableBody(I);
}
- void CppWriter::printProgram(const std::string& fname,
- const std::string& mName) {
- Out << "#include <llvm/LLVMContext.h>\n";
- Out << "#include <llvm/Module.h>\n";
- Out << "#include <llvm/DerivedTypes.h>\n";
- Out << "#include <llvm/Constants.h>\n";
- Out << "#include <llvm/GlobalVariable.h>\n";
- Out << "#include <llvm/Function.h>\n";
- Out << "#include <llvm/CallingConv.h>\n";
- Out << "#include <llvm/BasicBlock.h>\n";
- Out << "#include <llvm/Instructions.h>\n";
- Out << "#include <llvm/InlineAsm.h>\n";
- Out << "#include <llvm/Support/FormattedStream.h>\n";
- Out << "#include <llvm/Support/MathExtras.h>\n";
- Out << "#include <llvm/Pass.h>\n";
- Out << "#include <llvm/PassManager.h>\n";
- Out << "#include <llvm/ADT/SmallVector.h>\n";
- Out << "#include <llvm/Analysis/Verifier.h>\n";
- Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
- Out << "#include <algorithm>\n";
- Out << "using namespace llvm;\n\n";
- Out << "Module* " << fname << "();\n\n";
- Out << "int main(int argc, char**argv) {\n";
- Out << " Module* Mod = " << fname << "();\n";
- Out << " verifyModule(*Mod, PrintMessageAction);\n";
- Out << " PassManager PM;\n";
- Out << " PM.add(createPrintModulePass(&outs()));\n";
- Out << " PM.run(*Mod);\n";
- Out << " return 0;\n";
- Out << "}\n\n";
- printModule(fname,mName);
- }
-
- void CppWriter::printModule(const std::string& fname,
- const std::string& mName) {
- nl(Out) << "Module* " << fname << "() {";
- nl(Out,1) << "// Module Construction";
- nl(Out) << "Module* mod = new Module(\"";
- printEscapedString(mName);
- Out << "\", getGlobalContext());";
- if (!TheModule->getTargetTriple().empty()) {
- nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
- }
- if (!TheModule->getTargetTriple().empty()) {
- nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
- << "\");";
+ // Finally, we can safely put out all of the function bodies.
+ nl(Out) << "// Function Definitions"; nl(Out);
+ for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+ I != E; ++I) {
+ if (!I->isDeclaration()) {
+ nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+ << ")";
+ nl(Out) << "{";
+ nl(Out,1);
+ printFunctionBody(I);
+ nl(Out,-1) << "}";
+ nl(Out);
}
+ }
+}
- if (!TheModule->getModuleInlineAsm().empty()) {
- nl(Out) << "mod->setModuleInlineAsm(\"";
- printEscapedString(TheModule->getModuleInlineAsm());
- Out << "\");";
- }
- nl(Out);
+void CppWriter::printProgram(const std::string& fname,
+ const std::string& mName) {
+ Out << "#include <llvm/LLVMContext.h>\n";
+ Out << "#include <llvm/Module.h>\n";
+ Out << "#include <llvm/DerivedTypes.h>\n";
+ Out << "#include <llvm/Constants.h>\n";
+ Out << "#include <llvm/GlobalVariable.h>\n";
+ Out << "#include <llvm/Function.h>\n";
+ Out << "#include <llvm/CallingConv.h>\n";
+ Out << "#include <llvm/BasicBlock.h>\n";
+ Out << "#include <llvm/Instructions.h>\n";
+ Out << "#include <llvm/InlineAsm.h>\n";
+ Out << "#include <llvm/Support/FormattedStream.h>\n";
+ Out << "#include <llvm/Support/MathExtras.h>\n";
+ Out << "#include <llvm/Pass.h>\n";
+ Out << "#include <llvm/PassManager.h>\n";
+ Out << "#include <llvm/ADT/SmallVector.h>\n";
+ Out << "#include <llvm/Analysis/Verifier.h>\n";
+ Out << "#include <llvm/Assembly/PrintModulePass.h>\n";
+ Out << "#include <algorithm>\n";
+ Out << "using namespace llvm;\n\n";
+ Out << "Module* " << fname << "();\n\n";
+ Out << "int main(int argc, char**argv) {\n";
+ Out << " Module* Mod = " << fname << "();\n";
+ Out << " verifyModule(*Mod, PrintMessageAction);\n";
+ Out << " PassManager PM;\n";
+ Out << " PM.add(createPrintModulePass(&outs()));\n";
+ Out << " PM.run(*Mod);\n";
+ Out << " return 0;\n";
+ Out << "}\n\n";
+ printModule(fname,mName);
+}
- // Loop over the dependent libraries and emit them.
- Module::lib_iterator LI = TheModule->lib_begin();
- Module::lib_iterator LE = TheModule->lib_end();
- while (LI != LE) {
- Out << "mod->addLibrary(\"" << *LI << "\");";
- nl(Out);
- ++LI;
- }
- printModuleBody();
- nl(Out) << "return mod;";
- nl(Out,-1) << "}";
+void CppWriter::printModule(const std::string& fname,
+ const std::string& mName) {
+ nl(Out) << "Module* " << fname << "() {";
+ nl(Out,1) << "// Module Construction";
+ nl(Out) << "Module* mod = new Module(\"";
+ printEscapedString(mName);
+ Out << "\", getGlobalContext());";
+  if (!TheModule->getDataLayout().empty()) {
+    nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+  }
+ if (!TheModule->getTargetTriple().empty()) {
+ nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
+ << "\");";
+ }
+
+ if (!TheModule->getModuleInlineAsm().empty()) {
+ nl(Out) << "mod->setModuleInlineAsm(\"";
+ printEscapedString(TheModule->getModuleInlineAsm());
+ Out << "\");";
+ }
+ nl(Out);
+
+ // Loop over the dependent libraries and emit them.
+ Module::lib_iterator LI = TheModule->lib_begin();
+ Module::lib_iterator LE = TheModule->lib_end();
+ while (LI != LE) {
+ Out << "mod->addLibrary(\"" << *LI << "\");";
nl(Out);
+ ++LI;
}
+ printModuleBody();
+ nl(Out) << "return mod;";
+ nl(Out,-1) << "}";
+ nl(Out);
+}
+
+void CppWriter::printContents(const std::string& fname,
+ const std::string& mName) {
+ Out << "\nModule* " << fname << "(Module *mod) {\n";
+ Out << "\nmod->setModuleIdentifier(\"";
+ printEscapedString(mName);
+ Out << "\");\n";
+ printModuleBody();
+ Out << "\nreturn mod;\n";
+ Out << "\n}\n";
+}
- void CppWriter::printContents(const std::string& fname,
- const std::string& mName) {
- Out << "\nModule* " << fname << "(Module *mod) {\n";
- Out << "\nmod->setModuleIdentifier(\"";
- printEscapedString(mName);
- Out << "\");\n";
- printModuleBody();
- Out << "\nreturn mod;\n";
- Out << "\n}\n";
+void CppWriter::printFunction(const std::string& fname,
+ const std::string& funcName) {
+ const Function* F = TheModule->getFunction(funcName);
+ if (!F) {
+ error(std::string("Function '") + funcName + "' not found in input module");
+ return;
}
+ Out << "\nFunction* " << fname << "(Module *mod) {\n";
+ printFunctionUses(F);
+ printFunctionHead(F);
+ printFunctionBody(F);
+ Out << "return " << getCppName(F) << ";\n";
+ Out << "}\n";
+}
- void CppWriter::printFunction(const std::string& fname,
- const std::string& funcName) {
- const Function* F = TheModule->getFunction(funcName);
- if (!F) {
- error(std::string("Function '") + funcName + "' not found in input module");
- return;
- }
- Out << "\nFunction* " << fname << "(Module *mod) {\n";
- printFunctionUses(F);
- printFunctionHead(F);
- printFunctionBody(F);
- Out << "return " << getCppName(F) << ";\n";
- Out << "}\n";
- }
-
- void CppWriter::printFunctions() {
- const Module::FunctionListType &funcs = TheModule->getFunctionList();
- Module::const_iterator I = funcs.begin();
- Module::const_iterator IE = funcs.end();
-
- for (; I != IE; ++I) {
- const Function &func = *I;
- if (!func.isDeclaration()) {
- std::string name("define_");
- name += func.getName();
- printFunction(name, func.getName());
- }
+void CppWriter::printFunctions() {
+ const Module::FunctionListType &funcs = TheModule->getFunctionList();
+ Module::const_iterator I = funcs.begin();
+ Module::const_iterator IE = funcs.end();
+
+ for (; I != IE; ++I) {
+ const Function &func = *I;
+ if (!func.isDeclaration()) {
+ std::string name("define_");
+ name += func.getName();
+ printFunction(name, func.getName());
}
}
+}
- void CppWriter::printVariable(const std::string& fname,
- const std::string& varName) {
- const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
+void CppWriter::printVariable(const std::string& fname,
+ const std::string& varName) {
+ const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
- if (!GV) {
- error(std::string("Variable '") + varName + "' not found in input module");
- return;
- }
- Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
- printVariableUses(GV);
- printVariableHead(GV);
- printVariableBody(GV);
- Out << "return " << getCppName(GV) << ";\n";
- Out << "}\n";
- }
-
- void CppWriter::printType(const std::string& fname,
- const std::string& typeName) {
- const Type* Ty = TheModule->getTypeByName(typeName);
- if (!Ty) {
- error(std::string("Type '") + typeName + "' not found in input module");
- return;
- }
- Out << "\nType* " << fname << "(Module *mod) {\n";
- printType(Ty);
- Out << "return " << getCppName(Ty) << ";\n";
- Out << "}\n";
- }
-
- bool CppWriter::runOnModule(Module &M) {
- TheModule = &M;
-
- // Emit a header
- Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
-
- // Get the name of the function we're supposed to generate
- std::string fname = FuncName.getValue();
-
- // Get the name of the thing we are to generate
- std::string tgtname = NameToGenerate.getValue();
- if (GenerationType == GenModule ||
- GenerationType == GenContents ||
- GenerationType == GenProgram ||
- GenerationType == GenFunctions) {
- if (tgtname == "!bad!") {
- if (M.getModuleIdentifier() == "-")
- tgtname = "<stdin>";
- else
- tgtname = M.getModuleIdentifier();
- }
- } else if (tgtname == "!bad!")
- error("You must use the -for option with -gen-{function,variable,type}");
-
- switch (WhatToGenerate(GenerationType)) {
- case GenProgram:
- if (fname.empty())
- fname = "makeLLVMModule";
- printProgram(fname,tgtname);
- break;
- case GenModule:
- if (fname.empty())
- fname = "makeLLVMModule";
- printModule(fname,tgtname);
- break;
- case GenContents:
- if (fname.empty())
- fname = "makeLLVMModuleContents";
- printContents(fname,tgtname);
- break;
- case GenFunction:
- if (fname.empty())
- fname = "makeLLVMFunction";
- printFunction(fname,tgtname);
- break;
- case GenFunctions:
- printFunctions();
- break;
- case GenInline:
- if (fname.empty())
- fname = "makeLLVMInline";
- printInline(fname,tgtname);
- break;
- case GenVariable:
- if (fname.empty())
- fname = "makeLLVMVariable";
- printVariable(fname,tgtname);
- break;
- case GenType:
- if (fname.empty())
- fname = "makeLLVMType";
- printType(fname,tgtname);
- break;
- default:
- error("Invalid generation option");
- }
+ if (!GV) {
+ error(std::string("Variable '") + varName + "' not found in input module");
+ return;
+ }
+ Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
+ printVariableUses(GV);
+ printVariableHead(GV);
+ printVariableBody(GV);
+ Out << "return " << getCppName(GV) << ";\n";
+ Out << "}\n";
+}
- return false;
+void CppWriter::printType(const std::string& fname,
+ const std::string& typeName) {
+ const Type* Ty = TheModule->getTypeByName(typeName);
+ if (!Ty) {
+ error(std::string("Type '") + typeName + "' not found in input module");
+ return;
}
+ Out << "\nType* " << fname << "(Module *mod) {\n";
+ printType(Ty);
+ Out << "return " << getCppName(Ty) << ";\n";
+ Out << "}\n";
+}
+
+bool CppWriter::runOnModule(Module &M) {
+ TheModule = &M;
+
+ // Emit a header
+ Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
+
+ // Get the name of the function we're supposed to generate
+ std::string fname = FuncName.getValue();
+
+ // Get the name of the thing we are to generate
+ std::string tgtname = NameToGenerate.getValue();
+ if (GenerationType == GenModule ||
+ GenerationType == GenContents ||
+ GenerationType == GenProgram ||
+ GenerationType == GenFunctions) {
+ if (tgtname == "!bad!") {
+ if (M.getModuleIdentifier() == "-")
+ tgtname = "<stdin>";
+ else
+ tgtname = M.getModuleIdentifier();
+ }
+ } else if (tgtname == "!bad!")
+ error("You must use the -for option with -gen-{function,variable,type}");
+
+ switch (WhatToGenerate(GenerationType)) {
+ case GenProgram:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printProgram(fname,tgtname);
+ break;
+ case GenModule:
+ if (fname.empty())
+ fname = "makeLLVMModule";
+ printModule(fname,tgtname);
+ break;
+ case GenContents:
+ if (fname.empty())
+ fname = "makeLLVMModuleContents";
+ printContents(fname,tgtname);
+ break;
+ case GenFunction:
+ if (fname.empty())
+ fname = "makeLLVMFunction";
+ printFunction(fname,tgtname);
+ break;
+ case GenFunctions:
+ printFunctions();
+ break;
+ case GenInline:
+ if (fname.empty())
+ fname = "makeLLVMInline";
+ printInline(fname,tgtname);
+ break;
+ case GenVariable:
+ if (fname.empty())
+ fname = "makeLLVMVariable";
+ printVariable(fname,tgtname);
+ break;
+ case GenType:
+ if (fname.empty())
+ fname = "makeLLVMType";
+ printType(fname,tgtname);
+ break;
+ default:
+ error("Invalid generation option");
+ }
+
+ return false;
}
char CppWriter::ID = 0;
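// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not from the patch): the CallInst changes
// above move CppWriter from raw operand indexing to getNumArgOperands() /
// getArgOperand(), matching the operand layout of this LLVM snapshot, where a
// CallInst stores its callee after its arguments. Assuming hypothetical names
// (func_callee, ptr_x, label_entry), the generator now emits code shaped like:
//
//   CallInst* call_tmp = CallInst::Create(func_callee, ptr_x,
//                                         "tmp", label_entry);
//   call_tmp->setCallingConv(CallingConv::C);
//   call_tmp->setTailCall(false);
// ---------------------------------------------------------------------------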
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index e42e9b3..b6e4d65 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -145,8 +145,9 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
const MachineFrameInfo *MFI = MF->getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(CSI[i].getReg());
- if (CSI[i].getRegClass() == MBlaze::CPURegsRegisterClass)
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
+ if (MBlaze::CPURegsRegisterClass->contains(Reg))
CPUBitmask |= (1 << RegNum);
}
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 23889b1..1730b68 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -234,6 +234,24 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineRegisterInfo &R = F->getRegInfo();
MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loop);
+ F->insert(It, finish);
+
+  // Update machine-CFG edges by transferring all successors and remaining
+  // instructions from the current block to the new block, which will
+  // contain the Phi node for the select.
+ finish->splice(finish->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ finish->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(loop);
+ BB->addSuccessor(finish);
+
+ // Next, add the finish block as a successor of the loop block
+ loop->addSuccessor(finish);
+ loop->addSuccessor(loop);
unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT)
@@ -249,26 +267,6 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(IAMT)
.addMBB(finish);
- F->insert(It, loop);
- F->insert(It, finish);
-
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- finish->addSuccessor(*i);
-
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
- BB->addSuccessor(loop);
- BB->addSuccessor(finish);
-
- // Next, add the finish block as a successor of the loop block
- loop->addSuccessor(finish);
- loop->addSuccessor(loop);
-
unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
@@ -298,12 +296,13 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
.addReg(NAMT)
.addMBB(loop);
- BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ BuildMI(*finish, finish->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
.addReg(IVAL).addMBB(BB)
.addReg(NDST).addMBB(loop);
// The pseudo instruction is no longer needed so remove it
- F->DeleteMachineInstr(MI);
+ MI->eraseFromParent();
return finish;
}
@@ -338,27 +337,23 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case MBlazeCC::LE: Opc = MBlaze::BGTID; break;
}
- BuildMI(BB, dl, TII->get(Opc))
- .addReg(MI->getOperand(3).getReg())
- .addMBB(dneBB);
-
F->insert(It, flsBB);
F->insert(It, dneBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- dneBB->addSuccessor(*i);
+ // Transfer the remainder of BB and its successor edges to dneBB.
+ dneBB->splice(dneBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ dneBB->transferSuccessorsAndUpdatePHIs(BB);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
BB->addSuccessor(flsBB);
BB->addSuccessor(dneBB);
flsBB->addSuccessor(dneBB);
+ BuildMI(BB, dl, TII->get(Opc))
+ .addReg(MI->getOperand(3).getReg())
+ .addMBB(dneBB);
+
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
@@ -366,11 +361,12 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
// .addReg(MI->getOperand(2).getReg()).addMBB(BB);
- BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ BuildMI(*dneBB, dneBB->begin(), dl,
+ TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
.addReg(MI->getOperand(1).getReg()).addMBB(BB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return dneBB;
}
}
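// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not from the patch): both custom inserters
// above replace hand-copied successor lists with the same splice idiom.
// Assuming MI is the pseudo instruction being expanded in BB, and NewMBB is
// the block that will hold the PHI, the pattern is:
//
//   NewMBB->splice(NewMBB->begin(), BB,
//                  llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
//   NewMBB->transferSuccessorsAndUpdatePHIs(BB); // move edges, fix PHIs
//   BB->addSuccessor(NewMBB);                    // re-add the new CFG edges
//   MI->eraseFromParent();                       // pseudo no longer needed
// ---------------------------------------------------------------------------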
@@ -408,7 +404,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
}
@@ -439,10 +435,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
SDValue MBlazeTargetLowering::
LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
SDValue ResNode;
- EVT PtrVT = Op.getValueType();
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
const Constant *C = N->getConstVal();
- SDValue Zero = DAG.getConstant(0, PtrVT);
DebugLoc dl = Op.getDebugLoc();
SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
@@ -531,6 +525,7 @@ SDValue MBlazeTargetLowering::
LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -562,7 +557,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -590,7 +585,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// Create the frame index object for this incoming parameter
LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
- LastArgStackLoc, true, false);
+ LastArgStackLoc, true);
SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
@@ -623,7 +618,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
// node so that legalize doesn't hack it.
unsigned char OpFlag = MBlazeII::MO_NO_FLAG;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
getPointerTy(), 0, OpFlag);
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
@@ -779,7 +774,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
// offset on PEI::calculateFrameObjectOffsets.
// Arguments are always 32-bit.
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true);
MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+
(FirstStackArgLoc + VA.getLocMemOffset())));
@@ -810,7 +805,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
unsigned LiveReg = MF.addLiveIn(Reg, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
- int FI = MFI->CreateFixedObject(4, 0, true, false);
+ int FI = MFI->CreateFixedObject(4, 0, true);
MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
@@ -841,6 +836,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
SDValue MBlazeTargetLowering::
LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
// the return value to a location
@@ -869,7 +865,7 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
// guarantee that all emitted copies are
// stuck together, avoiding something bad
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 9f9ac89..5ec2563 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -109,6 +109,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -117,6 +118,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
virtual MachineBasicBlock *
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 4c4d86b..6ff5825 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -110,15 +110,13 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
BuildMI(MBB, MI, DL, get(MBlaze::NOP));
}
-bool MBlazeInstrInfo::
-copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
+void MBlazeInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg)
- .addReg(SrcReg).addReg(MBlaze::R0);
- return true;
+ .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0);
}
void MBlazeInstrInfo::
@@ -141,54 +139,17 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(0).addFrameIndex(FI);
}
-MachineInstr *MBlazeInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const {
- if (Ops.size() != 1) return NULL;
-
- MachineInstr *NewMI = NULL;
-
- switch (MI->getOpcode()) {
- case MBlaze::OR:
- case MBlaze::ADD:
- if ((MI->getOperand(0).isReg()) &&
- (MI->getOperand(2).isReg()) &&
- (MI->getOperand(2).getReg() == MBlaze::R0) &&
- (MI->getOperand(1).isReg())) {
- if (Ops[0] == 0) { // COPY -> STORE
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::SW))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI);
- } else { // COPY -> LOAD
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::LW))
- .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI);
- }
- }
- break;
- }
-
- return NewMI;
-}
-
//===----------------------------------------------------------------------===//
// Branch Analysis
//===----------------------------------------------------------------------===//
unsigned MBlazeInstrInfo::
InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Can only insert uncond branches so far.
assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
- BuildMI(&MBB, DebugLoc(), get(MBlaze::BRI)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(MBlaze::BRI)).addMBB(TBB);
return 1;
}
@@ -209,12 +170,8 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
- bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20,
- MBlaze::CPURegsRegisterClass,
- MBlaze::CPURegsRegisterClass,
- DebugLoc());
- assert(Ok && "Couldn't assign to global base register!");
- Ok = Ok; // Silence warning when assertions are turned off.
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(MBlaze::R20);
RegInfo.addLiveIn(MBlaze::R20);
MBlazeFI->setGlobalBaseReg(GlobalBaseReg);
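// ---------------------------------------------------------------------------
// Editor's sketch (illustrative, not from the patch): copyRegToReg() is
// retired here in favor of emitting the target-independent COPY pseudo, which
// the target's new copyPhysReg() hook expands later. The replacement idiom
// used above is:
//
//   BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
//           GlobalBaseReg).addReg(MBlaze::R20);
// ---------------------------------------------------------------------------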
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index c9fdc88..f074370 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -198,13 +198,12 @@ public:
/// Branch Analysis
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -217,18 +216,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
/// Insert nop instruction when hazard condition is found
virtual void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index f15eea9..8cafa8c 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -148,22 +148,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const {
return CalleeSavedRegs;
}
-/// MBlaze Callee Saved Register Classes
-const TargetRegisterClass* const* MBlazeRegisterInfo::
-getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRC[] = {
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
- 0
- };
-
- return CalleeSavedRC;
-}
-
BitVector MBlazeRegisterInfo::
getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index b618bf4..af97b0e 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -54,9 +54,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 3de173c..8f97d25 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -808,7 +808,7 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
std::string Name;
switch (Inst->getIntrinsicID()) {
case Intrinsic::vastart:
- Name = getValueName(Inst->getOperand(1));
+ Name = getValueName(Inst->getArgOperand(0));
Name.insert(Name.length()-1,"$valist");
// Obtain the argument handle.
printSimpleInstruction("ldloca",Name.c_str());
@@ -817,20 +817,20 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
"instance void [mscorlib]System.ArgIterator::.ctor"
"(valuetype [mscorlib]System.RuntimeArgumentHandle)");
// Save as pointer type "void*"
- printValueLoad(Inst->getOperand(1));
+ printValueLoad(Inst->getArgOperand(0));
printSimpleInstruction("ldloca",Name.c_str());
printIndirectSave(PointerType::getUnqual(
IntegerType::get(Inst->getContext(), 8)));
break;
case Intrinsic::vaend:
// Close argument list handle.
- printIndirectLoad(Inst->getOperand(1));
+ printIndirectLoad(Inst->getArgOperand(0));
printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()");
break;
case Intrinsic::vacopy:
// Copy "ArgIterator" valuetype.
- printIndirectLoad(Inst->getOperand(1));
- printIndirectLoad(Inst->getOperand(2));
+ printIndirectLoad(Inst->getArgOperand(0));
+ printIndirectLoad(Inst->getArgOperand(1));
printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
break;
default:
@@ -845,10 +845,11 @@ void MSILWriter::printCallInstruction(const Instruction* Inst) {
// Handle intrinsic function.
printIntrinsicCall(cast<IntrinsicInst>(Inst));
} else {
+ const CallInst *CI = cast<CallInst>(Inst);
// Load arguments to stack and call function.
- for (int I = 1, E = Inst->getNumOperands(); I!=E; ++I)
- printValueLoad(Inst->getOperand(I));
- printFunctionCall(Inst->getOperand(0),Inst);
+ for (int I = 0, E = CI->getNumArgOperands(); I!=E; ++I)
+ printValueLoad(CI->getArgOperand(I));
+ printFunctionCall(CI->getCalledFunction(), Inst);
}
}
@@ -1002,8 +1003,8 @@ void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) {
std::string Label = "leave$normal_"+utostr(getUniqID());
Out << ".try {\n";
// Load arguments
- for (int I = 3, E = Inst->getNumOperands(); I!=E; ++I)
- printValueLoad(Inst->getOperand(I));
+ for (int I = 0, E = Inst->getNumArgOperands(); I!=E; ++I)
+ printValueLoad(Inst->getArgOperand(I));
// Print call instruction
printFunctionCall(Inst->getOperand(0),Inst);
// Save function result and leave "try" block
@@ -1280,7 +1281,7 @@ void MSILWriter::printLocalVariables(const Function& F) {
case Intrinsic::vaend:
case Intrinsic::vacopy:
isVaList = true;
- VaList = Inst->getOperand(1);
+ VaList = Inst->getArgOperand(0);
break;
default:
isVaList = false;
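
[Note] Every MSILWriter change above is the same migration: call-site operands of a CallInst/IntrinsicInst move from getOperand(1..N), with the callee at operand 0, to getArgOperand(0..N-1), with the callee fetched explicitly. A minimal sketch under this tree's 2010-era headers (visitArgs is a hypothetical helper):

  #include "llvm/Instructions.h"

  // Walk a call's arguments with the new accessors; the old code had
  // to skip operand 0 (the callee) and index from 1.
  static void visitArgs(const llvm::CallInst *CI) {
    for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I) {
      const llvm::Value *Arg = CI->getArgOperand(I); // was getOperand(I + 1)
      (void)Arg; // ... inspect the argument here ...
    }
  }

The same off-by-one appears in the invoke hunk, where the loop start drops from operand 3 to argument 0.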
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 7b328bb..3395e9f 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -272,7 +272,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
AM.Base.Reg;
if (AM.GV)
- Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i16, AM.Disp,
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, Op->getDebugLoc(),
+ MVT::i16, AM.Disp,
0/*AM.SymbolFlags*/);
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16,
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 403400e..a1703a3 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -278,6 +278,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -290,7 +291,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CallingConv::Fast:
case CallingConv::C:
return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
- Outs, Ins, dl, DAG, InVals);
+ Outs, OutVals, Ins, dl, DAG, InVals);
case CallingConv::MSP430_INTR:
report_fatal_error("ISRs cannot be called directly");
return SDValue();
@@ -369,7 +370,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
<< "\n";
}
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false);
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
// Create the SelectionDAG nodes corresponding to a load
  // from this parameter
@@ -387,6 +388,7 @@ SDValue
MSP430TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location
@@ -421,7 +423,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
// Guarantee that all emitted copies are stuck together,
// avoiding something bad.
@@ -447,6 +449,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg>
&Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -471,7 +474,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -529,7 +532,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i16);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
@@ -642,7 +645,8 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
// Create the TargetGlobalAddress node, folding in the constant offset.
- SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ SDValue Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ getPointerTy(), Offset);
return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
getPointerTy(), Result);
}
@@ -888,7 +892,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
- true, false);
+ true);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -1070,7 +1074,10 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
// Update machine-CFG edges by transferring all successors of the current
// block to the block containing instructions after shift.
- RemBB->transferSuccessors(BB);
+ RemBB->splice(RemBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ RemBB->transferSuccessorsAndUpdatePHIs(BB);
  // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
BB->addSuccessor(LoopBB);
@@ -1116,11 +1123,11 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
// RemBB:
// DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
- BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg)
+ BuildMI(*RemBB, RemBB->begin(), dl, TII.get(MSP430::PHI), DstReg)
.addReg(SrcReg).addMBB(BB)
.addReg(ShiftReg2).addMBB(LoopBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return RemBB;
}
@@ -1158,18 +1165,22 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
- BuildMI(BB, dl, TII.get(MSP430::JCC))
- .addMBB(copy1MBB)
- .addImm(MI->getOperand(3).getImm());
F->insert(I, copy0MBB);
F->insert(I, copy1MBB);
// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
- copy1MBB->transferSuccessors(BB);
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(copy1MBB);
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(copy1MBB)
+ .addImm(MI->getOperand(3).getImm());
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to copy1MBB
@@ -1182,11 +1193,11 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = copy1MBB;
- BuildMI(BB, dl, TII.get(MSP430::PHI),
+ BuildMI(*BB, BB->begin(), dl, TII.get(MSP430::PHI),
MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
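
[Note] Two recurring replacements run through the custom-inserter hunks above: transferSuccessors() becomes splice() plus transferSuccessorsAndUpdatePHIs(), so the instructions after the pseudo move into the new block and PHIs in former successors are rewritten, and F->DeleteMachineInstr(MI) becomes MI->eraseFromParent(). A condensed sketch of the new idiom, assuming this tree's headers (splitAtPseudo and the block names are illustrative):

  #include "llvm/ADT/STLExtras.h"              // llvm::next
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstr.h"

  // BB holds the pseudo MI; JoinBB is the freshly created join block.
  static void splitAtPseudo(llvm::MachineBasicBlock *BB,
                            llvm::MachineBasicBlock *JoinBB,
                            llvm::MachineInstr *MI) {
    // Move everything after the pseudo into the join block...
    JoinBB->splice(JoinBB->begin(), BB,
                   llvm::next(llvm::MachineBasicBlock::iterator(MI)),
                   BB->end());
    // ...and hand over BB's successors, rewriting their PHI entries.
    JoinBB->transferSuccessorsAndUpdatePHIs(BB);
    MI->eraseFromParent(); // replaces F->DeleteMachineInstr(MI)
  }

Note also that the conditional branch is now built only after the splice, so it stays in BB rather than being carried into the new block, and the join-block PHI is created at the block's begin() instead of being appended.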
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 01c5071..673c543 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -127,6 +127,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -155,6 +156,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -163,6 +165,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
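
[Note] The header hunk above mirrors the .cpp changes: LowerCall, LowerCCCCallTo, and LowerReturn each gain a SmallVectorImpl<SDValue> &OutVals parameter because the SDValue payload was split out of ISD::OutputArg; Outs[i] keeps the flags and type while the value itself now comes from OutVals[i]. A minimal sketch of the resulting return-lowering loop, assuming this tree's headers (copyReturnValues is a hypothetical helper; RVLocs is assumed to be filled by AnalyzeReturn beforehand):

  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/SelectionDAG.h"
  #include <cassert>

  static llvm::SDValue
  copyReturnValues(llvm::SelectionDAG &DAG, llvm::DebugLoc dl,
                   llvm::SDValue Chain,
                   const llvm::SmallVectorImpl<llvm::CCValAssign> &RVLocs,
                   const llvm::SmallVectorImpl<llvm::SDValue> &OutVals) {
    llvm::SDValue Flag;
    for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
      const llvm::CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                               OutVals[i], Flag); // was Outs[i].Val
      Flag = Chain.getValue(1); // glue the copies together
    }
    return Chain;
  }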
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 18226ab..df28d07 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -83,27 +83,20 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
llvm_unreachable("Cannot store this register to stack slot!");
}
-bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC == SrcRC) {
- unsigned Opc;
- if (DestRC == &MSP430::GR16RegClass) {
- Opc = MSP430::MOV16rr;
- } else if (DestRC == &MSP430::GR8RegClass) {
- Opc = MSP430::MOV8rr;
- } else {
- return false;
- }
-
- BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg);
- return true;
- }
+void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (MSP430::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV16rr;
+ else if (MSP430::GR8RegClass.contains(DestReg, SrcReg))
+ Opc = MSP430::MOV8rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
- return false;
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
bool
@@ -330,10 +323,8 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned
MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc operand
- DebugLoc DL;
-
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
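
[Note] The copyRegToReg body above is replaced by copyPhysReg: dispatch moves from TargetRegisterClass pointers to physical-register membership, the bool failure path becomes llvm_unreachable, and the caller now states whether the source register dies at the copy. Caller-side, the contract change looks like this minimal sketch under this tree's headers (emitCopy is a hypothetical helper):

  #include "llvm/Target/TargetInstrInfo.h"

  static void emitCopy(const llvm::TargetInstrInfo &TII,
                       llvm::MachineBasicBlock &MBB,
                       llvm::MachineBasicBlock::iterator I,
                       llvm::DebugLoc DL,
                       unsigned Dst, unsigned Src, bool SrcIsDead) {
    // old: if (!TII.copyRegToReg(MBB, I, Dst, Src, DstRC, SrcRC, DL))
    //        the caller had to handle an impossible copy;
    // new: copyPhysReg cannot fail, and takes the kill state directly.
    TII.copyPhysReg(MBB, I, DL, Dst, Src, /*KillSrc=*/SrcIsDead);
  }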
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 842b4cb..ebbda1a 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -49,11 +49,10 @@ public:
///
virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
- bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
bool isMoveInstr(const MachineInstr& MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -93,7 +92,8 @@ public:
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
};
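
[Note] InsertBranch also changes shape in this header: the DebugLoc is now passed in by the caller, retiring the default-constructed one (and the FIXME) inside the .cpp. A minimal sketch of a call site, assuming this tree's headers (emitUncondBranch is a hypothetical helper):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Target/TargetInstrInfo.h"

  static unsigned emitUncondBranch(const llvm::TargetInstrInfo &TII,
                                   llvm::MachineBasicBlock &MBB,
                                   llvm::MachineBasicBlock *Target,
                                   llvm::DebugLoc DL) {
    llvm::SmallVector<llvm::MachineOperand, 1> NoCond; // unconditional
    return TII.InsertBranch(MBB, Target, /*FBB=*/0, NoCond, DL);
  }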
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 6b9a2f2..8792b22 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -25,13 +25,16 @@ class SDTCisI16<int OpNum> : SDTCisVT<OpNum, i16>;
def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
-def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+ SDTCisPtrTy<0>]>;
def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>]>;
-def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
-def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>;
+def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+ SDTCisI8<2>]>;
//===----------------------------------------------------------------------===//
// MSP430 Specific Node Definitions.
@@ -46,7 +49,7 @@ def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>;
def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call,
- [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>;
+ [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>;
def MSP430callseq_start :
SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart,
[SDNPHasChain, SDNPOutFlag]>;
@@ -55,8 +58,10 @@ def MSP430callseq_end :
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
-def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>;
-def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>;
+def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC,
+ [SDNPHasChain, SDNPInFlag]>;
+def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC,
+ [SDNPInFlag]>;
def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
@@ -117,14 +122,14 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
}
let usesCustomInserter = 1 in {
- def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cc),
+ def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc),
"# Select8 PSEUDO",
[(set GR8:$dst,
- (MSP430selectcc GR8:$src1, GR8:$src2, imm:$cc))]>;
- def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cc),
+ (MSP430selectcc GR8:$src, GR8:$src2, imm:$cc))]>;
+ def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR16:$src2, i8imm:$cc),
"# Select16 PSEUDO",
[(set GR16:$dst,
- (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>;
+ (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>;
let Defs = [SRW] in {
def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
"# Shl8 PSEUDO",
@@ -330,60 +335,60 @@ def MOV16mm : I16mm<0x0,
//===----------------------------------------------------------------------===//
// Arithmetic Instructions
-let isTwoAddress = 1 in {
+let Constraints = "$src = $dst" in {
let Defs = [SRW] in {
let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
def ADD8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (add GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def ADD16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (add GR16:$src, GR16:$src2)),
(implicit SRW)]>;
}
def ADD8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (add GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def ADD16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (add GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src1, GR16:$base),
+ (ins GR8:$src, GR16:$base),
"add.b\t{@$base+, $dst}", []>;
def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src1, GR16:$base),
+ (ins GR16:$src, GR16:$base),
"add.w\t{@$base+, $dst}", []>;
}
def ADD8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"add.b\t{$src2, $dst}",
- [(set GR8:$dst, (add GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (add GR8:$src, imm:$src2)),
(implicit SRW)]>;
def ADD16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"add.w\t{$src2, $dst}",
- [(set GR16:$dst, (add GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (add GR16:$src, imm:$src2)),
(implicit SRW)]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def ADD8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"add.b\t{$src, $dst}",
@@ -424,40 +429,40 @@ let Uses = [SRW] in {
let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y
def ADC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (adde GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def ADC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (adde GR16:$src, GR16:$src2)),
(implicit SRW)]>;
} // isCommutable
def ADC8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (adde GR8:$src, imm:$src2)),
(implicit SRW)]>;
def ADC16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (adde GR16:$src, imm:$src2)),
(implicit SRW)]>;
def ADC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"addc.b\t{$src2, $dst}",
- [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def ADC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"addc.w\t{$src2, $dst}",
- [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def ADC8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"addc.b\t{$src, $dst}",
@@ -498,52 +503,52 @@ def ADC16mm : I8mm<0x0,
let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y
def AND8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (and GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def AND16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (and GR16:$src, GR16:$src2)),
(implicit SRW)]>;
}
def AND8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (and GR8:$src, imm:$src2)),
(implicit SRW)]>;
def AND16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (and GR16:$src, imm:$src2)),
(implicit SRW)]>;
def AND8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"and.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (and GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def AND16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"and.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (and GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src1, GR16:$base),
+ (ins GR8:$src, GR16:$base),
"and.b\t{@$base+, $dst}", []>;
def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src1, GR16:$base),
+ (ins GR16:$src, GR16:$base),
"and.w\t{@$base+, $dst}", []>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def AND8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"and.b\t{$src, $dst}",
@@ -582,46 +587,46 @@ def AND16mm : I16mm<0x0,
let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
def OR8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
+ [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>;
def OR16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>;
+ [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>;
}
def OR8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
+ [(set GR8:$dst, (or GR8:$src, imm:$src2))]>;
def OR16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>;
+ [(set GR16:$dst, (or GR16:$src, imm:$src2))]>;
def OR8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
+ [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>;
def OR16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>;
+ [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src1, GR16:$base),
+ (ins GR8:$src, GR16:$base),
"bis.b\t{@$base+, $dst}", []>;
def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src1, GR16:$base),
+ (ins GR16:$src, GR16:$base),
"bis.w\t{@$base+, $dst}", []>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def OR8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"bis.b\t{$src, $dst}",
@@ -654,24 +659,24 @@ def OR16mm : I16mm<0x0,
// bic does not modify condition codes
def BIC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"bic.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src1, (not GR8:$src2)))]>;
+ [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>;
def BIC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"bic.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src1, (not GR16:$src2)))]>;
+ [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>;
def BIC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"bic.b\t{$src2, $dst}",
- [(set GR8:$dst, (and GR8:$src1, (not (i8 (load addr:$src2)))))]>;
+ [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>;
def BIC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"bic.w\t{$src2, $dst}",
- [(set GR16:$dst, (and GR16:$src1, (not (i16 (load addr:$src2)))))]>;
+ [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def BIC8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"bic.b\t{$src, $dst}",
@@ -695,52 +700,52 @@ def BIC16mm : I16mm<0x0,
let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
def XOR8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (xor GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def XOR16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (xor GR16:$src, GR16:$src2)),
(implicit SRW)]>;
}
def XOR8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (xor GR8:$src, imm:$src2)),
(implicit SRW)]>;
def XOR16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (xor GR16:$src, imm:$src2)),
(implicit SRW)]>;
def XOR8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"xor.b\t{$src2, $dst}",
- [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def XOR16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"xor.w\t{$src2, $dst}",
- [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src1, GR16:$base),
+ (ins GR8:$src, GR16:$base),
"xor.b\t{@$base+, $dst}", []>;
def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src1, GR16:$base),
+ (ins GR16:$src, GR16:$base),
"xor.w\t{@$base+, $dst}", []>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def XOR8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"xor.b\t{$src, $dst}",
@@ -777,51 +782,51 @@ def XOR16mm : I16mm<0x0,
def SUB8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (sub GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def SUB16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (sub GR16:$src, GR16:$src2)),
(implicit SRW)]>;
def SUB8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (sub GR8:$src, imm:$src2)),
(implicit SRW)]>;
def SUB16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (sub GR16:$src, imm:$src2)),
(implicit SRW)]>;
def SUB8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"sub.b\t{$src2, $dst}",
- [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def SUB16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"sub.w\t{$src2, $dst}",
- [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1,
-Constraints = "$base = $base_wb, $src1 = $dst" in {
+Constraints = "$base = $base_wb, $src = $dst" in {
def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR8:$dst, GR16:$base_wb),
- (ins GR8:$src1, GR16:$base),
+ (ins GR8:$src, GR16:$base),
"sub.b\t{@$base+, $dst}", []>;
def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes,
(outs GR16:$dst, GR16:$base_wb),
- (ins GR16:$src1, GR16:$base),
+ (ins GR16:$src, GR16:$base),
"sub.w\t{@$base+, $dst}", []>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def SUB8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"sub.b\t{$src, $dst}",
@@ -860,39 +865,39 @@ def SUB16mm : I16mm<0x0,
let Uses = [SRW] in {
def SBC8rr : I8rr<0x0,
- (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ (outs GR8:$dst), (ins GR8:$src, GR8:$src2),
"subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src1, GR8:$src2)),
+ [(set GR8:$dst, (sube GR8:$src, GR8:$src2)),
(implicit SRW)]>;
def SBC16rr : I16rr<0x0,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ (outs GR16:$dst), (ins GR16:$src, GR16:$src2),
"subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src1, GR16:$src2)),
+ [(set GR16:$dst, (sube GR16:$src, GR16:$src2)),
(implicit SRW)]>;
def SBC8ri : I8ri<0x0,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ (outs GR8:$dst), (ins GR8:$src, i8imm:$src2),
"subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src1, imm:$src2)),
+ [(set GR8:$dst, (sube GR8:$src, imm:$src2)),
(implicit SRW)]>;
def SBC16ri : I16ri<0x0,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ (outs GR16:$dst), (ins GR16:$src, i16imm:$src2),
"subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src1, imm:$src2)),
+ [(set GR16:$dst, (sube GR16:$src, imm:$src2)),
(implicit SRW)]>;
def SBC8rm : I8rm<0x0,
- (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ (outs GR8:$dst), (ins GR8:$src, memsrc:$src2),
"subc.b\t{$src2, $dst}",
- [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2))),
+ [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))),
(implicit SRW)]>;
def SBC16rm : I16rm<0x0,
- (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ (outs GR16:$dst), (ins GR16:$src, memsrc:$src2),
"subc.w\t{$src2, $dst}",
- [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2))),
+ [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))),
(implicit SRW)]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def SBC8mr : I8mr<0x0,
(outs), (ins memdst:$dst, GR8:$src),
"subc.b\t{$src, $dst}",
@@ -985,59 +990,59 @@ def SWPB16r : II16r<0x0,
"swpb\t$dst",
[(set GR16:$dst, (bswap GR16:$src))]>;
-} // isTwoAddress = 1
+} // Constraints = "$src = $dst"
// Integer comparisons
let Defs = [SRW] in {
def CMP8rr : I8rr<0x0,
- (outs), (ins GR8:$src1, GR8:$src2),
- "cmp.b\t{$src2, $src1}",
- [(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>;
+ (outs), (ins GR8:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>;
def CMP16rr : I16rr<0x0,
- (outs), (ins GR16:$src1, GR16:$src2),
- "cmp.w\t{$src2, $src1}",
- [(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>;
+ (outs), (ins GR16:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>;
def CMP8ri : I8ri<0x0,
- (outs), (ins GR8:$src1, i8imm:$src2),
- "cmp.b\t{$src2, $src1}",
- [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>;
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>;
def CMP16ri : I16ri<0x0,
- (outs), (ins GR16:$src1, i16imm:$src2),
- "cmp.w\t{$src2, $src1}",
- [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>;
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>;
def CMP8mi : I8mi<0x0,
- (outs), (ins memsrc:$src1, i8imm:$src2),
- "cmp.b\t{$src2, $src1}",
- [(MSP430cmp (load addr:$src1),
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
(i8 imm:$src2)), (implicit SRW)]>;
def CMP16mi : I16mi<0x0,
- (outs), (ins memsrc:$src1, i16imm:$src2),
- "cmp.w\t{$src2, $src1}",
- [(MSP430cmp (load addr:$src1),
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src),
(i16 imm:$src2)), (implicit SRW)]>;
def CMP8rm : I8rm<0x0,
- (outs), (ins GR8:$src1, memsrc:$src2),
- "cmp.b\t{$src2, $src1}",
- [(MSP430cmp GR8:$src1, (load addr:$src2)),
+ (outs), (ins GR8:$src, memsrc:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp GR8:$src, (load addr:$src2)),
(implicit SRW)]>;
def CMP16rm : I16rm<0x0,
- (outs), (ins GR16:$src1, memsrc:$src2),
- "cmp.w\t{$src2, $src1}",
- [(MSP430cmp GR16:$src1, (load addr:$src2)),
+ (outs), (ins GR16:$src, memsrc:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp GR16:$src, (load addr:$src2)),
(implicit SRW)]>;
def CMP8mr : I8mr<0x0,
- (outs), (ins memsrc:$src1, GR8:$src2),
- "cmp.b\t{$src2, $src1}",
- [(MSP430cmp (load addr:$src1), GR8:$src2),
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "cmp.b\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR8:$src2),
(implicit SRW)]>;
def CMP16mr : I16mr<0x0,
- (outs), (ins memsrc:$src1, GR16:$src2),
- "cmp.w\t{$src2, $src1}",
- [(MSP430cmp (load addr:$src1), GR16:$src2),
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "cmp.w\t{$src2, $src}",
+ [(MSP430cmp (load addr:$src), GR16:$src2),
(implicit SRW)]>;
@@ -1045,71 +1050,71 @@ def CMP16mr : I16mr<0x0,
// Note that the C condition is set differently than when using CMP.
let isCommutable = 1 in {
def BIT8rr : I8rr<0x0,
- (outs), (ins GR8:$src1, GR8:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su GR8:$src1, GR8:$src2), 0),
+ (outs), (ins GR8:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0),
(implicit SRW)]>;
def BIT16rr : I16rr<0x0,
- (outs), (ins GR16:$src1, GR16:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su GR16:$src1, GR16:$src2), 0),
+ (outs), (ins GR16:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0),
(implicit SRW)]>;
}
def BIT8ri : I8ri<0x0,
- (outs), (ins GR8:$src1, i8imm:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su GR8:$src1, imm:$src2), 0),
+ (outs), (ins GR8:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, imm:$src2), 0),
(implicit SRW)]>;
def BIT16ri : I16ri<0x0,
- (outs), (ins GR16:$src1, i16imm:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su GR16:$src1, imm:$src2), 0),
+ (outs), (ins GR16:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, imm:$src2), 0),
(implicit SRW)]>;
def BIT8rm : I8rm<0x0,
- (outs), (ins GR8:$src1, memdst:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su GR8:$src1, (load addr:$src2)), 0),
+ (outs), (ins GR8:$src, memdst:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0),
(implicit SRW)]>;
def BIT16rm : I16rm<0x0,
- (outs), (ins GR16:$src1, memdst:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su GR16:$src1, (load addr:$src2)), 0),
+ (outs), (ins GR16:$src, memdst:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0),
(implicit SRW)]>;
def BIT8mr : I8mr<0x0,
- (outs), (ins memsrc:$src1, GR8:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su (load addr:$src1), GR8:$src2), 0),
+ (outs), (ins memsrc:$src, GR8:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0),
(implicit SRW)]>;
def BIT16mr : I16mr<0x0,
- (outs), (ins memsrc:$src1, GR16:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su (load addr:$src1), GR16:$src2), 0),
+ (outs), (ins memsrc:$src, GR16:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0),
(implicit SRW)]>;
def BIT8mi : I8mi<0x0,
- (outs), (ins memsrc:$src1, i8imm:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su (load addr:$src1), (i8 imm:$src2)), 0),
+ (outs), (ins memsrc:$src, i8imm:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0),
(implicit SRW)]>;
def BIT16mi : I16mi<0x0,
- (outs), (ins memsrc:$src1, i16imm:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su (load addr:$src1), (i16 imm:$src2)), 0),
+ (outs), (ins memsrc:$src, i16imm:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0),
(implicit SRW)]>;
def BIT8mm : I8mm<0x0,
- (outs), (ins memsrc:$src1, memsrc:$src2),
- "bit.b\t{$src2, $src1}",
- [(MSP430cmp (and_su (i8 (load addr:$src1)),
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.b\t{$src2, $src}",
+ [(MSP430cmp (and_su (i8 (load addr:$src)),
(load addr:$src2)),
0),
(implicit SRW)]>;
def BIT16mm : I16mm<0x0,
- (outs), (ins memsrc:$src1, memsrc:$src2),
- "bit.w\t{$src2, $src1}",
- [(MSP430cmp (and_su (i16 (load addr:$src1)),
+ (outs), (ins memsrc:$src, memsrc:$src2),
+ "bit.w\t{$src2, $src}",
+ [(MSP430cmp (and_su (i16 (load addr:$src)),
(load addr:$src2)),
0),
(implicit SRW)]>;
@@ -1134,12 +1139,12 @@ def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>;
def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>;
def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>;
-def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)),
- (ADD16ri GR16:$src1, tglobaladdr:$src2)>;
-def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)),
- (ADD16ri GR16:$src1, texternalsym:$src2)>;
-def : Pat<(add GR16:$src1, (MSP430Wrapper tblockaddress:$src2)),
- (ADD16ri GR16:$src1, tblockaddress:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper tglobaladdr :$src2)),
+ (ADD16ri GR16:$src, tglobaladdr:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper texternalsym:$src2)),
+ (ADD16ri GR16:$src, texternalsym:$src2)>;
+def : Pat<(add GR16:$src, (MSP430Wrapper tblockaddress:$src2)),
+ (ADD16ri GR16:$src, tblockaddress:$src2)>;
def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst),
(MOV16mi addr:$dst, tglobaladdr:$src)>;
@@ -1155,45 +1160,45 @@ def : Pat<(MSP430call (i16 texternalsym:$dst)),
(CALLi texternalsym:$dst)>;
// add and sub always produce carry
-def : Pat<(addc GR16:$src1, GR16:$src2),
- (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(addc GR16:$src1, (load addr:$src2)),
- (ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(addc GR16:$src1, imm:$src2),
- (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(addc GR16:$src, GR16:$src2),
+ (ADD16rr GR16:$src, GR16:$src2)>;
+def : Pat<(addc GR16:$src, (load addr:$src2)),
+ (ADD16rm GR16:$src, addr:$src2)>;
+def : Pat<(addc GR16:$src, imm:$src2),
+ (ADD16ri GR16:$src, imm:$src2)>;
def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst),
(ADD16mr addr:$dst, GR16:$src)>;
def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
(ADD16mm addr:$dst, addr:$src)>;
-def : Pat<(addc GR8:$src1, GR8:$src2),
- (ADD8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(addc GR8:$src1, (load addr:$src2)),
- (ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(addc GR8:$src1, imm:$src2),
- (ADD8ri GR8:$src1, imm:$src2)>;
+def : Pat<(addc GR8:$src, GR8:$src2),
+ (ADD8rr GR8:$src, GR8:$src2)>;
+def : Pat<(addc GR8:$src, (load addr:$src2)),
+ (ADD8rm GR8:$src, addr:$src2)>;
+def : Pat<(addc GR8:$src, imm:$src2),
+ (ADD8ri GR8:$src, imm:$src2)>;
def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst),
(ADD8mr addr:$dst, GR8:$src)>;
def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
(ADD8mm addr:$dst, addr:$src)>;
-def : Pat<(subc GR16:$src1, GR16:$src2),
- (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(subc GR16:$src1, (load addr:$src2)),
- (SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(subc GR16:$src1, imm:$src2),
- (SUB16ri GR16:$src1, imm:$src2)>;
+def : Pat<(subc GR16:$src, GR16:$src2),
+ (SUB16rr GR16:$src, GR16:$src2)>;
+def : Pat<(subc GR16:$src, (load addr:$src2)),
+ (SUB16rm GR16:$src, addr:$src2)>;
+def : Pat<(subc GR16:$src, imm:$src2),
+ (SUB16ri GR16:$src, imm:$src2)>;
def : Pat<(store (subc (load addr:$dst), GR16:$src), addr:$dst),
(SUB16mr addr:$dst, GR16:$src)>;
def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst),
(SUB16mm addr:$dst, addr:$src)>;
-def : Pat<(subc GR8:$src1, GR8:$src2),
- (SUB8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(subc GR8:$src1, (load addr:$src2)),
- (SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(subc GR8:$src1, imm:$src2),
- (SUB8ri GR8:$src1, imm:$src2)>;
+def : Pat<(subc GR8:$src, GR8:$src2),
+ (SUB8rr GR8:$src, GR8:$src2)>;
+def : Pat<(subc GR8:$src, (load addr:$src2)),
+ (SUB8rm GR8:$src, addr:$src2)>;
+def : Pat<(subc GR8:$src, imm:$src2),
+ (SUB8ri GR8:$src, imm:$src2)>;
def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
(SUB8mr addr:$dst, GR8:$src)>;
def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
@@ -1201,6 +1206,6 @@ def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
// peephole patterns
def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
-def : Pat<(MSP430cmp (trunc (and_su GR16:$src1, GR16:$src2)), 0),
- (BIT8rr (EXTRACT_SUBREG GR16:$src1, subreg_8bit),
+def : Pat<(MSP430cmp (trunc (and_su GR16:$src, GR16:$src2)), 0),
+ (BIT8rr (EXTRACT_SUBREG GR16:$src, subreg_8bit),
(EXTRACT_SUBREG GR16:$src2, subreg_8bit))>;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 0cae267..608ca49 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -71,48 +71,6 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
-const TargetRegisterClass *const *
-MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- const Function* F = MF->getFunction();
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClassesFP[] = {
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClassesIntr[] = {
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClassesIntrFP[] = {
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, &MSP430::GR16RegClass,
- &MSP430::GR16RegClass, 0
- };
-
- if (hasFP(*MF))
- return (F->getCallingConv() == CallingConv::MSP430_INTR ?
- CalleeSavedRegClassesIntrFP : CalleeSavedRegClassesFP);
- else
- return (F->getCallingConv() == CallingConv::MSP430_INTR ?
- CalleeSavedRegClassesIntr : CalleeSavedRegClasses);
-}
-
BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
@@ -270,8 +228,8 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
const {
// Create a frame entry for the FPW register that must be saved.
if (hasFP(MF)) {
- int ATTRIBUTE_UNUSED FrameIdx =
- MF.getFrameInfo()->CreateFixedObject(2, -4, true, false);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
+ (void)FrameIdx;
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
"Slot for FPW register must be last in order to be found!");
}
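
[Note] The CreateFixedObject calls in this file (and in the MSP430 and Mips lowering hunks above) all drop a trailing bool: the signature shrank to size, offset, and a single flag in this revision. The ATTRIBUTE_UNUSED annotation also gives way to a (void) cast for the assert-only index. A minimal sketch, assuming this tree's headers and that the surviving flag is the immutability bit (allocFPWSlot is a hypothetical helper):

  #include "llvm/CodeGen/MachineFrameInfo.h"
  #include <cassert>

  static void allocFPWSlot(llvm::MachineFrameInfo &MFI) {
    // old: MFI.CreateFixedObject(2, -4, true, false);
    int FrameIdx = MFI.CreateFixedObject(2, -4, /*Immutable=*/true);
    (void)FrameIdx; // referenced only by the assert below
    assert(FrameIdx == MFI.getObjectIndexBegin() &&
           "Slot for FPW register must be last in order to be found!");
  }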
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index c8684df..6e58d31 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -36,9 +36,6 @@ public:
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const;
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index 4ef017a..2037a91 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -180,7 +180,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl<char> &OutName,
ManglerPrefixTy PrefixTy = Mangler::Default;
if (GV->hasPrivateLinkage() || isImplicitlyPrivate)
PrefixTy = Mangler::Private;
- else if (GV->hasLinkerPrivateLinkage())
+ else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage())
PrefixTy = Mangler::LinkerPrivate;
// If this global has a name, handle it simply.
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 4d7fe4c..8ae05b7 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -133,8 +133,9 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
const MachineFrameInfo *MFI = MF->getFrameInfo();
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(CSI[i].getReg());
- if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+ unsigned Reg = CSI[i].getReg();
+ unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg);
+ if (Mips::CPURegsRegisterClass->contains(Reg))
CPUBitmask |= (1 << RegNum);
else
FPUBitmask |= (1 << RegNum);
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index e979c3f..b6ff2c3 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -284,6 +284,18 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, copy0MBB);
+ F->insert(It, sinkMBB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Next, add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
// Emit the right instruction according to the type of the operands compared
if (isFPCmp) {
@@ -296,20 +308,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg())
.addReg(Mips::ZERO).addMBB(sinkMBB);
- F->insert(It, copy0MBB);
- F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
- e = BB->succ_end(); i != e; ++i)
- sinkMBB->addSuccessor(*i);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while(!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
-
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -322,11 +320,12 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII->get(Mips::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(Mips::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(3).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
}
@@ -490,21 +489,21 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
// %gp_rel relocation
if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_GPREL);
SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
}
// %hi/%lo relocation
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_ABS_HILO);
SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1);
SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
} else {
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
MipsII::MO_GOT);
SDValue ResNode = DAG.getLoad(MVT::i32, dl,
DAG.getEntryNode(), GA, NULL, 0,
@@ -768,6 +767,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -787,7 +787,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// the stack (even if less than 4 are used as arguments)
if (Subtarget->isABI_O32()) {
int VTsize = EVT(MVT::i32).getSizeInBits()/8;
- MFI->CreateFixedObject(VTsize, (VTsize*3), true, false);
+ MFI->CreateFixedObject(VTsize, (VTsize*3), true);
CCInfo.AnalyzeCallOperands(Outs,
isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32);
} else
@@ -808,7 +808,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
CCValAssign &VA = ArgLocs[i];
// Promote the value if needed.
@@ -857,7 +857,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// if O32 ABI is used. For EABI the first address is zero.
LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
- LastArgStackLoc, true, false);
+ LastArgStackLoc, true);
SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
@@ -889,7 +889,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// node so that legalize doesn't hack it.
unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
getPointerTy(), 0, OpFlag);
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
@@ -929,7 +929,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the frame index only once. SPOffset here can be anything
// (this will be fixed on processFunctionBeforeFrameFinalized)
if (MipsFI->getGPStackOffset() == -1) {
- FI = MFI->CreateFixedObject(4, 0, true, false);
+ FI = MFI->CreateFixedObject(4, 0, true);
MipsFI->setGPFI(FI);
}
MipsFI->setGPStackOffset(LastArgStackLoc);
@@ -1098,7 +1098,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// offset on PEI::calculateFrameObjectOffsets.
// Arguments are always 32-bit.
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true);
MipsFI->recordLoadArgsFI(FI, -(ArgSize+
(FirstStackArgLoc + VA.getLocMemOffset())));
@@ -1137,7 +1137,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
- int FI = MFI->CreateFixedObject(4, 0, true, false);
+ int FI = MFI->CreateFixedObject(4, 0, true);
MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
@@ -1169,6 +1169,7 @@ SDValue
MipsTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
@@ -1198,7 +1199,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
// guarantee that all emitted copies are
// stuck together, avoiding something bad
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index f2de489..460747b 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -120,6 +120,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -128,6 +129,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
virtual MachineBasicBlock *
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 4005e35..6c09a3e 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -127,61 +127,75 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
BuildMI(MBB, MI, DL, get(Mips::NOP));
}
-bool MipsInstrInfo::
-copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
+void MipsInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool DestCPU = Mips::CPURegsRegClass.contains(DestReg);
+ bool SrcCPU = Mips::CPURegsRegClass.contains(SrcReg);
+
+ // CPU-CPU is the most common.
+ if (DestCPU && SrcCPU) {
+ BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- if (DestRC != SrcRC) {
-
- // Copy to/from FCR31 condition register
- if ((DestRC == Mips::CPURegsRegisterClass) &&
- (SrcRC == Mips::CCRRegisterClass))
- BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg).addReg(SrcReg);
- else if ((DestRC == Mips::CCRRegisterClass) &&
- (SrcRC == Mips::CPURegsRegisterClass))
- BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg).addReg(SrcReg);
-
- // Moves between coprocessors and cpu
- else if ((DestRC == Mips::CPURegsRegisterClass) &&
- (SrcRC == Mips::FGR32RegisterClass))
- BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg).addReg(SrcReg);
- else if ((DestRC == Mips::FGR32RegisterClass) &&
- (SrcRC == Mips::CPURegsRegisterClass))
- BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg);
-
- // Move from/to Hi/Lo registers
- else if ((DestRC == Mips::HILORegisterClass) &&
- (SrcRC == Mips::CPURegsRegisterClass)) {
- unsigned Opc = (DestReg == Mips::HI) ? Mips::MTHI : Mips::MTLO;
- BuildMI(MBB, I, DL, get(Opc), DestReg);
- } else if ((SrcRC == Mips::HILORegisterClass) &&
- (DestRC == Mips::CPURegsRegisterClass)) {
- unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO;
- BuildMI(MBB, I, DL, get(Opc), DestReg);
- } else
- // Can't copy this register
- return false;
+ // Copy to CPU from other registers.
+ if (DestCPU) {
+ if (Mips::CCRRegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (Mips::FGR32RegClass.contains(SrcReg))
+ BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SrcReg == Mips::HI)
+ BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg);
+ else if (SrcReg == Mips::LO)
+ BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg);
+ else
+ llvm_unreachable("Copy to CPU from invalid register");
+ return;
+ }
- return true;
+ // Copy to other registers from CPU.
+ if (SrcCPU) {
+ if (Mips::CCRRegClass.contains(DestReg))
+ BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (Mips::FGR32RegClass.contains(DestReg))
+ BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestReg == Mips::HI)
+ BuildMI(MBB, I, DL, get(Mips::MTHI))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (DestReg == Mips::LO)
+ BuildMI(MBB, I, DL, get(Mips::MTLO))
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ llvm_unreachable("Copy from CPU to invalid register");
+ return;
}
- if (DestRC == Mips::CPURegsRegisterClass)
- BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO)
- .addReg(SrcReg);
- else if (DestRC == Mips::FGR32RegisterClass)
- BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg).addReg(SrcReg);
- else if (DestRC == Mips::AFGR64RegisterClass)
- BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg).addReg(SrcReg);
- else if (DestRC == Mips::CCRRegisterClass)
- BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg).addReg(SrcReg);
- else
- // Can't copy this register
- return false;
+ if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- return true;
+ if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+
+ if (Mips::CCRRegClass.contains(DestReg, SrcReg)) {
+ BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
+ llvm_unreachable("Cannot copy registers");
}
void MipsInstrInfo::
@@ -247,80 +261,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Register class not handled!");
}
-MachineInstr *MipsInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops, int FI) const
-{
- if (Ops.size() != 1) return NULL;
-
- MachineInstr *NewMI = NULL;
-
- switch (MI->getOpcode()) {
- case Mips::ADDu:
- if ((MI->getOperand(0).isReg()) &&
- (MI->getOperand(1).isReg()) &&
- (MI->getOperand(1).getReg() == Mips::ZERO) &&
- (MI->getOperand(2).isReg())) {
- if (Ops[0] == 0) { // COPY -> STORE
- unsigned SrcReg = MI->getOperand(2).getReg();
- bool isKill = MI->getOperand(2).isKill();
- bool isUndef = MI->getOperand(2).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI);
- } else { // COPY -> LOAD
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW))
- .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI);
- }
- }
- break;
- case Mips::FMOV_S32:
- case Mips::FMOV_D32:
- if ((MI->getOperand(0).isReg()) &&
- (MI->getOperand(1).isReg())) {
- const TargetRegisterClass
- *RC = RI.getRegClass(MI->getOperand(0).getReg());
- unsigned StoreOpc, LoadOpc;
- bool IsMips1 = TM.getSubtarget<MipsSubtarget>().isMips1();
-
- if (RC == Mips::FGR32RegisterClass) {
- LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1;
- } else {
- assert(RC == Mips::AFGR64RegisterClass);
- // Mips1 doesn't have ldc/sdc instructions.
- if (IsMips1) break;
- LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1;
- }
-
- if (Ops[0] == 0) { // COPY -> STORE
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(2).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc))
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI) ;
- } else { // COPY -> LOAD
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc))
- .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
- getUndefRegState(isUndef))
- .addImm(0).addFrameIndex(FI);
- }
- }
- break;
- }
-
- return NewMI;
-}
-
//===----------------------------------------------------------------------===//
// Branch Analysis
//===----------------------------------------------------------------------===//
@@ -520,9 +460,8 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
unsigned MipsInstrInfo::
InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) &&
@@ -531,18 +470,18 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (FBB == 0) { // One way branch.
if (Cond.empty()) {
// Unconditional branch?
- BuildMI(&MBB, dl, get(Mips::J)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB);
} else {
// Conditional branch.
unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm());
const TargetInstrDesc &TID = get(Opc);
if (TID.getNumOperands() == 3)
- BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg())
+ BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg())
.addReg(Cond[2].getReg())
.addMBB(TBB);
else
- BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg())
+ BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg())
.addMBB(TBB);
}
@@ -554,12 +493,12 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
const TargetInstrDesc &TID = get(Opc);
if (TID.getNumOperands() == 3)
- BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg())
+ BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg())
.addMBB(TBB);
else
- BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addMBB(TBB);
+ BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addMBB(TBB);
- BuildMI(&MBB, dl, get(Mips::J)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB);
return 2;
}
@@ -621,12 +560,8 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
- bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Mips::GP,
- Mips::CPURegsRegisterClass,
- Mips::CPURegsRegisterClass,
- DebugLoc());
- assert(Ok && "Couldn't assign to global base register!");
- Ok = Ok; // Silence warning when assertions are turned off.
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+ GlobalBaseReg).addReg(Mips::GP);
RegInfo.addLiveIn(Mips::GP);
MipsFI->setGlobalBaseReg(GlobalBaseReg);
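
The copyRegToReg hook, which consulted source and destination register classes and could fail, is replaced here by copyPhysReg: it works purely on physical register numbers (classes are recovered with TargetRegisterClass::contains), carries an explicit kill flag for the source, and is required to succeed, so an unsupported pairing is llvm_unreachable rather than a false return. InsertBranch likewise now receives a DebugLoc instead of fabricating an empty one. A minimal sketch of the new hook, with the target namespace and opcode hypothetical:

    void MyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I, DebugLoc DL,
                                  unsigned DestReg, unsigned SrcReg,
                                  bool KillSrc) const {
      // Dispatch on the physical registers themselves; no class arguments.
      if (My::GPRRegClass.contains(DestReg, SrcReg)) {
        BuildMI(MBB, I, DL, get(My::MOVrr), DestReg)
          .addReg(SrcReg, getKillRegState(KillSrc));
        return;
      }
      // A copy the target cannot emit indicates a bug in the caller, not a
      // condition to recover from.
      llvm_unreachable("Impossible reg-to-reg copy");
    }
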
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 7919d9a..d6f87f9 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -204,13 +204,12 @@ public:
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
@@ -223,18 +222,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
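
The foldMemoryOperandImpl overrides removed from this header (their definition was removed from MipsInstrInfo.cpp above) existed only to turn spilled register copies into loads and stores. With moves now emitted as the generic TargetOpcode::COPY pseudo, as the getGlobalBaseReg hunk shows, that copy folding presumably falls to target-independent spill code, and the per-target hooks can go. The emission side reduces to a sketch like:

    // Emit a generic copy; later passes lower it through copyPhysReg or fold
    // it into a spill load/store, with no target-specific pattern matching.
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), DestReg)
        .addReg(SrcReg);
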
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 2b9e941..5337c9f 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -541,7 +541,7 @@ let Predicates = [HasSwap] in {
def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>;
def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>;
-let Predicates = [HasCondMov], isTwoAddress = 1 in {
+let Predicates = [HasCondMov], Constraints = "$F = $dst" in {
def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>;
def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>;
}
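
The isTwoAddress = 1 flag, which implicitly tied an instruction's first source operand to its destination, is retired in favor of an explicit Constraints string naming the tied pair: "$F = $dst" here, "$src = $dst" throughout the PIC16 patterns further down. The string form states the same requirement the flag implied, but makes the pairing visible and allows tying an operand other than the first source.
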
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 5e719af..e15f0a5 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -116,34 +116,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const
return BitMode32CalleeSavedRegs;
}
-/// Mips Callee Saved Register Classes
-const TargetRegisterClass* const*
-MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const
-{
- static const TargetRegisterClass * const SingleFloatOnlyCalleeSavedRC[] = {
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
- };
-
- static const TargetRegisterClass * const BitMode32CalleeSavedRC[] = {
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::CPURegsRegClass, &Mips::CPURegsRegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass,
- &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0
- };
-
- if (Subtarget.isSingleFloat())
- return SingleFloatOnlyCalleeSavedRC;
- else
- return BitMode32CalleeSavedRC;
-}
-
BitVector MipsRegisterInfo::
getReservedRegs(const MachineFunction &MF) const
{
@@ -279,7 +251,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign);
for (unsigned i = 0, e = CSI.size(); i != e ; ++i) {
- if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass)
+ unsigned Reg = CSI[i].getReg();
+ if (!Mips::CPURegsRegisterClass->contains(Reg))
break;
MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
TopCPUSavedRegOff = StackOffset;
@@ -311,7 +284,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
// Adjust FPU Callee Saved Registers Area. This Area must be
// aligned to the default Stack Alignment requirements.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass)
+ unsigned Reg = CSI[i].getReg();
+ if (Mips::CPURegsRegisterClass->contains(Reg))
continue;
MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset);
TopFPUSavedRegOff = StackOffset;
@@ -528,4 +502,3 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const {
}
#include "MipsGenRegisterInfo.inc"
-
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index bc857b8..b500a65 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -42,9 +42,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
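
getCalleeSavedRegClasses disappears from both register-info classes in this patch; the parallel class arrays were redundant, since a saved register's class can be recovered from the register number itself, which is exactly how the adjustMipsStackFrame hunks above now test membership. The replacement idiom, sketched:

    // Classify callee-saved entries by querying the register itself.
    const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
      unsigned Reg = CSI[i].getReg();
      if (Mips::CPURegsRegisterClass->contains(Reg)) {
        // integer callee-saved slot
      } else {
        // FPU callee-saved slot
      }
    }
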
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index f479f46..54a6a28 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -672,7 +672,8 @@ SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N,
// FIXME there isn't really debug info here
DebugLoc dl = G->getDebugLoc();
- SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i8,
+ SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), N->getDebugLoc(),
+ MVT::i8,
G->getOffset());
SDValue Offset = DAG.getConstant(0, MVT::i8);
@@ -1120,6 +1121,7 @@ SDValue PIC16TargetLowering::
LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue DataAddr_Lo, SDValue DataAddr_Hi,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG) const {
unsigned NumOps = Outs.size();
@@ -1136,7 +1138,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
unsigned RetVals = Ins.size();
for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) {
// Get the arguments
- Arg = Outs[i].Val;
+ Arg = OutVals[i];
Ops.clear();
Ops.push_back(Chain);
@@ -1158,6 +1160,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue PIC16TargetLowering::
LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
unsigned NumOps = Outs.size();
std::string Name;
@@ -1183,7 +1186,7 @@ LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
for (unsigned i=0, Offset = 0; i<NumOps; i++) {
// Get the argument
- Arg = Outs[i].Val;
+ Arg = OutVals[i];
StoreOffset = (Offset + AddressOffset);
// Store the argument on frame
@@ -1282,6 +1285,7 @@ SDValue
PIC16TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// Number of values to return
@@ -1298,7 +1302,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain,
SDValue BS = DAG.getConstant(1, MVT::i8);
SDValue RetVal;
for(unsigned i=0;i<NumRet; ++i) {
- RetVal = Outs[i].Val;
+ RetVal = OutVals[i];
Chain = DAG.getNode (PIC16ISD::PIC16Store, dl, MVT::Other, Chain, RetVal,
ES, BS,
DAG.getConstant (i, MVT::i8));
@@ -1374,6 +1378,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -1428,7 +1433,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Considering the GlobalAddressNode case here.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = G->getGlobal();
- Callee = DAG.getTargetGlobalAddress(GV, MVT::i8);
+ Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i8);
Name = G->getGlobal()->getName();
} else {// Considering the ExternalSymbol case here
ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Callee);
@@ -1461,12 +1466,13 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SDValue CallArgs;
if (IsDirectCall) {
CallArgs = LowerDirectCallArguments(ArgLabel, Chain, OperFlag,
- Outs, dl, DAG);
+ Outs, OutVals, dl, DAG);
Chain = getChain(CallArgs);
OperFlag = getOutFlag(CallArgs);
} else {
CallArgs = LowerIndirectCallArguments(Chain, OperFlag, DataAddr_Lo,
- DataAddr_Hi, Outs, Ins, dl, DAG);
+ DataAddr_Hi, Outs, OutVals, Ins,
+ dl, DAG);
Chain = getChain(CallArgs);
OperFlag = getOutFlag(CallArgs);
}
@@ -1791,14 +1797,14 @@ static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) {
static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
ISD::CondCode CC, unsigned &SPCC) {
if (isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ cast<ConstantSDNode>(RHS)->isNullValue() &&
CC == ISD::SETNE &&
(LHS.getOpcode() == PIC16ISD::SELECT_ICC &&
LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) &&
isa<ConstantSDNode>(LHS.getOperand(0)) &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
- cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 &&
- cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) {
+ cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
SDValue CMPCC = LHS.getOperand(3);
SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
LHS = CMPCC.getOperand(0);
@@ -1928,15 +1934,12 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
@@ -1953,11 +1956,12 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII.get(PIC16::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII.get(PIC16::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
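
The select-lowering rewrite above (repeated for PowerPC below) replaces the hand-rolled successor copying with the newer block-splitting idiom: everything after the pseudo is spliced into the sink block, transferSuccessorsAndUpdatePHIs moves the CFG edges and rewrites PHI operands in one step, the PHI for the select result is built at the top of the sink block (PHIs must stay at a block's head), and the pseudo removes itself with eraseFromParent. The skeleton, with DestReg/TrueReg/FalseReg as stand-ins for the operand registers:

    // Split BB after the select pseudo MI.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);
    // ... emit the conditional branch and the copy0MBB fallthrough ...
    BuildMI(*sinkMBB, sinkMBB->begin(), dl, TII.get(PIC16::PHI), DestReg)
        .addReg(FalseReg).addMBB(copy0MBB)
        .addReg(TrueReg).addMBB(thisMBB);
    MI->eraseFromParent();   // replaces F->DeleteMachineInstr(MI)
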
diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h
index eea17f8..0a7506c 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.h
+++ b/lib/Target/PIC16/PIC16ISelLowering.h
@@ -106,12 +106,14 @@ namespace llvm {
SDValue
LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue
LowerIndirectCallArguments(SDValue Chain, SDValue InFlag,
SDValue DataAddr_Lo, SDValue DataAddr_Hi,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG) const;
@@ -143,6 +145,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -151,6 +154,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const;
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index 793dd9f..e784f74 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -151,25 +151,20 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
llvm_unreachable("Can't load this register from stack slot");
}
-bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
-
- if (DestRC == PIC16::FSR16RegisterClass) {
- BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg);
- return true;
- }
-
- if (DestRC == PIC16::GPRRegisterClass) {
- BuildMI(MBB, I, DL, get(PIC16::copy_w), DestReg).addReg(SrcReg);
- return true;
- }
+void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (PIC16::FSR16RegClass.contains(DestReg, SrcReg))
+ Opc = PIC16::copy_fsr;
+ else if (PIC16::GPRRegClass.contains(DestReg, SrcReg))
+ Opc = PIC16::copy_w;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
- // Not yet supported.
- return false;
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI,
@@ -196,15 +191,15 @@ bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI,
unsigned PIC16InstrInfo::
InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
if (FBB == 0) { // One way branch.
if (Cond.empty()) {
// Unconditional branch?
- DebugLoc dl;
- BuildMI(&MBB, dl, get(PIC16::br_uncond)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PIC16::br_uncond)).addMBB(TBB);
}
return 1;
}
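
The PIC16 variant shows the same migration in its most compact shape: pick an opcode by register class, emit one BuildMI, and treat anything else as unreachable. Note the behavioral shift: the old code quietly answered "Not yet supported" with a false return, while copyPhysReg asserts, on the theory that by the time a physical copy is requested the registers involved must be copyable.
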
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
index 40a4cb4..a3a77f1 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -57,12 +57,10 @@ public:
unsigned DestReg, int FrameIndex,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual bool isMoveInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
@@ -70,7 +68,8 @@ public:
virtual
unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td
index 24df251..86d36cb 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.td
+++ b/lib/Target/PIC16/PIC16InstrInfo.td
@@ -134,7 +134,7 @@ include "PIC16InstrFormats.td"
//===----------------------------------------------------------------------===//
// W = W Op F : Load the value from F and do Op to W.
-let isTwoAddress = 1, mayLoad = 1 in
+let Constraints = "$src = $dst", mayLoad = 1 in
class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
ByteFormat<OpCode, (outs GPR:$dst),
(ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
@@ -146,7 +146,7 @@ class BinOpFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
// F = F Op W : Load the value from F, do op with W and store in F.
// This insn class is not marked as TwoAddress because the reg is
// being used as a source operand only. (Remember a TwoAddress insn
-// needs a copyRegToReg.)
+// needs a copy.)
let mayStore = 1 in
class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
ByteFormat<OpCode, (outs),
@@ -160,7 +160,7 @@ class BinOpWF<bits<6> OpCode, string OpcStr, SDNode OpNode>:
)]>;
// W = W Op L : Do Op of L with W and place result in W.
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
class BinOpWL<bits<6> opcode, string OpcStr, SDNode OpNode> :
LiteralFormat<opcode, (outs GPR:$dst),
(ins GPR:$src, i8imm:$literal),
@@ -220,7 +220,7 @@ def set_fsrlo:
"movwf ${fsr}L",
[]>;
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def set_fsrhi:
ByteFormat<0, (outs FSR16:$dst),
(ins FSR16:$src, GPR:$val),
@@ -234,8 +234,8 @@ def set_pclath:
[(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>;
//----------------------------
-// copyRegToReg
-// copyRegToReg insns. These are dummy. They should always be deleted
+// copyPhysReg
+// copyPhysReg insns. These are dummy. They should always be deleted
// by the optimizer and never be present in the final generated code.
// if they are, then we have to write correct macros for these insns.
//----------------------------
@@ -362,7 +362,7 @@ def addwfc: BinOpWF<0, "addwfc", adde>; // With Carry.
}
// W -= [F] ; load from F and sub the value from W.
-let isTwoAddress = 1, mayLoad = 1 in
+let Constraints = "$src = $dst", mayLoad = 1 in
class SUBFW<bits<6> OpCode, string OpcStr, SDNode OpNode>:
ByteFormat<OpCode, (outs GPR:$dst),
(ins GPR:$src, i8imm:$offset, i8mem:$ptrlo, i8imm:$ptrhi),
@@ -418,7 +418,7 @@ def orlw : BinOpWL<0, "iorlw", or>;
// sublw
// W = C - W ; sub W from literal. (Without borrow).
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> :
LiteralFormat<opcode, (outs GPR:$dst),
(ins GPR:$src, i8imm:$literal),
@@ -426,7 +426,7 @@ class SUBLW<bits<6> opcode, string OpcStr, SDNode OpNode> :
[(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>;
// subwl
// W = W - C ; sub literal from W (Without borrow).
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
class SUBWL<bits<6> opcode, string OpcStr, SDNode OpNode> :
LiteralFormat<opcode, (outs GPR:$dst),
(ins GPR:$src, i8imm:$literal),
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
index ab81ed1..241170b 100644
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -117,7 +117,7 @@ bool MemSelOpt::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
DebugLoc dl = I->getDebugLoc();
BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$");
Changed = true;
- PageChanged = 0;
+ PageChanged = 0;
}
}
}
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
index c282521..27f1cf5 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
@@ -150,8 +150,8 @@ void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) {
// For PIC16, automatic variables of a function are emitted as globals.
-// Clone the auto variables of a function and put them in ValueMap,
-// this ValueMap will be used while
+// Clone the auto variables of a function and put them in VMap,
+// this VMap will be used while
// Cloning the code of function itself.
//
void PIC16Cloner::CloneAutos(Function *F) {
@@ -160,11 +160,11 @@ void PIC16Cloner::CloneAutos(Function *F) {
Module *M = F->getParent();
Module::GlobalListType &Globals = M->getGlobalList();
- // Clear the leftovers in ValueMap by any previous cloning.
- ValueMap.clear();
+ // Clear the leftovers in VMap by any previous cloning.
+ VMap.clear();
// Find the auto globls for this function and clone them, and put them
- // in ValueMap.
+ // in VMap.
std::string FnName = F->getName().str();
std::string VarName, ClonedVarName;
for (Module::global_iterator I = M->global_begin(), E = M->global_end();
@@ -182,8 +182,8 @@ void PIC16Cloner::CloneAutos(Function *F) {
// Add these new globals to module's globals list.
Globals.push_back(ClonedGV);
- // Update ValueMap.
- ValueMap[GV] = ClonedGV;
+ // Update VMap.
+ VMap[GV] = ClonedGV;
}
}
}
@@ -236,10 +236,10 @@ void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) {
}
// Clone the given function and return it.
-// Note: it uses the ValueMap member of the class, which is already populated
+// Note: it uses the VMap member of the class, which is already populated
// by cloneAutos by the time we reach here.
-// FIXME: Should we just pass ValueMap's ref as a parameter here? rather
-// than keeping the ValueMap as a member.
+// FIXME: Should we just pass VMap's ref as a parameter here? rather
+// than keeping the VMap as a member.
Function *
PIC16Cloner::cloneFunction(Function *OrgF) {
Function *ClonedF;
@@ -252,11 +252,11 @@ PIC16Cloner::cloneFunction(Function *OrgF) {
}
// Clone does not exist.
- // First clone the autos, and populate ValueMap.
+ // First clone the autos, and populate VMap.
CloneAutos(OrgF);
// Now create the clone.
- ClonedF = CloneFunction(OrgF, ValueMap);
+ ClonedF = CloneFunction(OrgF, VMap);
// The new function should be for interrupt line. Therefore should have
// the name suffixed with IL and section attribute marked with IL.
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
index 24c1152..e8b5aa4 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
@@ -15,7 +15,7 @@
#ifndef PIC16CLONER_H
#define PIC16CLONER_H
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
using namespace llvm;
using std::vector;
@@ -72,7 +72,7 @@ namespace llvm {
// the corresponding cloned auto variable of the cloned function.
// This value map is passed during the function cloning so that all the
// uses of auto variables be updated properly.
- DenseMap<const Value*, Value*> ValueMap;
+ ValueMap<const Value*, Value*> VMap;
// Map of a already cloned functions.
map<Function *, Function *> ClonedFunctionMap;
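
The DenseMap-to-ValueMap switch in the cloner is more than a rename. ValueMap (llvm/ADT/ValueMap.h) hooks the lifetime of its Value* keys: if a key is RAUW'd the entry follows the replacement, and if a key is deleted the entry is dropped, where a DenseMap would be left holding a dangling pointer across the cloning steps. A minimal sketch, with OrigGV and ClonedGV as hypothetical globals:

    #include "llvm/ADT/ValueMap.h"

    // Keys may be RAUW'd or deleted while the map is live; ValueMap keeps
    // the entries coherent automatically.
    ValueMap<const Value*, Value*> VMap;
    VMap[OrigGV] = ClonedGV;   // survives OrigGV->replaceAllUsesWith(...)
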
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
index 30a1d4a..dff98d1 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -35,13 +35,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const {
return CalleeSavedRegs;
}
-// PIC16 Callee Saved Reg Classes
-const TargetRegisterClass* const*
-PIC16RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
- return CalleeSavedRegClasses;
-}
-
BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
return Reserved;
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
index 6a9a038..5536a61 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.h
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -41,10 +41,6 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo {
virtual const unsigned*
getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- // PIC16 callee saved register classes
- virtual const TargetRegisterClass* const *
- getCalleeSavedRegClasses(const MachineFunction *MF) const;
-
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
virtual bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 66dfd4b..db11fde 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -78,7 +78,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
isLoad = TID.mayLoad();
isStore = TID.mayStore();
- unsigned TSFlags = TID.TSFlags;
+ uint64_t TSFlags = TID.TSFlags;
isFirst = TSFlags & PPCII::PPC970_First;
isSingle = TSFlags & PPCII::PPC970_Single;
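
This one-liner is a truncation fix: TSFlags is now a 64-bit field on TargetInstrDesc, and copying it into an unsigned local silently discarded the upper half, so any PPC970_* bit assigned above bit 31 would always read as zero. Sketched:

    uint64_t TSFlags = TID.TSFlags;     // keeps all 64 bits
    // unsigned TSFlags = TID.TSFlags;  // would zero any flag above bit 31
    bool isFirst  = TSFlags & PPCII::PPC970_First;
    bool isSingle = TSFlags & PPCII::PPC970_Single;
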
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 10b516a..d47d989 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1203,11 +1203,11 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- const GlobalValue *GV = GSDN->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
- SDValue Zero = DAG.getConstant(0, PtrVT);
// FIXME there isn't really any debug info here
DebugLoc dl = GSDN->getDebugLoc();
+ const GlobalValue *GV = GSDN->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GSDN->getOffset());
+ SDValue Zero = DAG.getConstant(0, PtrVT);
const TargetMachine &TM = DAG.getTarget();
@@ -1631,7 +1631,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- isImmutable, false);
+ isImmutable);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -1700,8 +1700,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
FuncInfo->setVarArgsStackOffset(
MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- CCInfo.getNextStackOffset(),
- true, false));
+ CCInfo.getNextStackOffset(), true));
FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
@@ -1911,7 +1910,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true, false);
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
@@ -1936,7 +1935,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// the object.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0,
@@ -2062,7 +2061,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
if (needsLoad) {
int FI = MFI->CreateFixedObject(ObjSize,
CurArgOffset + (ArgSize - ObjSize),
- isImmutable, false);
+ isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0,
false, false, 0);
@@ -2097,7 +2096,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
FuncInfo->setVarArgsFrameIndex(
MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- Depth, true, false));
+ Depth, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
@@ -2137,6 +2136,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
unsigned CC,
const SmallVectorImpl<ISD::OutputArg>
&Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
unsigned &nAltivecParamsAtEnd) {
// Count how many bytes are to be pushed on the stack, including the linkage
// area, and parameter passing area. We start with 24/48 bytes, which is
@@ -2153,9 +2153,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
// 16-byte aligned.
nAltivecParamsAtEnd = 0;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
- EVT ArgVT = Arg.getValueType();
+ EVT ArgVT = Outs[i].VT;
// Varargs Altivec parameters are padded to a 16 byte boundary.
if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
@@ -2314,8 +2314,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
isDarwinABI);
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
- NewRetAddrLoc,
- true, false);
+ NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
@@ -2328,7 +2327,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
int NewFPLoc =
SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
- true, false);
+ true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
PseudoSourceValue::getFixedStack(NewFPIdx), 0,
@@ -2346,7 +2345,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
- int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true,false);
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue FIN = DAG.getFrameIndex(FI, VT);
TailCallArgumentInfo Info;
@@ -2472,7 +2471,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
+ Callee.getValueType());
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
@@ -2705,6 +2705,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -2714,11 +2715,11 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) {
return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, Ins,
+ isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
} else {
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
- isTailCall, Outs, Ins,
+ isTailCall, Outs, OutVals, Ins,
dl, DAG, InVals);
}
}
@@ -2728,6 +2729,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -2737,7 +2739,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
assert((CallConv == CallingConv::C ||
CallConv == CallingConv::Fast) && "Unknown calling convention!");
- EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
unsigned PtrByteSize = 4;
MachineFunction &MF = DAG.getMachineFunction();
@@ -2769,7 +2770,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
unsigned NumArgs = Outs.size();
for (unsigned i = 0; i != NumArgs; ++i) {
- EVT ArgVT = Outs[i].Val.getValueType();
+ EVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
bool Result;
@@ -2838,7 +2839,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
i != e;
++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (Flags.isByVal()) {
@@ -2934,6 +2935,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -2961,7 +2963,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// prereserved space for [SP][CR][LR][3 x unused].
unsigned NumBytes =
CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
- Outs,
+ Outs, OutVals,
nAltivecParamsAtEnd);
// Calculate by how many bytes the stack has to be adjusted in case of tail
@@ -3025,7 +3027,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
SmallVector<SDValue, 8> MemOpChains;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// PtrOff will be used to store the current argument to the stack if a
@@ -3051,7 +3053,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Everything else is passed left-justified.
EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
if (GPR_idx != NumGPRs) {
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, dl, Chain, Arg,
NULL, 0, VT, false, false, 0);
MemOpChains.push_back(Load.getValue(1));
RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
@@ -3228,8 +3230,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
ArgOffset = ((ArgOffset+15)/16)*16;
ArgOffset += 12*16;
for (unsigned i = 0; i != NumOps; ++i) {
- SDValue Arg = Outs[i].Val;
- EVT ArgType = Arg.getValueType();
+ SDValue Arg = OutVals[i];
+ EVT ArgType = Outs[i].VT;
if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
if (++j > NumVRs) {
@@ -3297,6 +3299,7 @@ SDValue
PPCTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
@@ -3318,7 +3321,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
Flag = Chain.getValue(1);
}
@@ -3376,8 +3379,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
// Find out what the fix offset of the frame pointer save area.
int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI);
// Allocate the frame index for frame pointer save area.
- RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset,
- true, false);
+ RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
}
@@ -3403,8 +3405,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
- true, false);
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -4518,7 +4519,10 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
F->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
unsigned TmpReg = (!BinOpcode) ? incr :
@@ -4583,7 +4587,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
F->insert(It, loopMBB);
F->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
const TargetRegisterClass *RC =
@@ -4716,23 +4723,22 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
unsigned SelectPred = MI->getOperand(4).getImm();
DebugLoc dl = MI->getDebugLoc();
- BuildMI(BB, dl, TII->get(PPC::BCC))
- .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
+ BuildMI(BB, dl, TII->get(PPC::BCC))
+ .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -4745,7 +4751,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII->get(PPC::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
}
@@ -4831,7 +4838,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, loop2MBB);
F->insert(It, midMBB);
F->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
// thisMBB:
// ...
@@ -4899,7 +4909,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
F->insert(It, loop2MBB);
F->insert(It, midMBB);
F->insert(It, exitMBB);
- exitMBB->transferSuccessors(BB);
+ exitMBB->splice(exitMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ exitMBB->transferSuccessorsAndUpdatePHIs(BB);
MachineRegisterInfo &RegInfo = F->getRegInfo();
const TargetRegisterClass *RC =
@@ -5025,7 +5038,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
llvm_unreachable("Unexpected instr type to insert");
}
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -5042,19 +5055,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case PPCISD::SHL:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->getZExtValue() == 0) // 0 << V -> 0.
+ if (C->isNullValue()) // 0 << V -> 0.
return N->getOperand(0);
}
break;
case PPCISD::SRL:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->getZExtValue() == 0) // 0 >>u V -> 0.
+ if (C->isNullValue()) // 0 >>u V -> 0.
return N->getOperand(0);
}
break;
case PPCISD::SRA:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
- if (C->getZExtValue() == 0 || // 0 >>s V -> 0.
+ if (C->isNullValue() || // 0 >>s V -> 0.
C->isAllOnesValue()) // -1 >>s V -> -1.
return N->getOperand(0);
}
@@ -5380,11 +5393,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
-/// vector. If it is invalid, don't add anything to Ops. If hasMemory is true
-/// it means one of the asm constraint of the inline asm instruction being
-/// processed is 'm'.
+/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
- bool hasMemory,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0,0);
@@ -5443,7 +5453,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
}
// Handle standard constraint letters.
- TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG);
+ TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG);
}
// isLegalAddressingMode - Return true if the addressing mode represented
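
Beyond the OutVals threading, the PowerPC lowering hunks fold in several smaller interface updates: getTargetGlobalAddress now takes a DebugLoc, getExtLoad moves the result VT ahead of the DebugLoc in its parameter list, LowerAsmOperandForConstraint drops the hasMemory flag, and the DAG-combine checks switch from getZExtValue() comparisons to the isNullValue()/isOne()/isAllOnesValue() predicates. The predicates are width-safe: getZExtValue() asserts once a constant no longer fits in 64 bits, while the predicate form works for any width, as in this sketch of the SHL combine above:

    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      // Safe for constants of any bit width; C->getZExtValue() == 0 would
      // assert on a constant wider than 64 bits.
      if (C->isNullValue())   // 0 << V -> 0
        return N->getOperand(0);
    }
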
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 6dcaf1e..700816f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -318,12 +318,9 @@ namespace llvm {
unsigned getByValTypeAlignment(const Type *Ty) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
- /// vector. If it is invalid, don't add anything to Ops. If hasMemory is
- /// true it means one of the asm constraint of the inline asm instruction
- /// being processed is 'm'.
+ /// vector. If it is invalid, don't add anything to Ops.
virtual void LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -438,6 +435,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -446,6 +444,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue
@@ -465,6 +464,7 @@ namespace llvm {
LowerCall_Darwin(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -472,6 +472,7 @@ namespace llvm {
LowerCall_SVR4(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 1b7a778..1574aa3 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -316,9 +316,8 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc argument
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
@@ -327,50 +326,46 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
// One-way branch.
if (FBB == 0) {
if (Cond.empty()) // Unconditional branch
- BuildMI(&MBB, dl, get(PPC::B)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);
else // Conditional branch
- BuildMI(&MBB, dl, get(PPC::BCC))
+ BuildMI(&MBB, DL, get(PPC::BCC))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
return 1;
}
// Two-way Conditional Branch.
- BuildMI(&MBB, dl, get(PPC::BCC))
+ BuildMI(&MBB, DL, get(PPC::BCC))
.addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB);
- BuildMI(&MBB, dl, get(PPC::B)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);
return 2;
}
-bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC != SrcRC) {
- // Not yet supported!
- return false;
- }
-
- if (DestRC == PPC::GPRCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg);
- } else if (DestRC == PPC::G8RCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg);
- } else if (DestRC == PPC::F4RCRegisterClass ||
- DestRC == PPC::F8RCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::FMR), DestReg).addReg(SrcReg);
- } else if (DestRC == PPC::CRRCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg);
- } else if (DestRC == PPC::VRRCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::VOR), DestReg).addReg(SrcReg).addReg(SrcReg);
- } else if (DestRC == PPC::CRBITRCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::CROR), DestReg).addReg(SrcReg).addReg(SrcReg);
- } else {
- // Attempt to copy register that is not GPR or FPR
- return false;
- }
-
- return true;
+void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (PPC::GPRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR;
+ else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::OR8;
+ else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::FMR;
+ else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::MCRF;
+ else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::VOR;
+ else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))
+ Opc = PPC::CROR;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ const TargetInstrDesc &TID = get(Opc);
+ if (TID.getNumOperands() == 3)
+ BuildMI(MBB, I, DL, TID, DestReg)
+ .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
+ else
+ BuildMI(MBB, I, DL, TID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
bool
@@ -654,121 +649,6 @@ PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
return &*MIB;
}
-/// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into
-/// copy instructions, turning them into load/store instructions.
-MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const {
- if (Ops.size() != 1) return NULL;
-
- // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
- // it takes more than one instruction to store it.
- unsigned Opc = MI->getOpcode();
- unsigned OpNum = Ops[0];
-
- MachineInstr *NewMI = NULL;
- if ((Opc == PPC::OR &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
- if (OpNum == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW))
- .addReg(InReg,
- getKillRegState(isKill) |
- getUndefRegState(isUndef)),
- FrameIndex);
- } else { // move -> load
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ))
- .addReg(OutReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef)),
- FrameIndex);
- }
- } else if ((Opc == PPC::OR8 &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) {
- if (OpNum == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD))
- .addReg(InReg,
- getKillRegState(isKill) |
- getUndefRegState(isUndef)),
- FrameIndex);
- } else { // move -> load
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD))
- .addReg(OutReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef)),
- FrameIndex);
- }
- } else if (Opc == PPC::FMR || Opc == PPC::FMRSD) {
- // The register may be F4RC or F8RC, and that determines the memory op.
- unsigned OrigReg = MI->getOperand(OpNum).getReg();
- // We cannot tell the register class from a physreg alone.
- if (TargetRegisterInfo::isPhysicalRegister(OrigReg))
- return NULL;
- const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigReg);
- const bool is64 = RC == PPC::F8RCRegisterClass;
-
- if (OpNum == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(),
- get(is64 ? PPC::STFD : PPC::STFS))
- .addReg(InReg,
- getKillRegState(isKill) |
- getUndefRegState(isUndef)),
- FrameIndex);
- } else { // move -> load
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(),
- get(is64 ? PPC::LFD : PPC::LFS))
- .addReg(OutReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef)),
- FrameIndex);
- }
- }
-
- return NewMI;
-}
-
-bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const {
- if (Ops.size() != 1) return false;
-
- // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because
- // it takes more than one instruction to store it.
- unsigned Opc = MI->getOpcode();
-
- if ((Opc == PPC::OR &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
- return true;
- else if ((Opc == PPC::OR8 &&
- MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
- return true;
- else if (Opc == PPC::FMR || Opc == PPC::FMRSD)
- return true;
-
- return false;
-}
-
-
bool PPCInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
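Note: the hunk above is the heart of this patch for PowerPC. The fallible copyRegToReg hook is replaced by copyPhysReg, which derives the copy opcode from the physical registers themselves (via TargetRegisterClass::contains) and treats an impossible copy as a hard error rather than a false return. A minimal sketch of how a caller drives the new hook when lowering a generic COPY; the wrapper function and its name are illustrative, not part of this patch:

    // Lowering a generic COPY with the new hook: no register classes are
    // passed in, and copyPhysReg asserts if the copy cannot be done.
    static void lowerCopy(const TargetInstrInfo *TII, MachineInstr *MI) {
      MachineBasicBlock &MBB = *MI->getParent();
      unsigned DstReg = MI->getOperand(0).getReg();
      unsigned SrcReg = MI->getOperand(1).getReg();
      bool KillSrc = MI->getOperand(1).isKill();
      TII->copyPhysReg(MBB, MI, MI->getDebugLoc(), DstReg, SrcReg, KillSrc);
      MI->eraseFromParent();  // the COPY pseudo is no longer needed
    }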
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 7a9e11b..eadb21e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -109,13 +109,12 @@ public:
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -135,23 +134,6 @@ public:
const MDNode *MDPtr,
DebugLoc DL) const;
- /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into
- /// copy instructions, turning them into load/store instructions.
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
- virtual bool canFoldMemoryOperand(const MachineInstr *MI,
- const SmallVectorImpl<unsigned> &Ops) const;
-
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
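The per-target foldMemoryOperandImpl/canFoldMemoryOperand hooks for folding copies into loads and stores are dropped here rather than ported, presumably because moves expressed as generic COPY instructions can be folded without per-target help. A rough sketch of what the generic replacement looks like, assuming the usual spiller context (SpilledReg, StackSlot, RC and TRI are stand-ins, not names from this patch):

    // A copy that reads a spilled value can be rewritten as a reload
    // without any target-specific folding hook.
    if (MI->isCopy() && MI->getOperand(1).getReg() == SpilledReg) {
      TII->loadRegFromStackSlot(MBB, MI, MI->getOperand(0).getReg(),
                                StackSlot, RC, TRI);
      MI->eraseFromParent();
    }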
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 0ff852c..4d6132a9 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -269,140 +269,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs;
}
-const TargetRegisterClass* const*
-PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- // 32-bit Darwin calling convention.
- static const TargetRegisterClass * const Darwin32_CalleeSavedRegClasses[] = {
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
-
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,
-
- &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
-
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
-
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
-
- &PPC::GPRCRegClass, 0
- };
-
- // 32-bit SVR4 calling convention.
- static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = {
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
- &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,
-
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,
-
- &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
-
- &PPC::VRSAVERCRegClass,
-
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
-
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
-
- 0
- };
-
- // 64-bit Darwin calling convention.
- static const TargetRegisterClass * const Darwin64_CalleeSavedRegClasses[] = {
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
-
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,
-
- &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
-
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
-
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
-
- &PPC::G8RCRegClass, 0
- };
-
- // 64-bit SVR4 calling convention.
- static const TargetRegisterClass * const SVR4_64_CalleeSavedRegClasses[] = {
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
- &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,
-
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,
- &PPC::F8RCRegClass,&PPC::F8RCRegClass,
-
- &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass,
-
- &PPC::VRSAVERCRegClass,
-
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
- &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,
-
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,
- &PPC::CRBITRCRegClass,
-
- 0
- };
-
- if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses :
- Darwin32_CalleeSavedRegClasses;
-
- return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegClasses
- : SVR4_CalleeSavedRegClasses;
-}
-
// needsFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
@@ -1060,8 +926,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset,
- true, false);
+ FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -1069,8 +934,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
- MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta,
- true, false);
+ MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
// Reserve a slot closest to SP or frame pointer if we have a dynalloc or
@@ -1127,9 +991,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RC = CSI[i].getRegClass();
-
- if (RC == PPC::GPRCRegisterClass) {
+ if (PPC::GPRCRegisterClass->contains(Reg)) {
HasGPSaveArea = true;
GPRegs.push_back(CSI[i]);
@@ -1137,7 +999,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinGPR) {
MinGPR = Reg;
}
- } else if (RC == PPC::G8RCRegisterClass) {
+ } else if (PPC::G8RCRegisterClass->contains(Reg)) {
HasG8SaveArea = true;
G8Regs.push_back(CSI[i]);
@@ -1145,7 +1007,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
if (Reg < MinG8R) {
MinG8R = Reg;
}
- } else if (RC == PPC::F8RCRegisterClass) {
+ } else if (PPC::F8RCRegisterClass->contains(Reg)) {
HasFPSaveArea = true;
FPRegs.push_back(CSI[i]);
@@ -1154,12 +1016,12 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
MinFPR = Reg;
}
// FIXME SVR4: Disable CR save area for now.
- } else if ( RC == PPC::CRBITRCRegisterClass
- || RC == PPC::CRRCRegisterClass) {
+ } else if (PPC::CRBITRCRegisterClass->contains(Reg)
+ || PPC::CRRCRegisterClass->contains(Reg)) {
// HasCRSaveArea = true;
- } else if (RC == PPC::VRSAVERCRegisterClass) {
+ } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
HasVRSAVESaveArea = true;
- } else if (RC == PPC::VRRCRegisterClass) {
+ } else if (PPC::VRRCRegisterClass->contains(Reg)) {
HasVRSaveArea = true;
VRegs.push_back(CSI[i]);
@@ -1240,9 +1102,10 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// which have the CR/CRBIT register class?
// Adjust the frame index of the CR spill slot.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- const TargetRegisterClass *RC = CSI[i].getRegClass();
+ unsigned Reg = CSI[i].getReg();
- if (RC == PPC::CRBITRCRegisterClass || RC == PPC::CRRCRegisterClass) {
+ if (PPC::CRBITRCRegisterClass->contains(Reg) ||
+ PPC::CRRCRegisterClass->contains(Reg)) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
@@ -1257,9 +1120,9 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
// which have the VRSAVE register class?
// Adjust the frame index of the VRSAVE spill slot.
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
- const TargetRegisterClass *RC = CSI[i].getRegClass();
+ unsigned Reg = CSI[i].getReg();
- if (RC == PPC::VRSAVERCRegisterClass) {
+ if (PPC::VRSAVERCRegisterClass->contains(Reg)) {
int FI = CSI[i].getFrameIdx();
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
@@ -1762,4 +1625,3 @@ int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
}
#include "PPCGenRegisterInfo.inc"
-
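The PPCRegisterInfo changes above all follow one pattern: CalleeSavedInfo no longer carries a TargetRegisterClass, so code that used to compare CSI[i].getRegClass() against a class pointer now asks the class whether it contains the saved physical register. A condensed sketch of the idiom (the counters are illustrative):

    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
      unsigned Reg = CSI[i].getReg();
      if (PPC::GPRCRegisterClass->contains(Reg))
        ++NumGPRSaves;          // belongs in the 32-bit GPR save area
      else if (PPC::F8RCRegisterClass->contains(Reg))
        ++NumFPRSaves;          // belongs in the FP save area
    }

This is also what makes getCalleeSavedRegClasses, with its large parallel tables, removable: the class is now recoverable from the register itself.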
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 43cf535..f026847 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -42,9 +42,6 @@ public:
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
/// targetHandlesStackFrameRounding - Returns true if the target is
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 7fa73ed..4d7ee08 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -300,6 +300,14 @@ unsigned long reverse(unsigned v) {
return v ^ (t >> 8);
}
+Neither is this (a very standard idiom):
+
+int f(int n)
+{
+ return (((n) << 24) | (((n) & 0xff00) << 8)
+ | (((n) >> 8) & 0xff00) | ((n) >> 24));
+}
+
//===---------------------------------------------------------------------===//
[LOOP RECOGNITION]
@@ -898,17 +906,6 @@ The expression should optimize to something like
//===---------------------------------------------------------------------===//
-From GCC Bug 3756:
-int
-pn (int n)
-{
- return (n >= 0 ? 1 : -1);
-}
-Should combine to (n >> 31) | 1. Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts | llc".
-
-//===---------------------------------------------------------------------===//
-
void a(int variable)
{
if (variable == 4 || variable == 6)
@@ -1439,33 +1436,6 @@ This pattern repeats several times, basically doing:
//===---------------------------------------------------------------------===//
-186.crafty contains this interesting pattern:
-
-%77 = call i8* @strstr(i8* getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0),
- i8* %30)
-%phitmp648 = icmp eq i8* %77, getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0)
-br i1 %phitmp648, label %bb70, label %bb76
-
-bb70: ; preds = %OptionMatch.exit91, %bb69
- %78 = call i32 @strlen(i8* %30) nounwind readonly align 1 ; <i32> [#uses=1]
-
-This is basically:
- cststr = "abcdef";
- if (strstr(cststr, P) == cststr) {
- x = strlen(P);
- ...
-
-The strstr call would be significantly cheaper written as:
-
-cststr = "abcdef";
-if (memcmp(P, str, strlen(P)))
- x = strlen(P);
-
-This is memcmp+strlen instead of strstr. This also makes the strlen fully
-redundant.
-
-//===---------------------------------------------------------------------===//
-
186.crafty also contains this code:
%1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0))
@@ -1863,3 +1833,91 @@ LLVM prefers comparisons with zero over non-zero in general, but in this
case it chooses instead to keep the max operation obvious.
//===---------------------------------------------------------------------===//
+
+Take the following testcase on x86-64 (similar testcases exist for all targets
+with addc/adde):
+
+define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b,
+i64 %c) nounwind {
+entry:
+ %0 = zext i64 %a to i128 ; <i128> [#uses=1]
+ %1 = zext i64 %b to i128 ; <i128> [#uses=1]
+ %2 = add i128 %1, %0 ; <i128> [#uses=2]
+ %3 = zext i64 %c to i128 ; <i128> [#uses=1]
+ %4 = shl i128 %3, 64 ; <i128> [#uses=1]
+ %5 = add i128 %4, %2 ; <i128> [#uses=1]
+ %6 = lshr i128 %5, 64 ; <i128> [#uses=1]
+ %7 = trunc i128 %6 to i64 ; <i64> [#uses=1]
+ store i64 %7, i64* %s, align 8
+ %8 = trunc i128 %2 to i64 ; <i64> [#uses=1]
+ store i64 %8, i64* %t, align 8
+ ret void
+}
+
+Generated code:
+ addq %rcx, %rdx
+ movl $0, %eax
+ adcq $0, %rax
+ addq %r8, %rax
+ movq %rax, (%rdi)
+ movq %rdx, (%rsi)
+ ret
+
+Expected code:
+ addq %rcx, %rdx
+ adcq $0, %r8
+ movq %r8, (%rdi)
+ movq %rdx, (%rsi)
+ ret
+
+The generated SelectionDAG has an ADD of an ADDE, where both operands of the
+ADDE are zero. Replacing one of the operands of the ADDE with the other operand
+of the ADD, and replacing the ADD with the ADDE, should give the desired result.
+
+(That said, we are doing a lot better than gcc on this testcase. :) )
+
+//===---------------------------------------------------------------------===//
+
+Switch lowering generates less than ideal code for the following switch:
+define void @a(i32 %x) nounwind {
+entry:
+ switch i32 %x, label %if.end [
+ i32 0, label %if.then
+ i32 1, label %if.then
+ i32 2, label %if.then
+ i32 3, label %if.then
+ i32 5, label %if.then
+ ]
+if.then:
+ tail call void @foo() nounwind
+ ret void
+if.end:
+ ret void
+}
+declare void @foo()
+
+Generated code on x86-64 (other platforms give similar results):
+a:
+ cmpl $5, %edi
+ ja .LBB0_2
+ movl %edi, %eax
+ movl $47, %ecx
+ btq %rax, %rcx
+ jb .LBB0_3
+.LBB0_2:
+ ret
+.LBB0_3:
+ jmp foo # TAILCALL
+
+The movl+movl+btq+jb could be simplified to a cmpl+jne.
+
+Or, if we wanted to be really clever, we could simplify the whole thing to
+something like the following, which eliminates a branch:
+ xorl $1, %edi
+ cmpl $4, %edi
+ ja .LBB0_2
+ ret
+.LBB0_2:
+ jmp foo # TAILCALL
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index f47e53a..4099a62 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -38,6 +38,7 @@ SDValue
SparcTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to locations.
@@ -66,7 +67,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
// Guarantee that all emitted copies are stuck together with flags.
Flag = Chain.getValue(1);
@@ -133,7 +134,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(Arg);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true, false);
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Load;
if (ObjectVT == MVT::i32) {
@@ -146,7 +147,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8);
FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr,
DAG.getConstant(Offset, MVT::i32));
- Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr,
+ Load = DAG.getExtLoad(LoadOp, MVT::i32, dl, Chain, FIPtr,
NULL, 0, ObjectVT, false, false, 0);
Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load);
}
@@ -169,7 +170,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
InVals.push_back(Arg);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true, false);
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0,
false, false, 0);
@@ -192,7 +193,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true, false);
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
false, false, 0);
@@ -205,7 +206,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32);
} else {
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4,
- true, false);
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0,
false, false, 0);
@@ -239,7 +240,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true, false);
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0,
@@ -262,6 +263,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -283,7 +285,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Count the size of the outgoing arguments.
unsigned ArgsSize = 0;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- switch (Outs[i].Val.getValueType().getSimpleVT().SimpleTy) {
+ switch (Outs[i].VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown value type!");
case MVT::i1:
case MVT::i8:
@@ -316,7 +318,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -358,8 +360,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
unsigned ArgOffset = 68;
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- SDValue Val = Outs[i].Val;
- EVT ObjectVT = Val.getValueType();
+ SDValue Val = OutVals[i];
+ EVT ObjectVT = Outs[i].VT;
SDValue ValToStore(0, 0);
unsigned ObjSize;
switch (ObjectVT.getSimpleVT().SimpleTy) {
@@ -478,7 +480,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
@@ -737,7 +739,7 @@ void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
ISD::CondCode CC, unsigned &SPCC) {
if (isa<ConstantSDNode>(RHS) &&
- cast<ConstantSDNode>(RHS)->getZExtValue() == 0 &&
+ cast<ConstantSDNode>(RHS)->isNullValue() &&
CC == ISD::SETNE &&
((LHS.getOpcode() == SPISD::SELECT_ICC &&
LHS.getOperand(3).getOpcode() == SPISD::CMPICC) ||
@@ -745,8 +747,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS,
LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) &&
isa<ConstantSDNode>(LHS.getOperand(0)) &&
isa<ConstantSDNode>(LHS.getOperand(1)) &&
- cast<ConstantSDNode>(LHS.getOperand(0))->getZExtValue() == 1 &&
- cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 0) {
+ cast<ConstantSDNode>(LHS.getOperand(0))->isOne() &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->isNullValue()) {
SDValue CMPCC = LHS.getOperand(3);
SPCC = cast<ConstantSDNode>(LHS.getOperand(2))->getZExtValue();
LHS = CMPCC.getOperand(0);
@@ -759,7 +761,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op,
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
// FIXME there isn't really any debug info here
DebugLoc dl = Op.getDebugLoc();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA);
SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA);
@@ -1007,21 +1009,20 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add the true and fallthrough blocks as its successors.
+ BB->addSuccessor(copy0MBB);
+ BB->addSuccessor(sinkMBB);
+
BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
- // Next, add the true and fallthrough blocks as its successors.
- BB->addSuccessor(copy0MBB);
- BB->addSuccessor(sinkMBB);
// copy0MBB:
// %FalseValue = ...
@@ -1035,11 +1036,11 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl, TII.get(SP::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 5ebdcac..db39e08 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -86,6 +86,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -94,6 +95,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
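The Sparc lowering changes track an API split in the target-independent call lowering: ISD::OutputArg no longer carries the outgoing SDValue, so Outs[i] describes only the type and flags of argument i while the value itself travels in the parallel OutVals vector. A minimal sketch of the resulting pairing inside LowerCall/LowerReturn:

    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
      EVT ArgVT = Outs[i].VT;     // static description of the argument
      SDValue Arg = OutVals[i];   // the value being passed or returned
      // ... assign ArgVT/Arg to a register or a stack slot ...
    }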
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 8e49eca..3a4c80a 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -109,38 +109,29 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
unsigned
SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond)const{
- // FIXME this should probably take a DebugLoc argument
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL)const{
// Can only insert uncond branches so far.
assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
- BuildMI(&MBB, dl, get(SP::BA)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB);
return 1;
}
-bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
- if (DestRC != SrcRC) {
- // Not yet supported!
- return false;
- }
-
- if (DestRC == SP::IntRegsRegisterClass)
- BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg);
- else if (DestRC == SP::FPRegsRegisterClass)
- BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg).addReg(SrcReg);
- else if (DestRC == SP::DFPRegsRegisterClass)
- BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD),DestReg)
- .addReg(SrcReg);
+void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ if (SP::IntRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::FPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg))
+ BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
else
- // Can't copy this register
- return false;
-
- return true;
+ llvm_unreachable("Impossible reg-to-reg copy");
}
void SparcInstrInfo::
@@ -183,61 +174,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
llvm_unreachable("Can't load this register from stack slot");
}
-MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FI) const {
- if (Ops.size() != 1) return NULL;
-
- unsigned OpNum = Ops[0];
- bool isFloat = false;
- MachineInstr *NewMI = NULL;
- switch (MI->getOpcode()) {
- case SP::ORrr:
- if (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == SP::G0&&
- MI->getOperand(0).isReg() && MI->getOperand(2).isReg()) {
- if (OpNum == 0) // COPY -> STORE
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::STri))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(MI->getOperand(2).getReg());
- else // COPY -> LOAD
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::LDri),
- MI->getOperand(0).getReg())
- .addFrameIndex(FI)
- .addImm(0);
- }
- break;
- case SP::FMOVS:
- isFloat = true;
- // FALLTHROUGH
- case SP::FMOVD:
- if (OpNum == 0) { // COPY -> STORE
- unsigned SrcReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(),
- get(isFloat ? SP::STFri : SP::STDFri))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef));
- } else { // COPY -> LOAD
- unsigned DstReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(),
- get(isFloat ? SP::LDFri : SP::LDDFri))
- .addReg(DstReg, RegState::Define |
- getDeadRegState(isDead) | getUndefRegState(isUndef))
- .addFrameIndex(FI)
- .addImm(0);
- }
- break;
- }
-
- return NewMI;
-}
-
unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const
{
SparcMachineFunctionInfo *SparcFI = MF->getInfo<SparcMachineFunctionInfo>();
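The Sparc InsertBranch change resolves the old FIXME by threading a DebugLoc in from the caller instead of synthesizing an unknown location inside the hook. A sketch of an updated call site, assuming the caller can recover a location near the insertion point (findDebugLoc is used here as a plausible helper, not something this patch adds):

    DebugLoc DL = MBB.findDebugLoc(MBB.getFirstTerminator());
    TII->InsertBranch(MBB, TBB, FBB, Cond, DL);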
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index a00ba39..1334718 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -68,14 +68,13 @@ public:
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -89,18 +88,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- int FrameIndex) const;
-
- virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
- MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
- MachineInstr* LoadMI) const {
- return 0;
- }
-
unsigned getGlobalBaseReg(MachineFunction *MF) const;
};
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index 9489580..ddadd51 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -665,7 +665,7 @@ let Defs = [FCC] in {
//===----------------------------------------------------------------------===//
// V9 Conditional Moves.
-let Predicates = [HasV9], isTwoAddress = 1 in {
+let Predicates = [HasV9], Constraints = "$T = $dst" in {
// Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual.
// FIXME: Add instruction encodings for the JIT some day.
def MOVICCrr
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 08373bb8..427cc7f 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -52,13 +52,6 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
return Reserved;
}
-
-const TargetRegisterClass* const*
-SparcRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
- return CalleeSavedRegClasses;
-}
-
bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const {
return false;
}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 24d43e3..9f0cda7 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -32,9 +32,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const* getCalleeSavedRegClasses(
- const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
index 90be222..d7ac8f5 100644
--- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
@@ -124,7 +124,7 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
if (strncmp(Modifier + 7, "even", 4) == 0)
- Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_even32);
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit);
else if (strncmp(Modifier + 7, "odd", 3) == 0)
Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
else
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index bb2952a..ed290ca 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -670,7 +670,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the remainder (even subreg) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_even32 : SystemZ::subreg_even);
+ SystemZ::subreg_32bit : SystemZ::subreg_even);
SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
@@ -754,7 +754,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the remainder (even subreg) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
unsigned SubRegIdx = (is32Bit ?
- SystemZ::subreg_even32 : SystemZ::subreg_even);
+ SystemZ::subreg_32bit : SystemZ::subreg_even);
SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 76f2901..67f739f 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -254,6 +254,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -266,7 +267,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CallingConv::Fast:
case CallingConv::C:
return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
- Outs, Ins, dl, DAG, InVals);
+ Outs, OutVals, Ins, dl, DAG, InVals);
}
}
@@ -334,7 +335,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
// Create the nodes corresponding to a load from this parameter slot.
// Create the frame index object for this incoming parameter...
int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
- VA.getLocMemOffset(), true, false);
+ VA.getLocMemOffset(), true);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
@@ -372,6 +373,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg>
&Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -402,7 +404,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -464,7 +466,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy());
@@ -550,6 +552,7 @@ SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of the return value to a location
@@ -575,7 +578,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain,
// Copy the result values into the output registers.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
- SDValue ResValue = Outs[i].Val;
+ SDValue ResValue = OutVals[i];
assert(VA.isRegLoc() && "Can only return in registers!");
// If this is an 8/16/32-bit value, it really should be passed promoted
@@ -729,14 +732,14 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue Result;
if (!IsPic && !ExtraLoadRequired) {
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
Offset = 0;
} else {
unsigned char OpFlags = 0;
if (ExtraLoadRequired)
OpFlags = SystemZII::MO_GOTENT;
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
}
Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl,
@@ -827,16 +830,20 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm();
- BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
F->insert(I, copy0MBB);
F->insert(I, copy1MBB);
// Update machine-CFG edges by transferring all successors of the current
// block to the new block which will contain the Phi node for the select.
- copy1MBB->transferSuccessors(BB);
+ copy1MBB->splice(copy1MBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(copy1MBB);
+ BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB);
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to copy1MBB
@@ -849,11 +856,11 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = copy1MBB;
- BuildMI(BB, dl, TII.get(SystemZ::PHI),
+ BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI),
MI->getOperand(0).getReg())
.addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
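Both the Sparc and SystemZ custom inserters above move from manual successor surgery to the splice/transferSuccessorsAndUpdatePHIs pair, which also keeps any PHIs in the sink block pointing at the right predecessor. The shared idiom, condensed:

    // Move everything after the pseudo MI into the sink block, then let
    // the helper rewrite successor edges and PHI operands in one step.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
    BB->addSuccessor(copy0MBB);   // false (fallthrough) path
    BB->addSuccessor(sinkMBB);    // true (branch) path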
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index 94bd906..51d2df3 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -98,6 +98,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -126,6 +127,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -134,6 +136,7 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
const SystemZSubtarget &Subtarget;
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 8c5e905..a658280 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -126,7 +126,7 @@ def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
(implicit PSW)]>;
}
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Defs = [PSW] in {
let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y
def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2),
@@ -237,7 +237,7 @@ def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2),
"ddb\t{$dst, $src2}",
[(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>;
-} // isTwoAddress = 1
+} // Constraints = "$src1 = $dst"
def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src),
"sqebr\t{$dst, $src}",
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 043686c..c03864f 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -117,59 +117,28 @@ void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
}
-bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
-
- // Determine if DstRC and SrcRC have a common superclass.
- const TargetRegisterClass *CommonRC = DestRC;
- if (DestRC == SrcRC)
- /* Same regclass for source and dest */;
- else if (CommonRC->hasSuperClass(SrcRC))
- CommonRC = SrcRC;
- else if (!CommonRC->hasSubClass(SrcRC))
- CommonRC = 0;
-
- if (CommonRC) {
- if (CommonRC == &SystemZ::GR64RegClass ||
- CommonRC == &SystemZ::ADDR64RegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg);
- } else if (CommonRC == &SystemZ::GR32RegClass ||
- CommonRC == &SystemZ::ADDR32RegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg);
- } else if (CommonRC == &SystemZ::GR64PRegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV64rrP), DestReg).addReg(SrcReg);
- } else if (CommonRC == &SystemZ::GR128RegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV128rr), DestReg).addReg(SrcReg);
- } else if (CommonRC == &SystemZ::FP32RegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::FMOV32rr), DestReg).addReg(SrcReg);
- } else if (CommonRC == &SystemZ::FP64RegClass) {
- BuildMI(MBB, I, DL, get(SystemZ::FMOV64rr), DestReg).addReg(SrcReg);
- } else {
- return false;
- }
-
- return true;
- }
-
- if ((SrcRC == &SystemZ::GR64RegClass &&
- DestRC == &SystemZ::ADDR64RegClass) ||
- (DestRC == &SystemZ::GR64RegClass &&
- SrcRC == &SystemZ::ADDR64RegClass)) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg);
- return true;
- } else if ((SrcRC == &SystemZ::GR32RegClass &&
- DestRC == &SystemZ::ADDR32RegClass) ||
- (DestRC == &SystemZ::GR32RegClass &&
- SrcRC == &SystemZ::ADDR32RegClass)) {
- BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg);
- return true;
- }
-
- return false;
+void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ unsigned Opc;
+ if (SystemZ::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV64rr;
+ else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV32rr;
+ else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV64rrP;
+ else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::MOV128rr;
+ else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::FMOV32rr;
+ else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg))
+ Opc = SystemZ::FMOV64rr;
+ else
+ llvm_unreachable("Impossible reg-to-reg copy");
+
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
}
bool
@@ -286,8 +255,7 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass != &SystemZ::FP64RegClass) {
+ if (!SystemZ::FP64RegClass.contains(Reg)) {
unsigned Offset = RegSpillOffsets[Reg];
CalleeFrameSize += 8;
if (StartOffset > Offset) {
@@ -332,11 +300,10 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Save FPRs
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass == &SystemZ::FP64RegClass) {
+ if (SystemZ::FP64RegClass.contains(Reg)) {
MBB.addLiveIn(Reg);
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass,
- &RI);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, &RI);
}
}
@@ -361,9 +328,9 @@ SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// Restore FP registers
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
- const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass == &SystemZ::FP64RegClass)
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
+ if (SystemZ::FP64RegClass.contains(Reg))
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ &SystemZ::FP64RegClass, &RI);
}
// Restore GP registers
@@ -523,9 +490,8 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME: this should probably have a DebugLoc operand
- DebugLoc DL;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index a753f14..0559619 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -60,11 +60,10 @@ public:
///
virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
- bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
bool isMoveInstr(const MachineInstr& MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -102,7 +101,8 @@ public:
bool AllowModify) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 22bde4e..8df07c0 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -478,7 +478,8 @@ def MOV64rmm : RSYI<0x04EB,
"lmg\t{$from, $to, $dst}",
[]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isTwoAddress = 1 in {
+let isReMaterializable = 1, isAsCheapAsAMove = 1,
+ Constraints = "$src = $dst" in {
def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src),
"lhi\t${dst:subreg_even}, 0",
[]>;
@@ -537,7 +538,7 @@ def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src),
(implicit PSW)]>;
}
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Defs = [PSW] in {
@@ -924,12 +925,12 @@ def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2
"dlg\t{$dst, $src2}",
[]>;
} // mayLoad
-} // isTwoAddress = 1
+} // Constraints = "$src1 = $dst"
//===----------------------------------------------------------------------===//
// Shifts
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def SRL32rri : RSI<0x88,
(outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
"srl\t{$src, $amt}",
@@ -939,7 +940,7 @@ def SRL64rri : RSYI<0xEB0C,
"srlg\t{$dst, $src, $amt}",
[(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>;
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def SHL32rri : RSI<0x89,
(outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
"sll\t{$src, $amt}",
@@ -950,7 +951,7 @@ def SHL64rri : RSYI<0xEB0D,
[(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>;
let Defs = [PSW] in {
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def SRA32rri : RSI<0x8A,
(outs GR32:$dst), (ins GR32:$src, riaddr32:$amt),
"sra\t{$src, $amt}",
@@ -1129,13 +1130,13 @@ def : Pat<(mulhs GR32:$src1, GR32:$src2),
(EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
GR32:$src1, subreg_odd32),
GR32:$src2),
- subreg_even32)>;
+ subreg_32bit)>;
def : Pat<(mulhu GR32:$src1, GR32:$src2),
(EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
GR32:$src1, subreg_odd32),
GR32:$src2),
- subreg_even32)>;
+ subreg_32bit)>;
def : Pat<(mulhu GR64:$src1, GR64:$src2),
(EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
GR64:$src1, subreg_odd),
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 638fd17..ae96b0b 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -47,22 +47,6 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CalleeSavedRegs;
}
-const TargetRegisterClass* const*
-SystemZRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
- &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
- &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
- &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
- &SystemZ::GR64RegClass, &SystemZ::GR64RegClass,
- &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
- &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
- &SystemZ::FP64RegClass, &SystemZ::FP64RegClass,
- &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, 0
- };
- return CalleeSavedRegClasses;
-}
-
BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
if (hasFP(MF))
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 42aa5dd..670025f 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -32,9 +32,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
/// Code Generation virtual methods...
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const* getCalleeSavedRegClasses(
- const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool hasReservedCallFrame(MachineFunction &MF) const { return true; }
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index b561744..33be8dd 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -55,7 +55,6 @@ class FPRL<bits<4> num, string n, list<Register> subregs>
let Namespace = "SystemZ" in {
def subreg_32bit : SubRegIndex;
-def subreg_even32 : SubRegIndex;
def subreg_odd32 : SubRegIndex;
def subreg_even : SubRegIndex;
def subreg_odd : SubRegIndex;
@@ -99,7 +98,7 @@ def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
}
// Register pairs
-let SubRegIndices = [subreg_even32, subreg_odd32] in {
+let SubRegIndices = [subreg_32bit, subreg_odd32] in {
def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>;
def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>;
def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>;
@@ -111,8 +110,7 @@ def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>;
}
let SubRegIndices = [subreg_even, subreg_odd],
- CompositeIndices = [(subreg_even32 subreg_even, subreg_32bit),
- (subreg_odd32 subreg_odd, subreg_32bit)] in {
+ CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in {
def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>;
def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>;
def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>;
@@ -355,7 +353,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
[R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]>
{
- let SubRegClasses = [(GR32 subreg_even32, subreg_odd32)];
+ let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -391,7 +389,7 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
[R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]>
{
- let SubRegClasses = [(GR32 subreg_even32, subreg_odd32),
+ let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32),
(GR64 subreg_even, subreg_odd)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index 094a57e..c099a7e 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -28,6 +28,10 @@ const TargetRegisterClass *
TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const {
if (isLookupPtrRegClass())
return TRI->getPointerRegClass(RegClass);
+ // Instructions like INSERT_SUBREG do not have fixed register classes.
+ if (RegClass < 0)
+ return 0;
+ // Otherwise just look it up normally.
return TRI->getRegClass(RegClass);
}
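
With this change, callers must tolerate a null result for operands that
have no fixed register class. A defensive sketch, with TID, OpNo, MO and
MRI standing in for values a real caller would have in scope:

    const TargetRegisterClass *RC = TID.OpInfo[OpNo].getRegClass(TRI);
    if (!RC) {
      // e.g. an INSERT_SUBREG operand: fall back to the class the
      // virtual register was created with.
      RC = MRI.getRegClass(MO.getReg());
    }
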
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index b9372d0..dd7b532 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -101,7 +101,7 @@ static bool IsNullTerminatedString(const Constant *C) {
ConstantInt *Null =
dyn_cast<ConstantInt>(CVA->getOperand(ATy->getNumElements()-1));
- if (Null == 0 || Null->getZExtValue() != 0)
+ if (Null == 0 || !Null->isZero())
return false; // Not null terminated.
// Verify that the null doesn't occur anywhere else in the string.
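
ConstantInt::isZero compares the full-width APInt, while getZExtValue
asserts when the value does not fit in 64 bits, so the new form is safe
for any integer width. Illustrative use, with V an arbitrary Value*:

    static bool isNullConstantInt(const Value *V) {
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
        return CI->isZero();   // safe for any width, i8 through i128
      return false;
    }
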
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index dcc5f61..49bfad5 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -39,20 +39,20 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
TargetRegisterInfo::~TargetRegisterInfo() {}
-/// getPhysicalRegisterRegClass - Returns the Register Class of a physical
-/// register of the given type. If type is EVT::Other, then just return any
-/// register class the register belongs to.
+/// getMinimalPhysRegClass - Returns the register class of a physical
+/// register of the given type, picking the smallest register class of
+/// the right type that contains this physreg.
const TargetRegisterClass *
-TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
+TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
assert(isPhysicalRegister(reg) && "reg must be a physical register");
- // Pick the most super register class of the right type that contains
+  // Pick the smallest register class of the right type that contains
// this physreg.
const TargetRegisterClass* BestRC = 0;
for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){
const TargetRegisterClass* RC = *I;
if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) &&
- (!BestRC || BestRC->hasSuperClass(RC)))
+ (!BestRC || BestRC->hasSubClass(RC)))
BestRC = RC;
}
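
The renamed hook inverts the old search: instead of walking toward
super-classes it keeps the smallest class of the requested type that
contains the register. Illustrative call, with Reg a physical register:

    // Most constrained register class with type i32 that contains Reg.
    const TargetRegisterClass *RC =
        TRI->getMinimalPhysRegClass(Reg, MVT::i32);
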
diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
index a58f58e..26797ab 100644
--- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp
@@ -33,13 +33,11 @@ class X86AsmLexer : public TargetAsmLexer {
}
const AsmToken &lexDefinite() {
- if(tentativeIsValid) {
+ if (tentativeIsValid) {
tentativeIsValid = false;
return tentativeToken;
}
- else {
- return getLexer()->Lex();
- }
+ return getLexer()->Lex();
}
AsmToken LexTokenATT();
@@ -72,38 +70,65 @@ public:
static unsigned MatchRegisterName(StringRef Name);
AsmToken X86AsmLexer::LexTokenATT() {
- const AsmToken lexedToken = lexDefinite();
+ AsmToken lexedToken = lexDefinite();
switch (lexedToken.getKind()) {
default:
- return AsmToken(lexedToken);
+ return lexedToken;
case AsmToken::Error:
SetError(Lexer->getErrLoc(), Lexer->getErr());
- return AsmToken(lexedToken);
- case AsmToken::Percent:
- {
+ return lexedToken;
+
+ case AsmToken::Percent: {
const AsmToken &nextToken = lexTentative();
- if (nextToken.getKind() == AsmToken::Identifier) {
- unsigned regID = MatchRegisterName(nextToken.getString());
+ if (nextToken.getKind() != AsmToken::Identifier)
+ return lexedToken;
+
- if (regID) {
- lexDefinite();
+ if (unsigned regID = MatchRegisterName(nextToken.getString())) {
+ lexDefinite();
+ // FIXME: This is completely wrong when there is a space or other
+ // punctuation between the % and the register name.
+ StringRef regStr(lexedToken.getString().data(),
+ lexedToken.getString().size() +
+ nextToken.getString().size());
+
+ return AsmToken(AsmToken::Register, regStr,
+ static_cast<int64_t>(regID));
+ }
+
+    // MatchRegisterName failed. If this is "db[0-7]", match it as an alias
+ // for dr[0-7].
+ if (nextToken.getString().size() == 3 &&
+ nextToken.getString().startswith("db")) {
+ int RegNo = -1;
+ switch (nextToken.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != -1) {
+ lexDefinite();
+
+ // FIXME: This is completely wrong when there is a space or other
+ // punctuation between the % and the register name.
StringRef regStr(lexedToken.getString().data(),
lexedToken.getString().size() +
nextToken.getString().size());
-
- return AsmToken(AsmToken::Register,
- regStr,
- static_cast<int64_t>(regID));
- }
- else {
- return AsmToken(lexedToken);
+ return AsmToken(AsmToken::Register, regStr,
+ static_cast<int64_t>(RegNo));
}
}
- else {
- return AsmToken(lexedToken);
- }
+
+
+ return lexedToken;
}
}
}
@@ -113,26 +138,22 @@ AsmToken X86AsmLexer::LexTokenIntel() {
switch(lexedToken.getKind()) {
default:
- return AsmToken(lexedToken);
+ return lexedToken;
case AsmToken::Error:
SetError(Lexer->getErrLoc(), Lexer->getErr());
- return AsmToken(lexedToken);
- case AsmToken::Identifier:
- {
+ return lexedToken;
+ case AsmToken::Identifier: {
std::string upperCase = lexedToken.getString().str();
std::string lowerCase = LowercaseString(upperCase);
StringRef lowerRef(lowerCase);
unsigned regID = MatchRegisterName(lowerRef);
- if (regID) {
+ if (regID)
return AsmToken(AsmToken::Register,
lexedToken.getString(),
static_cast<int64_t>(regID));
- }
- else {
- return AsmToken(lexedToken);
- }
+ return lexedToken;
}
}
}
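
Since TableGen emits the X86::DR0..DR7 enumerators consecutively (an
assumption that would deserve an assert), the digit switch above could
equally be computed:

    char Digit = nextToken.getString()[2];
    int RegNo = -1;
    if (Digit >= '0' && Digit <= '7')
      RegNo = X86::DR0 + (Digit - '0');
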
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 40a6a7b..a856e9c 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -412,6 +412,28 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
return false;
}
+ // If this is "db[0-7]", match it as an alias
+ // for dr[0-7].
+ if (RegNo == 0 && Tok.getString().size() == 3 &&
+ Tok.getString().startswith("db")) {
+ switch (Tok.getString()[2]) {
+ case '0': RegNo = X86::DR0; break;
+ case '1': RegNo = X86::DR1; break;
+ case '2': RegNo = X86::DR2; break;
+ case '3': RegNo = X86::DR3; break;
+ case '4': RegNo = X86::DR4; break;
+ case '5': RegNo = X86::DR5; break;
+ case '6': RegNo = X86::DR6; break;
+ case '7': RegNo = X86::DR7; break;
+ }
+
+ if (RegNo != 0) {
+ EndLoc = Tok.getLoc();
+ Parser.Lex(); // Eat it.
+ return false;
+ }
+ }
+
if (RegNo == 0)
return Error(Tok.getLoc(), "invalid register name");
@@ -597,6 +619,16 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
}
+ // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and
+ // the form jrcxz is not allowed in 32-bit mode.
+ if (Is64Bit) {
+ if (Name == "jcxz")
+ return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode");
+ } else {
+ if (Name == "jrcxz")
+ return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode");
+ }
+
// FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
// represent alternative syntaxes in the .td file, without requiring
// instruction duplication.
@@ -617,6 +649,23 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
.Case("setnz", "setne")
.Case("jz", "je")
.Case("jnz", "jne")
+ .Case("jc", "jb")
+  // FIXME: jcxz requires an AdSize prefix in 32-bit mode, and jecxz
+  // requires an AdSize prefix in 64-bit mode, but jecxz takes no prefix
+  // in 32-bit mode.
+ .Case("jecxz", "jcxz")
+ .Case("jrcxz", "jcxz")
+ .Case("jna", "jbe")
+ .Case("jnae", "jb")
+ .Case("jnb", "jae")
+ .Case("jnbe", "ja")
+ .Case("jnc", "jae")
+ .Case("jng", "jle")
+ .Case("jnge", "jl")
+ .Case("jnl", "jge")
+ .Case("jnle", "jg")
+ .Case("jpe", "jp")
+ .Case("jpo", "jnp")
.Case("cmovcl", "cmovbl")
.Case("cmovcl", "cmovbl")
.Case("cmovnal", "cmovbel")
@@ -631,36 +680,64 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
.Case("cmovnlel", "cmovgl")
.Case("cmovnzl", "cmovnel")
.Case("cmovzl", "cmovel")
+ .Case("fwait", "wait")
+ .Case("movzx", "movzb")
.Default(Name);
// FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
const MCExpr *ExtraImmOp = 0;
- if (PatchedName.startswith("cmp") &&
+ if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
(PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ bool IsVCMP = PatchedName.startswith("vcmp");
+ unsigned SSECCIdx = IsVCMP ? 4 : 3;
unsigned SSEComparisonCode = StringSwitch<unsigned>(
- PatchedName.slice(3, PatchedName.size() - 2))
- .Case("eq", 0)
- .Case("lt", 1)
- .Case("le", 2)
- .Case("unord", 3)
- .Case("neq", 4)
- .Case("nlt", 5)
- .Case("nle", 6)
- .Case("ord", 7)
+ PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
+ .Case("eq", 0)
+ .Case("lt", 1)
+ .Case("le", 2)
+ .Case("unord", 3)
+ .Case("neq", 4)
+ .Case("nlt", 5)
+ .Case("nle", 6)
+ .Case("ord", 7)
+ .Case("eq_uq", 8)
+ .Case("nge", 9)
+ .Case("ngt", 0x0A)
+ .Case("false", 0x0B)
+ .Case("neq_oq", 0x0C)
+ .Case("ge", 0x0D)
+ .Case("gt", 0x0E)
+ .Case("true", 0x0F)
+ .Case("eq_os", 0x10)
+ .Case("lt_oq", 0x11)
+ .Case("le_oq", 0x12)
+ .Case("unord_s", 0x13)
+ .Case("neq_us", 0x14)
+ .Case("nlt_uq", 0x15)
+ .Case("nle_uq", 0x16)
+ .Case("ord_s", 0x17)
+ .Case("eq_us", 0x18)
+ .Case("nge_uq", 0x19)
+ .Case("ngt_uq", 0x1A)
+ .Case("false_os", 0x1B)
+ .Case("neq_os", 0x1C)
+ .Case("ge_oq", 0x1D)
+ .Case("gt_oq", 0x1E)
+ .Case("true_us", 0x1F)
.Default(~0U);
if (SSEComparisonCode != ~0U) {
ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
getParser().getContext());
if (PatchedName.endswith("ss")) {
- PatchedName = "cmpss";
+ PatchedName = IsVCMP ? "vcmpss" : "cmpss";
} else if (PatchedName.endswith("sd")) {
- PatchedName = "cmpsd";
+ PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
} else if (PatchedName.endswith("ps")) {
- PatchedName = "cmpps";
+ PatchedName = IsVCMP ? "vcmpps" : "cmpps";
} else {
assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
- PatchedName = "cmppd";
+ PatchedName = IsVCMP ? "vcmppd" : "cmppd";
}
}
}
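
The alias table above is built on llvm::StringSwitch, which chains
string comparisons and yields the first matching case. In isolation the
idiom looks like this (a reduced sketch, not the parser's full table):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    using namespace llvm;

    static StringRef canonicalize(StringRef Name) {
      return StringSwitch<StringRef>(Name)
          .Case("jz", "je")
          .Case("jnz", "jne")
          .Default(Name);   // unknown mnemonics pass through unchanged
    }
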
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index 0b64cb4..f2cdb5b 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -85,11 +85,18 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
}
-void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
const MCOperand &BaseReg = MI->getOperand(Op);
const MCOperand &IndexReg = MI->getOperand(Op+2);
const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
if (DispSpec.isImm()) {
int64_t DispVal = DispSpec.getImm();
@@ -115,13 +122,3 @@ void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op,
O << ')';
}
}
-
-void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- // If this has a segment register, print it.
- if (MI->getOperand(Op+4).getReg()) {
- printOperand(MI, Op+4, O);
- O << ':';
- }
- printLeaMemReference(MI, Op, O);
-}
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
index 8d5d508..3be4bae 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h
@@ -34,7 +34,6 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
- void printLeaMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS);
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS);
void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS);
@@ -69,14 +68,8 @@ public:
void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
printMemReference(MI, OpNo, O);
}
- void printlea32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printLeaMemReference(MI, OpNo, O);
- }
- void printlea64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printLeaMemReference(MI, OpNo, O);
- }
- void printlea64_32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- printLeaMemReference(MI, OpNo, O);
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ printMemReference(MI, OpNo, O);
}
};
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 183213d..73bc603 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -200,6 +200,11 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
case X86II::MO_GOT: O << "@GOT"; break;
case X86II::MO_GOTOFF: O << "@GOTOFF"; break;
case X86II::MO_PLT: O << "@PLT"; break;
+ case X86II::MO_TLVP: O << "@TLVP"; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ O << "@TLVP" << '-';
+ PrintPICBaseSymbol(O);
+ break;
}
}
@@ -383,6 +388,8 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) {
printSymbolOperand(MO, O);
+ if (Subtarget->isPICStyleRIPRel())
+ O << "(%rip)";
return false;
}
if (MO.isReg()) {
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
index 7e0a9bb..a632047 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp
@@ -81,12 +81,19 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
}
-void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
+void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
+ raw_ostream &O) {
const MCOperand &BaseReg = MI->getOperand(Op);
unsigned ScaleVal = MI->getOperand(Op+1).getImm();
const MCOperand &IndexReg = MI->getOperand(Op+2);
const MCOperand &DispSpec = MI->getOperand(Op+3);
+ const MCOperand &SegReg = MI->getOperand(Op+4);
+
+ // If this has a segment register, print it.
+ if (SegReg.getReg()) {
+ printOperand(MI, Op+4, O);
+ O << ':';
+ }
O << '[';
@@ -104,7 +111,7 @@ void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op,
NeedPlus = true;
}
-
+
if (!DispSpec.isImm()) {
if (NeedPlus) O << " + ";
assert(DispSpec.isExpr() && "non-immediate displacement for LEA?");
@@ -126,13 +133,3 @@ void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op,
O << ']';
}
-
-void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op,
- raw_ostream &O) {
- // If this has a segment register, print it.
- if (MI->getOperand(Op+4).getReg()) {
- printOperand(MI, Op+4, O);
- O << ':';
- }
- printLeaMemReference(MI, Op, O);
-}
diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
index a0beeb2..4d68074 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h
@@ -36,7 +36,6 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
- void printLeaMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
@@ -81,17 +80,9 @@ public:
O << "XMMWORD PTR ";
printMemReference(MI, OpNo, O);
}
- void printlea32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "DWORD PTR ";
- printLeaMemReference(MI, OpNo, O);
- }
- void printlea64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "QWORD PTR ";
- printLeaMemReference(MI, OpNo, O);
- }
- void printlea64_32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
- O << "QWORD PTR ";
- printLeaMemReference(MI, OpNo, O);
+ void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo, O);
}
};
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 4edeca9..09f150b 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -152,6 +152,17 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_DARWIN_STUB:
break;
+ case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
+ case X86II::MO_TLVP_PIC_BASE:
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ // Subtract the pic base.
+ Expr
+ = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(GetPICBaseSymbol(),
+ Ctx),
+ Ctx);
+
+ break;
case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
@@ -266,10 +277,21 @@ static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
return;
// Check whether this is an absolute address.
- if (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
- Inst.getOperand(AddrBase + 2).getReg() != 0 ||
- Inst.getOperand(AddrBase + 4).getReg() != 0 ||
- Inst.getOperand(AddrBase + 1).getImm() != 1)
+ // FIXME: We know TLVP symbol refs aren't, but there should be a better way
+ // to do this here.
+ bool Absolute = true;
+ if (Inst.getOperand(AddrOp).isExpr()) {
+ const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr();
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(MCE))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP)
+ Absolute = false;
+ }
+
+ if (Absolute &&
+ (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 2).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 4).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 1).getImm() != 1))
return;
// If so, rewrite the instruction.
@@ -327,6 +349,15 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
switch (OutMI.getOpcode()) {
case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand.
lower_lea64_32mem(&OutMI, 1);
+ // FALL THROUGH.
+ case X86::LEA64r:
+ case X86::LEA16r:
+ case X86::LEA32r:
+ // LEA should have a segment register, but it must be empty.
+ assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands &&
+ "Unexpected # of LEA operands");
+ assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 &&
+ "LEA has segment specified!");
break;
case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break;
case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break;
@@ -364,10 +395,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
- // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+ // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
// register inputs modeled as normal uses instead of implicit uses. As such,
// truncate off all but the first operand (the callee). FIXME: Change isel.
- case X86::TAILJMPr:
case X86::TAILJMPr64:
case X86::CALL64r:
case X86::CALL64pcrel32: {
@@ -380,11 +410,20 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
}
// TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
+ case X86::TAILJMPr:
case X86::TAILJMPd:
case X86::TAILJMPd64: {
+ unsigned Opcode;
+ switch (OutMI.getOpcode()) {
+ default: assert(0 && "Invalid opcode");
+ case X86::TAILJMPr: Opcode = X86::JMP32r; break;
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
+ }
+
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
- OutMI.setOpcode(X86::TAILJMP_1);
+ OutMI.setOpcode(Opcode);
OutMI.addOperand(Saved);
break;
}
@@ -483,8 +522,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
O << V.getName();
O << " <- ";
// Frame address. Currently handles register +- offset only.
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm());
- O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O);
+ O << '[';
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ printOperand(MI, 0, O);
+ else
+ O << "undef";
+ O << '+'; printOperand(MI, 3, O);
O << ']';
O << "+";
printOperand(MI, NOps-2, O);
@@ -495,8 +538,9 @@ X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
MachineLocation Location;
assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!");
// Frame address. Currently handles register +- offset only.
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm());
- Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
+
+ if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm())
+ Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm());
return Location;
}
@@ -513,6 +557,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
+ case X86::TAILJMPr:
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64:
+ // Lower these as normal, but add some comments.
+ OutStreamer.AddComment("TAILCALL");
+ break;
+
case X86::MOVPC32r: {
MCInst TmpInst;
// This is a pseudo op for a two instruction sequence with a label, which
@@ -578,7 +629,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
-
OutStreamer.EmitInstruction(TmpInst);
}
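
The MO_TLVP_PIC_BASE case above builds the difference expression
"sym@TLVP - picbase" out of MC nodes. Reduced to its essentials, with
Sym, PicBase and Ctx assumed in scope:

    const MCExpr *TLVP =
        MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
    const MCExpr *PIC = MCSymbolRefExpr::Create(PicBase, Ctx);
    const MCExpr *Diff = MCBinaryExpr::CreateSub(TLVP, PIC, Ctx);
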
diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt
index 9f91060..97589c0 100644
--- a/lib/Target/X86/Disassembler/CMakeLists.txt
+++ b/lib/Target/X86/Disassembler/CMakeLists.txt
@@ -4,8 +4,8 @@ add_llvm_library(LLVMX86Disassembler
X86Disassembler.cpp
X86DisassemblerDecoder.c
)
-# workaround for hanging compilation on MSVC9
-if( MSVC_VERSION EQUAL 1500 )
+# workaround for hanging compilation on MSVC9 and 10
+if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 )
set_property(
SOURCE X86Disassembler.cpp
PROPERTY COMPILE_FLAGS "/Od"
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 8a5a630..09f1584 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -252,13 +252,8 @@ static bool translateRMRegister(MCInst &mcInst,
/// @param mcInst - The MCInst to append to.
/// @param insn - The instruction to extract Mod, R/M, and SIB fields
/// from.
-/// @param sr - Whether or not to emit the segment register. The
-/// LEA instruction does not expect a segment-register
-/// operand.
/// @return - 0 on success; nonzero otherwise
-static bool translateRMMemory(MCInst &mcInst,
- InternalInstruction &insn,
- bool sr) {
+static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) {
// Addresses in an MCInst are represented as five operands:
// 1. basereg (register) The R/M base, or (if there is a SIB) the
// SIB base
@@ -385,10 +380,7 @@ static bool translateRMMemory(MCInst &mcInst,
mcInst.addOperand(scaleAmount);
mcInst.addOperand(indexReg);
mcInst.addOperand(displacement);
-
- if (sr)
- mcInst.addOperand(segmentReg);
-
+ mcInst.addOperand(segmentReg);
return false;
}
@@ -439,9 +431,8 @@ static bool translateRM(MCInst &mcInst,
case TYPE_M1616:
case TYPE_M1632:
case TYPE_M1664:
- return translateRMMemory(mcInst, insn, true);
case TYPE_LEA:
- return translateRMMemory(mcInst, insn, false);
+ return translateRMMemory(mcInst, insn);
}
}
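
After this change every memory reference, LEA included, occupies the
same five consecutive MCInst operands, which is what lets the printers
above index Op+0 through Op+4 unconditionally:

    const MCOperand &Base  = MI.getOperand(Op + 0); // base register
    const MCOperand &Scale = MI.getOperand(Op + 1); // scale amount
    const MCOperand &Index = MI.getOperand(Op + 2); // index register
    const MCOperand &Disp  = MI.getOperand(Op + 3); // displacement
    const MCOperand &Seg   = MI.getOperand(Op + 4); // segment register
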
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index e5f84e8..b6aba93 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -36,62 +36,6 @@ The pattern isel got this one right.
//===---------------------------------------------------------------------===//
-SSE doesn't have [mem] op= reg instructions. If we have an SSE instruction
-like this:
-
- X += y
-
-and the register allocator decides to spill X, it is cheaper to emit this as:
-
-Y += [xslot]
-store Y -> [xslot]
-
-than as:
-
-tmp = [xslot]
-tmp += y
-store tmp -> [xslot]
-
-..and this uses one fewer register (so this should be done at load folding
-time, not at spiller time). *Note* however that this can only be done
-if Y is dead. Here's a testcase:
-
-@.str_3 = external global [15 x i8]
-declare void @printf(i32, ...)
-define void @main() {
-build_tree.exit:
- br label %no_exit.i7
-
-no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
- %tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ],
- [ %tmp.34.i18, %no_exit.i7 ]
- %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ],
- [ %tmp.28.i16, %no_exit.i7 ]
- %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00
- %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00
- br i1 false, label %Compute_Tree.exit23, label %no_exit.i7
-
-Compute_Tree.exit23: ; preds = %no_exit.i7
- tail call void (i32, ...)* @printf( i32 0 )
- store double %tmp.34.i18, double* null
- ret void
-}
-
-We currently emit:
-
-.BBmain_1:
- xorpd %XMM1, %XMM1
- addsd %XMM0, %XMM1
-*** movsd %XMM2, QWORD PTR [%ESP + 8]
-*** addsd %XMM2, %XMM1
-*** movsd QWORD PTR [%ESP + 8], %XMM2
- jmp .BBmain_1 # no_exit.i7
-
-This is a bugpoint reduced testcase, which is why the testcase doesn't make
-much sense (e.g. its an infinite loop). :)
-
-//===---------------------------------------------------------------------===//
-
SSE should implement 'select_cc' using 'emulated conditional moves' that use
pcmp/pand/pandn/por to do a selection instead of a conditional branch:
@@ -122,12 +66,6 @@ LBB_X_2:
//===---------------------------------------------------------------------===//
-It's not clear whether we should use pxor or xorps / xorpd to clear XMM
-registers. The choice may depend on subtarget information. We should do some
-more experiments on different x86 machines.
-
-//===---------------------------------------------------------------------===//
-
Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
feasible.
@@ -151,45 +89,6 @@ Perhaps use pxor / xorp* to clear a XMM register first?
//===---------------------------------------------------------------------===//
-How to decide when to use the "floating point version" of logical ops? Here are
-some code fragments:
-
- movaps LCPI5_5, %xmm2
- divps %xmm1, %xmm2
- mulps %xmm2, %xmm3
- mulps 8656(%ecx), %xmm3
- addps 8672(%ecx), %xmm3
- andps LCPI5_6, %xmm2
- andps LCPI5_1, %xmm3
- por %xmm2, %xmm3
- movdqa %xmm3, (%edi)
-
- movaps LCPI5_5, %xmm1
- divps %xmm0, %xmm1
- mulps %xmm1, %xmm3
- mulps 8656(%ecx), %xmm3
- addps 8672(%ecx), %xmm3
- andps LCPI5_6, %xmm1
- andps LCPI5_1, %xmm3
- orps %xmm1, %xmm3
- movaps %xmm3, 112(%esp)
- movaps %xmm3, (%ebx)
-
-Due to some minor source change, the later case ended up using orps and movaps
-instead of por and movdqa. Does it matter?
-
-//===---------------------------------------------------------------------===//
-
-X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
-to choose between movaps, movapd, and movdqa based on types of source and
-destination?
-
-How about andps, andpd, and pand? Do we really care about the type of the packed
-elements? If not, why not always use the "ps" variants which are likely to be
-shorter.
-
-//===---------------------------------------------------------------------===//
-
External test Nurbs exposed some problems. Look for
__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc
emits:
@@ -278,41 +177,6 @@ It also exposes some other problems. See MOV32ri -3 and the spills.
//===---------------------------------------------------------------------===//
-http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25500
-
-LLVM is producing bad code.
-
-LBB_main_4: # cond_true44
- addps %xmm1, %xmm2
- subps %xmm3, %xmm2
- movaps (%ecx), %xmm4
- movaps %xmm2, %xmm1
- addps %xmm4, %xmm1
- addl $16, %ecx
- incl %edx
- cmpl $262144, %edx
- movaps %xmm3, %xmm2
- movaps %xmm4, %xmm3
- jne LBB_main_4 # cond_true44
-
-There are two problems. 1) No need to two loop induction variables. We can
-compare against 262144 * 16. 2) Known register coalescer issue. We should
-be able eliminate one of the movaps:
-
- addps %xmm2, %xmm1 <=== Commute!
- subps %xmm3, %xmm1
- movaps (%ecx), %xmm4
- movaps %xmm1, %xmm1 <=== Eliminate!
- addps %xmm4, %xmm1
- addl $16, %ecx
- incl %edx
- cmpl $262144, %edx
- movaps %xmm3, %xmm2
- movaps %xmm4, %xmm3
- jne LBB_main_4 # cond_true44
-
-//===---------------------------------------------------------------------===//
-
Consider:
__m128 test(float a) {
@@ -382,22 +246,6 @@ elements are fixed zeros.
//===---------------------------------------------------------------------===//
-__m128d test1( __m128d A, __m128d B) {
- return _mm_shuffle_pd(A, B, 0x3);
-}
-
-compiles to
-
-shufpd $3, %xmm1, %xmm0
-
-Perhaps it's better to use unpckhpd instead?
-
-unpckhpd %xmm1, %xmm0
-
-Don't know if unpckhpd is faster. But it is shorter.
-
-//===---------------------------------------------------------------------===//
-
This code generates ugly code, probably due to costs being off or something:
define void @test(float* %P, <4 x float>* %P2 ) {
@@ -549,6 +397,7 @@ entry:
%tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly
ret i64 %tmp20
}
+declare i64 @ccoshf(float %z.0, float %z.1) nounwind readonly
This currently compiles to:
@@ -987,3 +836,34 @@ This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and
doing a shuffle from v[1] to v[0] then a float store.
//===---------------------------------------------------------------------===//
+
+On SSE4 machines, we compile this code:
+
+define <2 x float> @test2(<2 x float> %Q, <2 x float> %R,
+ <2 x float> *%P) nounwind {
+ %Z = fadd <2 x float> %Q, %R
+
+ store <2 x float> %Z, <2 x float> *%P
+ ret <2 x float> %Z
+}
+
+into:
+
+_test2: ## @test2
+## BB#0:
+ insertps $0, %xmm2, %xmm2
+ insertps $16, %xmm3, %xmm2
+ insertps $0, %xmm0, %xmm3
+ insertps $16, %xmm1, %xmm3
+ addps %xmm2, %xmm3
+ movq %xmm3, (%rdi)
+ movaps %xmm3, %xmm0
+ pshufd $1, %xmm3, %xmm1
+ ## kill: XMM1<def> XMM1<kill>
+ ret
+
+The insertps's of $0 are pointless complex copies.
+
+//===---------------------------------------------------------------------===//
+
+
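
The select_cc note kept earlier in this file asks for emulated
conditional moves via pcmp/pand/pandn/por; in intrinsics form that
emulation is (a sketch, assuming SSE2):

    #include <emmintrin.h>

    // mask lanes: all-ones selects from a, all-zeros selects from b.
    __m128i select128(__m128i mask, __m128i a, __m128i b) {
      return _mm_or_si128(_mm_and_si128(mask, a),      // pand
                          _mm_andnot_si128(mask, b));  // pandn
    }
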
diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt
index e8f7c5d..78c4dc0 100644
--- a/lib/Target/X86/README-X86-64.txt
+++ b/lib/Target/X86/README-X86-64.txt
@@ -1,27 +1,5 @@
//===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===//
-Implement different PIC models? Right now we only support Mac OS X with small
-PIC code model.
-
-//===---------------------------------------------------------------------===//
-
-For this:
-
-extern void xx(void);
-void bar(void) {
- xx();
-}
-
-gcc compiles to:
-
-.globl _bar
-_bar:
- jmp _xx
-
-We need to do the tailcall optimization as well.
-
-//===---------------------------------------------------------------------===//
-
AMD64 Optimization Manual 8.2 has some nice information about optimizing integer
multiplication by a constant. How much of it applies to Intel's X86-64
implementation? There are definite trade-offs to consider: latency vs. register
@@ -96,123 +74,14 @@ gcc:
movq %rax, (%rdx)
ret
-//===---------------------------------------------------------------------===//
-
-Vararg function prologue can be further optimized. Currently all XMM registers
-are stored into register save area. Most of them can be eliminated since the
-upper bound of the number of XMM registers used are passed in %al. gcc produces
-something like the following:
-
- movzbl %al, %edx
- leaq 0(,%rdx,4), %rax
- leaq 4+L2(%rip), %rdx
- leaq 239(%rsp), %rax
- jmp *%rdx
- movaps %xmm7, -15(%rax)
- movaps %xmm6, -31(%rax)
- movaps %xmm5, -47(%rax)
- movaps %xmm4, -63(%rax)
- movaps %xmm3, -79(%rax)
- movaps %xmm2, -95(%rax)
- movaps %xmm1, -111(%rax)
- movaps %xmm0, -127(%rax)
-L2:
-
-It jumps over the movaps that do not need to be stored. Hard to see this being
-significant as it added 5 instruciton (including a indirect branch) to avoid
-executing 0 to 8 stores in the function prologue.
-
-Perhaps we can optimize for the common case where no XMM registers are used for
-parameter passing. i.e. is %al == 0 jump over all stores. Or in the case of a
-leaf function where we can determine that no XMM input parameter is need, avoid
-emitting the stores at all.
-
-//===---------------------------------------------------------------------===//
+And the codegen is even worse for the following
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
+ void fill1(char *s, int a)
+ {
+ __builtin_memset(s, a, 15);
+ }
-AMD64 has a complex calling convention for aggregate passing by value:
-
-1. If the size of an object is larger than two eightbytes, or in C++, is a non-
- POD structure or union type, or contains unaligned fields, it has class
- MEMORY.
-2. Both eightbytes get initialized to class NO_CLASS.
-3. Each field of an object is classified recursively so that always two fields
- are considered. The resulting class is calculated according to the classes
- of the fields in the eightbyte:
- (a) If both classes are equal, this is the resulting class.
- (b) If one of the classes is NO_CLASS, the resulting class is the other
- class.
- (c) If one of the classes is MEMORY, the result is the MEMORY class.
- (d) If one of the classes is INTEGER, the result is the INTEGER.
- (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as
- class.
- (f) Otherwise class SSE is used.
-4. Then a post merger cleanup is done:
- (a) If one of the classes is MEMORY, the whole argument is passed in memory.
- (b) If SSEUP is not preceeded by SSE, it is converted to SSE.
-
-Currently llvm frontend does not handle this correctly.
-
-Problem 1:
- typedef struct { int i; double d; } QuadWordS;
-It is currently passed in two i64 integer registers. However, gcc compiled
-callee expects the second element 'd' to be passed in XMM0.
-
-Problem 2:
- typedef struct { int32_t i; float j; double d; } QuadWordS;
-The size of the first two fields == i64 so they will be combined and passed in
-a integer register RDI. The third field is still passed in XMM0.
-
-Problem 3:
- typedef struct { int64_t i; int8_t j; int64_t d; } S;
- void test(S s)
-The size of this aggregate is greater than two i64 so it should be passed in
-memory. Currently llvm breaks this down and passed it in three integer
-registers.
-
-Problem 4:
-Taking problem 3 one step ahead where a function expects a aggregate value
-in memory followed by more parameter(s) passed in register(s).
- void test(S s, int b)
-
-LLVM IR does not allow parameter passing by aggregates, therefore it must break
-the aggregates value (in problem 3 and 4) into a number of scalar values:
- void %test(long %s.i, byte %s.j, long %s.d);
-
-However, if the backend were to lower this code literally it would pass the 3
-values in integer registers. To force it be passed in memory, the frontend
-should change the function signiture to:
- void %test(long %undef1, long %undef2, long %undef3, long %undef4,
- long %undef5, long %undef6,
- long %s.i, byte %s.j, long %s.d);
-And the callee would look something like this:
- call void %test( undef, undef, undef, undef, undef, undef,
- %tmp.s.i, %tmp.s.j, %tmp.s.d );
-The first 6 undef parameters would exhaust the 6 integer registers used for
-parameter passing. The following three integer values would then be forced into
-memory.
-
-For problem 4, the parameter 'd' would be moved to the front of the parameter
-list so it will be passed in register:
- void %test(int %d,
- long %undef1, long %undef2, long %undef3, long %undef4,
- long %undef5, long %undef6,
- long %s.i, byte %s.j, long %s.d);
-
-//===---------------------------------------------------------------------===//
-
-Right now the asm printer assumes GlobalAddress are accessed via RIP relative
-addressing. Therefore, it is not possible to generate this:
- movabsq $__ZTV10polynomialIdE+16, %rax
-
-That is ok for now since we currently only support small model. So the above
-is selected as
- leaq __ZTV10polynomialIdE+16(%rip), %rax
-
-This is probably slightly slower but is much shorter than movabsq. However, if
-we were to support medium or larger code models, we need to use the movabs
-instruction. We should probably introduce something like AbsoluteAddress to
-distinguish it from GlobalAddress so the asm printer and JIT code emitter can
-do the right thing.
+For this version, we duplicate the computation of the constant to store.
//===---------------------------------------------------------------------===//
@@ -298,3 +167,107 @@ be able to recognize the zero extend. This could also presumably be implemented
if we have whole-function selectiondags.
//===---------------------------------------------------------------------===//
+
+Take the following C code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43640):
+
+struct u1
+{
+ float x;
+ float y;
+};
+
+float foo(struct u1 u)
+{
+ return u.x + u.y;
+}
+
+Optimizes to the following IR:
+define float @foo(double %u.0) nounwind readnone {
+entry:
+ %tmp8 = bitcast double %u.0 to i64 ; <i64> [#uses=2]
+ %tmp6 = trunc i64 %tmp8 to i32 ; <i32> [#uses=1]
+ %tmp7 = bitcast i32 %tmp6 to float ; <float> [#uses=1]
+ %tmp2 = lshr i64 %tmp8, 32 ; <i64> [#uses=1]
+ %tmp3 = trunc i64 %tmp2 to i32 ; <i32> [#uses=1]
+ %tmp4 = bitcast i32 %tmp3 to float ; <float> [#uses=1]
+ %0 = fadd float %tmp7, %tmp4 ; <float> [#uses=1]
+ ret float %0
+}
+
+And current llvm-gcc/clang output:
+ movd %xmm0, %rax
+ movd %eax, %xmm1
+ shrq $32, %rax
+ movd %eax, %xmm0
+ addss %xmm1, %xmm0
+ ret
+
+We really shouldn't move the floats to RAX, only to immediately move them
+straight back to the XMM registers.
+
+There really isn't any good way to handle this purely in IR optimizers; it
+could possibly be handled by changing the output of the frontend, though. It
+would also be feasible to add an x86-specific DAGCombine to optimize the
+bitcast+trunc+(lshr+)bitcast combination.
+
+//===---------------------------------------------------------------------===//
+
+Take the following code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653):
+extern unsigned long table[];
+unsigned long foo(unsigned char *p) {
+ unsigned long tag = *p;
+ return table[tag >> 4] + table[tag & 0xf];
+}
+
+Current code generated:
+ movzbl (%rdi), %eax
+ movq %rax, %rcx
+ andq $240, %rcx
+ shrq %rcx
+ andq $15, %rax
+ movq table(,%rax,8), %rax
+ addq table(%rcx), %rax
+ ret
+
+Issues:
+1. First movq should be movl; saves a byte.
+2. Both andq's should be andl; saves another two bytes. I think this was
+ implemented at one point, but subsequently regressed.
+3. shrq should be shrl; saves another byte.
+4. The first andq can be completely eliminated by using a slightly more
+ expensive addressing mode.
+
+//===---------------------------------------------------------------------===//
+
+Consider the following (contrived testcase, but contains common factors):
+
+#include <stdarg.h>
+int test(int x, ...) {
+ int sum, i;
+ va_list l;
+ va_start(l, x);
+ for (i = 0; i < x; i++)
+ sum += va_arg(l, int);
+ va_end(l);
+ return sum;
+}
+
+Testcase given in C because fixing it will likely involve changing the IR
+generated for it. The primary issue with the result is that it doesn't do any
+of the optimizations which are possible if we know the address of a va_list
+in the current function is never taken:
+1. We shouldn't spill the XMM registers because we only call va_arg with "int".
+2. It would be nice if we could scalarrepl the va_list.
+3. Probably overkill, but it'd be cool if we could peel off the first five
+iterations of the loop.
+
+Other optimizations involving functions which use va_arg on floats which don't
+have the address of a va_list taken:
+1. Conversely to the above, we shouldn't spill general registers if we only
+ call va_arg on "double".
+2. If we know nothing more than 64 bits wide is read from the XMM registers,
+ we can change the spilling code to reduce the amount of stack used by half.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index d4545a6..efc0cd8 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1103,57 +1103,6 @@ be folded into: shl [mem], 1
//===---------------------------------------------------------------------===//
-This testcase misses a read/modify/write opportunity (from PR1425):
-
-void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
- int i;
- for(i=0; i<width; i++)
- b1[i] += (1*(b0[i] + b2[i])+0)>>0;
-}
-
-We compile it down to:
-
-LBB1_2: # bb
- movl (%esi,%edi,4), %ebx
- addl (%ecx,%edi,4), %ebx
- addl (%edx,%edi,4), %ebx
- movl %ebx, (%ecx,%edi,4)
- incl %edi
- cmpl %eax, %edi
- jne LBB1_2 # bb
-
-the inner loop should add to the memory location (%ecx,%edi,4), saving
-a mov. Something like:
-
- movl (%esi,%edi,4), %ebx
- addl (%edx,%edi,4), %ebx
- addl %ebx, (%ecx,%edi,4)
-
-Here is another interesting example:
-
-void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
- int i;
- for(i=0; i<width; i++)
- b1[i] -= (1*(b0[i] + b2[i])+0)>>0;
-}
-
-We miss the r/m/w opportunity here by using 2 subs instead of an add+sub[mem]:
-
-LBB9_2: # bb
- movl (%ecx,%edi,4), %ebx
- subl (%esi,%edi,4), %ebx
- subl (%edx,%edi,4), %ebx
- movl %ebx, (%ecx,%edi,4)
- incl %edi
- cmpl %eax, %edi
- jne LBB9_2 # bb
-
-Additionally, LSR should rewrite the exit condition of these loops to use
-a stride-4 IV, would would allow all the scales in the loop to go away.
-This would result in smaller code and more efficient microops.
-
-//===---------------------------------------------------------------------===//
-
In SSE mode, we turn abs and neg into a load from the constant pool plus a xor
or and instruction, for example:
@@ -1301,15 +1250,8 @@ FirstOnet:
xorl %eax, %eax
ret
-There are a few possible improvements here:
-1. We should be able to eliminate the dead load into %ecx
-2. We could change the "movl 8(%esp), %eax" into
- "movzwl 10(%esp), %eax"; this lets us change the cmpl
- into a testl, which is shorter, and eliminate the shift.
-
-We could also in theory eliminate the branch by using a conditional
-for the address of the load, but that seems unlikely to be worthwhile
-in general.
+We could change the "movl 8(%esp), %eax" into "movzwl 10(%esp), %eax"; this
+lets us change the cmpl into a testl, which is shorter, and eliminate the shift.
//===---------------------------------------------------------------------===//
@@ -1331,22 +1273,23 @@ bb7: ; preds = %entry
to:
-_foo:
+foo: # @foo
+# BB#0: # %entry
+ movl 4(%esp), %ecx
cmpb $0, 16(%esp)
- movl 12(%esp), %ecx
+ je .LBB0_2
+# BB#1: # %bb
movl 8(%esp), %eax
- movl 4(%esp), %edx
- je LBB1_2 # bb7
-LBB1_1: # bb
- addl %edx, %eax
+ addl %ecx, %eax
ret
-LBB1_2: # bb7
- movl %edx, %eax
- subl %ecx, %eax
+.LBB0_2: # %bb7
+ movl 12(%esp), %edx
+ movl %ecx, %eax
+ subl %edx, %eax
ret
-The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2
-if it commuted the addl in LBB1_1.
+There's an obviously unnecessary movl in .LBB0_2, and we could eliminate a
+couple more movls by putting 4(%esp) into %eax instead of %ecx.
//===---------------------------------------------------------------------===//
@@ -1396,8 +1339,7 @@ Also check why xmm7 is not used at all in the function.
//===---------------------------------------------------------------------===//
-Legalize loses track of the fact that bools are always zero extended when in
-memory. This causes us to compile abort_gzip (from 164.gzip) from:
+Take the following:
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
@@ -1416,16 +1358,15 @@ bb4.i: ; preds = %entry
}
declare void @exit(i32) noreturn nounwind
-into:
-
-_abort_gzip:
+This compiles into:
+_abort_gzip: ## @abort_gzip
+## BB#0: ## %entry
subl $12, %esp
movb _in_exit.4870.b, %al
- notb %al
- testb $1, %al
- jne LBB1_2 ## bb4.i
-LBB1_1: ## bb.i
- ...
+ cmpb $1, %al
+ jne LBB0_2
+
+We somehow miss folding the movb into the cmpb.
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 22e89a5..677781d 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -35,6 +35,10 @@ class formatted_raw_ostream;
FunctionPass *createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel);
+/// createGlobalBaseRegPass - This pass initializes a global base
+/// register for PIC on x86-32.
+FunctionPass* createGlobalBaseRegPass();
+
/// createX86FloatingPointStackifierPass - This function returns a pass which
/// converts floating point register references and pseudo instructions into
/// floating point stack references and physical instructions.
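
A constructor like this is typically wired into the target's pass
pipeline; a hypothetical sketch (the actual hook-up point is not shown
in this patch, and PM stands in for a PassManagerBase):

    PM.add(createGlobalBaseRegPass());
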
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 151087f..2cf65c1 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -23,13 +23,13 @@
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
-namespace {
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default: assert(0 && "invalid fixup kind!");
case X86::reloc_pcrel_1byte:
case FK_Data_1: return 0;
+ case X86::reloc_pcrel_2byte:
case FK_Data_2: return 1;
case X86::reloc_pcrel_4byte:
case X86::reloc_riprel_4byte:
@@ -39,6 +39,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) {
}
}
+namespace {
class X86AsmBackend : public TargetAsmBackend {
public:
X86AsmBackend(const Target &T)
@@ -60,6 +61,7 @@ public:
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
};
+} // end anonymous namespace
static unsigned getRelaxedOpcode(unsigned Op) {
switch (Op) {
@@ -75,7 +77,6 @@ static unsigned getRelaxedOpcode(unsigned Op) {
case X86::JG_1: return X86::JG_4;
case X86::JLE_1: return X86::JLE_4;
case X86::JL_1: return X86::JL_4;
- case X86::TAILJMP_1:
case X86::JMP_1: return X86::JMP_4;
case X86::JNE_1: return X86::JNE_4;
case X86::JNO_1: return X86::JNO_4;
@@ -180,6 +181,7 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
/* *** */
+namespace {
class ELFX86AsmBackend : public X86AsmBackend {
public:
ELFX86AsmBackend(const Target &T)
@@ -281,7 +283,7 @@ public:
}
};
-}
+} // end anonymous namespace
TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const std::string &TT) {
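
The shuffling here follows the usual LLVM convention: free functions
get internal linkage via static, while classes go inside an anonymous
namespace closed by an "// end anonymous namespace" comment. A minimal
illustration of the layout:

    static unsigned helper(unsigned K) { return K + 1; } // file-local

    namespace {
    struct Backend {
      unsigned run() const { return helper(41); }
    };
    } // end anonymous namespace
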
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index a5774e1..a6a1e4e 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -42,7 +42,7 @@ def RetCC_X86Common : CallingConv<[
// MMX vector types are always returned in MM0. If the target doesn't have
// MM0, it doesn't support these vector types.
- CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToReg<[MM0]>>,
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>,
// Long double types are always returned in ST0 (even with SSE).
CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
@@ -89,7 +89,7 @@ def RetCC_X86_64_C : CallingConv<[
// returned in RAX. This disagrees with ABI documentation but is bug
// compatible with gcc.
CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
- CCIfType<[v8i8, v4i16, v2i32, v2f32], CCAssignToReg<[XMM0, XMM1]>>,
+ CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>,
CCDelegateTo<RetCC_X86Common>
]>;
@@ -155,7 +155,7 @@ def CC_X86_64_C : CallingConv<[
// The first 8 MMX (except for v1i64) vector arguments are passed in XMM
// registers on Darwin.
- CCIfType<[v8i8, v4i16, v2i32, v2f32],
+ CCIfType<[v8i8, v4i16, v2i32],
CCIfSubtarget<"isTargetDarwin()",
CCIfSubtarget<"hasSSE2()",
CCPromoteToType<v2i64>>>>,
@@ -177,7 +177,7 @@ def CC_X86_64_C : CallingConv<[
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToStack<8, 8>>
+ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
// Calling convention used on Win64
@@ -195,7 +195,7 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
// The first 4 MMX vector arguments are passed in GPRs.
- CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
+ CCIfType<[v8i8, v4i16, v2i32, v1i64],
CCBitConvertToType<i64>>,
// The first 4 integer arguments are passed in integer registers.
@@ -254,7 +254,7 @@ def CC_X86_32_Common : CallingConv<[
// The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
// registers if the call is not a vararg call.
- CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32, v2f32],
+ CCIfNotVarArg<CCIfType<[v8i8, v4i16, v2i32],
CCAssignToReg<[MM0, MM1, MM2]>>>,
// Integer/Float values get stored in stack slots that are 4 bytes in
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 8f02604..f13669b 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -138,7 +138,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
// MOVPC32r is basically a call plus a pop instruction.
if (Desc.getOpcode() == X86::MOVPC32r)
emitInstruction(*I, &II->get(X86::POP32r));
- NumEmitted++; // Keep track of the # of mi's emitted
+ ++NumEmitted; // Keep track of the # of mi's emitted
}
}
} while (MCE.finishFunction(MF));
@@ -730,9 +730,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
case X86II::MRMDestMem: {
MCE.emitByte(BaseOpcode);
emitMemModRMByte(MI, CurOp,
- getX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)
+ getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)
.getReg()));
- CurOp += X86AddrNumOperands + 1;
+ CurOp += X86::AddrNumOperands + 1;
if (CurOp != NumOps)
emitConstant(MI.getOperand(CurOp++).getImm(),
X86II::getSizeOfImm(Desc->TSFlags));
@@ -750,13 +750,7 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
break;
case X86II::MRMSrcMem: {
- // FIXME: Maybe lea should have its own form?
- int AddrOperands;
- if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- AddrOperands = X86AddrNumOperands - 1; // No segment register
- else
- AddrOperands = X86AddrNumOperands;
+ int AddrOperands = X86::AddrNumOperands;
intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ?
X86II::getSizeOfImm(Desc->TSFlags) : 0;
@@ -810,14 +804,14 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m: {
- intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ?
- (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ?
+ intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ?
+ (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ?
X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0;
MCE.emitByte(BaseOpcode);
emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m,
PCAdj);
- CurOp += X86AddrNumOperands;
+ CurOp += X86::AddrNumOperands;
if (CurOp == NumOps)
break;
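
With the segment operand always present, stepping past a memory
reference is uniformly X86::AddrNumOperands, and its pieces can be
addressed with the X86:: offsets used elsewhere in the patch (a sketch,
with MI and CurOp assumed in scope):

    unsigned SegIdx = CurOp + X86::AddrSegmentReg;  // segment operand
    bool HasSeg = MI.getOperand(SegIdx).getReg() != 0;
    CurOp += X86::AddrNumOperands;                  // skip the reference
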
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 1bc5eb7..cdde24a 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -23,7 +23,9 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -52,20 +54,7 @@ class X86FastISel : public FastISel {
bool X86ScalarSSEf32;
public:
- explicit X86FastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &vm,
- DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am,
- std::vector<std::pair<MachineInstr*, unsigned> > &pn
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &cil
-#endif
- )
- : FastISel(mf, vm, bm, am, pn
-#ifndef NDEBUG
- , cil
-#endif
- ) {
+ explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
X86ScalarSSEf64 = Subtarget->hasSSE2();
@@ -96,6 +85,8 @@ private:
bool X86SelectStore(const Instruction *I);
+ bool X86SelectRet(const Instruction *I);
+
bool X86SelectCmp(const Instruction *I);
bool X86SelectZExt(const Instruction *I);
@@ -117,6 +108,7 @@ private:
bool X86SelectCall(const Instruction *I);
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
+ CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false);
const X86InstrInfo *getInstrInfo() const {
return getTargetMachine()->getInstrInfo();
@@ -190,6 +182,20 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
return CC_X86_32_C;
}
+/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling
+/// convention.
+CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC,
+                                          bool isTailCall) {
+ if (Subtarget->is64Bit()) {
+ if (Subtarget->isTargetWin64())
+ return RetCC_X86_Win64_C;
+ else
+ return RetCC_X86_64_C;
+ }
+
+ return RetCC_X86_32_C;
+}
+
/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
@@ -242,7 +248,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
}
ResultReg = createResultReg(RC);
- addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc), ResultReg), AM);
return true;
}
@@ -261,7 +268,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
case MVT::i1: {
// Mask out all but lowest bit.
unsigned AndResult = createResultReg(X86::GR8RegisterClass);
- BuildMI(MBB, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
Val = AndResult;
}
@@ -278,7 +285,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
break;
}
- addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM).addReg(Val);
return true;
}
@@ -306,7 +314,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
}
if (Opc) {
- addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
+ DL, TII.get(Opc)), AM)
.addImm(Signed ? (uint64_t) CI->getSExtValue() :
CI->getZExtValue());
return true;
@@ -342,6 +351,12 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
const User *U = NULL;
unsigned Opcode = Instruction::UserOp1;
if (const Instruction *I = dyn_cast<Instruction>(V)) {
+ // Don't walk into other basic blocks; it's possible we haven't
+ // visited them yet, so the instructions may not yet be assigned
+ // virtual registers.
+ if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
+ return false;
+
Opcode = I->getOpcode();
U = I;
} else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
@@ -349,6 +364,12 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
U = C;
}
+ if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
+ if (Ty->getAddressSpace() > 255)
+ // Fast instruction selection doesn't support the special
+ // address spaces.
+ return false;
+
switch (Opcode) {
default: break;
case Instruction::BitCast:
@@ -370,8 +391,9 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
case Instruction::Alloca: {
// Do static allocas.
const AllocaInst *A = cast<AllocaInst>(V);
- DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
- if (SI != StaticAllocaMap.end()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(A);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
AM.BaseType = X86AddressMode::FrameIndexBase;
AM.Base.FrameIndex = SI->second;
return true;
@@ -411,20 +433,33 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
Disp += SL->getElementOffset(Idx);
} else {
uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- // Constant-offset addressing.
- Disp += CI->getSExtValue() * S;
- } else if (IndexReg == 0 &&
- (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
- (S == 1 || S == 2 || S == 4 || S == 8)) {
- // Scaled-index addressing.
- Scale = S;
- IndexReg = getRegForGEPIndex(Op).first;
- if (IndexReg == 0)
- return false;
- } else
- // Unsupported.
- goto unsupported_gep;
+ SmallVector<const Value *, 4> Worklist;
+ Worklist.push_back(Op);
+ do {
+ Op = Worklist.pop_back_val();
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ } else if (isa<AddOperator>(Op) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Add the other operand back to the work list.
+ Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
+ } else if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ } else
+ // Unsupported.
+ goto unsupported_gep;
+ } while (!Worklist.empty());
}
}
// Check for displacement overflow.
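The worklist rewrite above widens what X86SelectAddress can fold: a GEP index that is an add of a constant no longer forces a bailout; the constant folds into the displacement and the remaining operand goes back on the worklist. Roughly, for a hypothetical index computed as x + 12 into an i32 array (not an example from the patch itself):

    // %idx = add i64 %x, 12
    // %p   = getelementptr i32* %base, i64 %idx
    // now folds to the address [ %base + 4*%x + 48 ]:
    Disp += 12 * 4;                         // constant part, scaled by S
    Scale = 4;
    IndexReg = getRegForGEPIndex(Op).first; // register for the %x part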
@@ -473,7 +508,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// If this reference is relative to the pic base, set it now.
if (isGlobalRelativeToPICBase(GVFlags)) {
// FIXME: How do we know Base.Reg is free??
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
}
// Unless the ABI requires an extra load, return a direct reference to
@@ -504,6 +539,9 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
StubAM.GV = GV;
StubAM.GVOpFlags = GVFlags;
+ // Prepare for inserting code in the local-value area.
+ MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
@@ -516,8 +554,13 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
}
LoadReg = createResultReg(RC);
- addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);
-
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
+
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
+
// Prevent loading GV stub multiple times in same MBB.
LocalValueMap[V] = LoadReg;
}
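enterLocalValueArea/leaveLocalValueArea are new FastISel hooks this update relies on: instructions emitted between the two calls land in the block's local-value area (re-usable values hoisted to the top of the block) rather than at the current insertion point. Condensed from the hunk above:

    MachineBasicBlock::iterator SavePt = enterLocalValueArea();
    unsigned LoadReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
    leaveLocalValueArea(SavePt);   // restore the normal insertion point
    LocalValueMap[V] = LoadReg;    // the stub load is reused block-wide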
@@ -642,6 +685,93 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
return X86FastEmitStore(VT, I->getOperand(0), AM);
}
+/// X86SelectRet - Select and emit code to implement ret instructions.
+bool X86FastISel::X86SelectRet(const Instruction *I) {
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ CallingConv::ID CC = F.getCallingConv();
+ if (CC != CallingConv::C &&
+ CC != CallingConv::Fast &&
+ CC != CallingConv::X86_FastCall)
+ return false;
+
+ if (Subtarget->isTargetWin64())
+ return false;
+
+ // Don't handle popping bytes on return for now.
+ if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
+ ->getBytesToPopOnReturn() != 0)
+ return false;
+
+ // fastcc with -tailcallopt is intended to provide a guaranteed
+ // tail call optimization. FastISel doesn't know how to do that.
+ if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
+ return false;
+
+ // Let SDISel handle vararg functions.
+ if (F.isVarArg())
+ return false;
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC));
+
+ const Value *RV = Ret->getOperand(0);
+ unsigned Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
+ // Only handle a single return value for now.
+ if (ValLocs.size() != 1)
+ return false;
+
+ CCValAssign &VA = ValLocs[0];
+
+ // Don't bother handling odd stuff for now.
+ if (VA.getLocInfo() != CCValAssign::Full)
+ return false;
+ // Only handle register returns for now.
+ if (!VA.isRegLoc())
+ return false;
+ // TODO: For now, don't try to handle cases where getLocInfo()
+ // says Full but the types don't match.
+ if (VA.getValVT() != TLI.getValueType(RV->getType()))
+ return false;
+
+ // The calling-convention tables for x87 returns don't tell
+ // the whole story.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
+ return false;
+
+ // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo();
+ unsigned DstReg = VA.getLocReg();
+ const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
+ // Avoid a cross-class copy. This is very unlikely.
+ if (!SrcRC->contains(DstReg))
+ return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ DstReg).addReg(SrcReg);
+
+ // Mark the register as live out of the function.
+ MRI.addLiveOut(VA.getLocReg());
+ }
+
+ // Now emit the RET.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
+ return true;
+}
+
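In outline, the new return lowering mirrors what SelectionDAG does, minus all the hard cases it rejects above. A condensed restatement (error paths omitted):

    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  Outs, TLI);
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC));
    // COPY the value register into ValLocs[0].getLocReg(), mark that
    // register live-out of the function, then emit X86::RET.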
/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
@@ -661,15 +791,15 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {
return false;
}
-static unsigned X86ChooseCmpOpcode(EVT VT) {
+static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
switch (VT.getSimpleVT().SimpleTy) {
default: return 0;
case MVT::i8: return X86::CMP8rr;
case MVT::i16: return X86::CMP16rr;
case MVT::i32: return X86::CMP32rr;
case MVT::i64: return X86::CMP64rr;
- case MVT::f32: return X86::UCOMISSrr;
- case MVT::f64: return X86::UCOMISDrr;
+ case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
+ case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
}
}
@@ -706,18 +836,21 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
// CMPri, otherwise use CMPrr.
if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
- BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
- .addImm(Op1C->getSExtValue());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
+ .addReg(Op0Reg)
+ .addImm(Op1C->getSExtValue());
return true;
}
}
- unsigned CompareOpc = X86ChooseCmpOpcode(VT);
+ unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
if (CompareOpc == 0) return false;
unsigned Op1Reg = getRegForValue(Op1);
if (Op1Reg == 0) return false;
- BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
+ .addReg(Op0Reg)
+ .addReg(Op1Reg);
return true;
}
@@ -739,9 +872,10 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
unsigned EReg = createResultReg(&X86::GR8RegClass);
unsigned NPReg = createResultReg(&X86::GR8RegClass);
- BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
- BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
- BuildMI(MBB, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::SETNPr), NPReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -752,9 +886,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
unsigned NEReg = createResultReg(&X86::GR8RegClass);
unsigned PReg = createResultReg(&X86::GR8RegClass);
- BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
- BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
- BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::SETNEr), NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::SETPr), PReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::OR8rr), ResultReg)
+ .addReg(PReg).addReg(NEReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -793,7 +931,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
- BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -819,8 +957,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Unconditional branches are selected by tablegen-generated code.
// Handle a conditional branch.
const BranchInst *BI = cast<BranchInst>(I);
- MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
- MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];
+ MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
// Fold the common case of a conditional branch with a comparison.
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
@@ -829,7 +967,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
// Try to take advantage of fallthrough opportunities.
CmpInst::Predicate Predicate = CI->getPredicate();
- if (MBB->isLayoutSuccessor(TrueMBB)) {
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
std::swap(TrueMBB, FalseMBB);
Predicate = CmpInst::getInversePredicate(Predicate);
}
@@ -878,16 +1016,18 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
if (!X86FastEmitCompare(Op0, Op1, VT))
return false;
- BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
+ .addMBB(TrueMBB);
if (Predicate == CmpInst::FCMP_UNE) {
// X86 requires a second branch to handle UNE (and OEQ,
// which is mapped to UNE above).
- BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
+ .addMBB(TrueMBB);
}
- FastEmitBranch(FalseMBB);
- MBB->addSuccessor(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
return true;
}
} else if (ExtractValueInst *EI =
@@ -910,10 +1050,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
const MachineInstr *SetMI = 0;
- unsigned Reg = lookUpRegForValue(EI);
+ unsigned Reg = getRegForValue(EI);
for (MachineBasicBlock::const_reverse_iterator
- RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
+ RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend();
+ RI != RE; ++RI) {
const MachineInstr &MI = *RI;
if (MI.definesRegister(Reg)) {
@@ -938,11 +1079,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
unsigned OpCode = SetMI->getOpcode();
if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
- BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ?
- X86::JO_4 : X86::JB_4))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(OpCode == X86::SETOr ? X86::JO_4 : X86::JB_4))
.addMBB(TrueMBB);
- FastEmitBranch(FalseMBB);
- MBB->addSuccessor(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
return true;
}
}
@@ -954,10 +1095,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
unsigned OpReg = getRegForValue(BI->getCondition());
if (OpReg == 0) return false;
- BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
- BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB);
- FastEmitBranch(FalseMBB);
- MBB->addSuccessor(TrueMBB);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
+ .addReg(OpReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
return true;
}
@@ -1014,7 +1157,7 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
// Fold immediate in shl(x,3).
if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ResultReg = createResultReg(RC);
- BuildMI(MBB, DL, TII.get(OpImm),
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
UpdateValueMap(I, ResultReg);
return true;
@@ -1022,17 +1165,19 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
- TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CReg).addReg(Op1Reg);
// The shift instruction uses X86::CL. If we defined a super-register
- // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
- // we're doing here.
+ // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
if (CReg != X86::CL)
- BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL)
- .addReg(CReg).addImm(X86::sub_8bit);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::KILL), X86::CL)
+ .addReg(CReg, RegState::Kill);
unsigned ResultReg = createResultReg(RC);
- BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
+ .addReg(Op0Reg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1064,9 +1209,11 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) {
unsigned Op2Reg = getRegForValue(I->getOperand(2));
if (Op2Reg == 0) return false;
- BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
+ .addReg(Op0Reg).addReg(Op0Reg);
unsigned ResultReg = createResultReg(RC);
- BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(Op1Reg).addReg(Op2Reg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1080,7 +1227,9 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
- BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSS2SDrr), ResultReg)
+ .addReg(OpReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1097,7 +1246,9 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
unsigned OpReg = getRegForValue(V);
if (OpReg == 0) return false;
unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
- BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(X86::CVTSD2SSrr), ResultReg)
+ .addReg(OpReg);
UpdateValueMap(I, ResultReg);
return true;
}
@@ -1132,7 +1283,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg)
+ .addReg(InputReg);
// Then issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
@@ -1153,14 +1305,18 @@ bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
switch (CI->getIntrinsicID()) {
default: break;
case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
+ case Intrinsic::uadd_with_overflow: {
// Cheat a little. We know that the registers for "add" and "seto" are
// allocated sequentially. However, we only keep track of the register
// for "add" in the value map. Use extractvalue's index to get the
// correct register for "seto".
- UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
+ unsigned OpReg = getRegForValue(Agg);
+ if (OpReg == 0)
+ return false;
+ UpdateValueMap(I, OpReg + *EI->idx_begin());
return true;
}
+ }
}
return false;
@@ -1174,8 +1330,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Emit code inline code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
- const Value *Op1 = I.getOperand(1); // The guard's value.
- const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
+ const Value *Op1 = I.getArgOperand(0); // The guard's value.
+ const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
// Grab the frame index.
X86AddressMode AM;
@@ -1186,7 +1342,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
return true;
}
case Intrinsic::objectsize: {
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
const Type *Ty = I.getCalledFunction()->getReturnType();
assert(CI && "Non-constant type in Intrinsic::objectsize?");
@@ -1204,8 +1360,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(MBB, DL, TII.get(OpC), ResultReg).
- addImm(CI->getZExtValue() == 0 ? -1ULL : 0);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg).
+ addImm(CI->isZero() ? -1ULL : 0);
UpdateValueMap(&I, ResultReg);
return true;
}
@@ -1218,12 +1374,12 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
- addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0).
- addMetadata(DI->getVariable());
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
+ addImm(0).addMetadata(DI->getVariable());
return true;
}
case Intrinsic::trap: {
- BuildMI(MBB, DL, TII.get(X86::TRAP));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
return true;
}
case Intrinsic::sadd_with_overflow:
@@ -1241,8 +1397,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
if (!isTypeLegal(RetTy, VT))
return false;
- const Value *Op1 = I.getOperand(1);
- const Value *Op2 = I.getOperand(2);
+ const Value *Op1 = I.getArgOperand(0);
+ const Value *Op2 = I.getArgOperand(1);
unsigned Reg1 = getRegForValue(Op1);
unsigned Reg2 = getRegForValue(Op2);
@@ -1259,7 +1415,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
return false;
unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
+ .addReg(Reg1).addReg(Reg2);
unsigned DestReg1 = UpdateValueMap(&I, ResultReg);
// If the add with overflow is an intra-block value then we just want to
@@ -1277,7 +1434,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
unsigned Opc = X86::SETBr;
if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
Opc = X86::SETOr;
- BuildMI(MBB, DL, TII.get(Opc), ResultReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
return true;
}
}
@@ -1285,7 +1442,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
bool X86FastISel::X86SelectCall(const Instruction *I) {
const CallInst *CI = cast<CallInst>(I);
- const Value *Callee = I->getOperand(0);
+ const Value *Callee = CI->getCalledValue();
// Can't handle inline asm yet.
if (isa<InlineAsm>(Callee))
@@ -1314,6 +1471,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (FTy->isVarArg())
return false;
+ // Fast-isel doesn't know about callee-pop yet.
+ if (Subtarget->IsCalleePop(FTy->isVarArg(), CC))
+ return false;
+
// Handle *simple* calls for now.
const Type *RetTy = CS.getType();
EVT RetVT;
@@ -1387,6 +1548,12 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
+
+ // Allocate shadow area for Win64
+ if (Subtarget->isTargetWin64()) {
+ CCInfo.AllocateStack(32, 8);
+ }
+
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));
// Get a count of how many bytes are to be pushed on the stack.
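The shadow-area hunk encodes a Win64 ABI rule: the caller always reserves 32 bytes of home space for the four register parameters, even when the call pushes nothing else on the stack. Reserving it through CCState up front keeps the later stack-offset assignments correct:

    if (Subtarget->isTargetWin64())
      CCInfo.AllocateStack(32, 8);   // 4 register homes x 8 bytes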
@@ -1394,7 +1561,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Issue CALLSEQ_START
unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
- BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
+ .addImm(NumBytes);
// Process argument: walk the register/memloc assignments, inserting
// copies / loads.
@@ -1449,11 +1617,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
if (VA.isRegLoc()) {
- TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
- bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
- Arg, RC, RC, DL);
- assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
- Emitted = true;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ VA.getLocReg()).addReg(Arg);
RegArgs.push_back(VA.getLocReg());
} else {
unsigned LocMemOffset = VA.getLocMemOffset();
@@ -1475,12 +1640,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// ELF / PIC requires GOT in the EBX register before function calls via PLT
// GOT pointer.
if (Subtarget->isPICStyleGOT()) {
- TargetRegisterClass *RC = X86::GR32RegisterClass;
- unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
- bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC,
- DL);
- assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
- Emitted = true;
+ unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ X86::EBX).addReg(Base);
}
// Issue the call.
@@ -1488,7 +1650,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (CalleeOp) {
// Register-indirect call.
unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
- MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
+ .addReg(CalleeOp);
} else {
// Direct call.
@@ -1517,7 +1680,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
- MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
+ MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
+ .addGlobalAddress(GV, 0, OpFlags);
}
// Add an implicit use GOT pointer in EBX.
@@ -1530,9 +1694,11 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Issue CALLSEQ_END
unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
- BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
+ .addImm(NumBytes).addImm(0);
// Now handle call return value (if any).
+ SmallVector<unsigned, 4> UsedRegs;
if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
@@ -1542,7 +1708,6 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
EVT CopyVT = RVLocs[0].getValVT();
TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
- TargetRegisterClass *SrcRC = DstRC;
// If this is a call to a function that returns an fp value on the x87 fp
// stack, but where we prefer to use the value in xmm registers, copy it
@@ -1551,15 +1716,14 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
RVLocs[0].getLocReg() == X86::ST1) &&
isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
CopyVT = MVT::f80;
- SrcRC = X86::RSTRegisterClass;
DstRC = X86::RFP80RegisterClass;
}
unsigned ResultReg = createResultReg(DstRC);
- bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
- assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
- Emitted = true;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(RVLocs[0].getLocReg());
+ UsedRegs.push_back(RVLocs[0].getLocReg());
+
if (CopyVT != RVLocs[0].getValVT()) {
// Round the F80 the right size, which also moves to the appropriate xmm
// register. This is accomplished by storing the F80 value in memory and
@@ -1568,18 +1732,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
int FI = MFI.CreateStackObject(MemSize, MemSize, false);
- addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc)), FI)
+ .addReg(ResultReg);
DstRC = ResVT == MVT::f32
? X86::FR32RegisterClass : X86::FR64RegisterClass;
Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
ResultReg = createResultReg(DstRC);
- addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
+ addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), FI);
}
if (AndToI1) {
// Mask out all but lowest bit for some call which produces an i1.
unsigned AndResult = createResultReg(X86::GR8RegisterClass);
- BuildMI(MBB, DL,
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
ResultReg = AndResult;
}
@@ -1587,6 +1754,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
UpdateValueMap(I, ResultReg);
}
+ // Set all unused physreg defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
return true;
}
@@ -1599,6 +1769,8 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) {
return X86SelectLoad(I);
case Instruction::Store:
return X86SelectStore(I);
+ case Instruction::Ret:
+ return X86SelectRet(I);
case Instruction::ICmp:
case Instruction::FCmp:
return X86SelectCmp(I);
@@ -1699,7 +1871,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
else
Opc = X86::LEA64r;
unsigned ResultReg = createResultReg(RC);
- addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
return ResultReg;
}
return 0;
@@ -1717,10 +1890,10 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
unsigned char OpFlag = 0;
if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
OpFlag = X86II::MO_PIC_BASE_OFFSET;
- PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
} else if (Subtarget->isPICStyleGOT()) {
OpFlag = X86II::MO_GOTOFF;
- PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+ PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
} else if (Subtarget->isPICStyleRIPRel() &&
TM.getCodeModel() == CodeModel::Small) {
PICBase = X86::RIP;
@@ -1729,7 +1902,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
// Create the load from the constant pool.
unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
unsigned ResultReg = createResultReg(RC);
- addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
+ addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg),
MCPOffset, PICBase, OpFlag);
return ResultReg;
@@ -1743,7 +1917,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
// various places, but TargetMaterializeAlloca also needs a check
// in order to avoid recursion between getRegForValue,
// X86SelectAddress, and TargetMaterializeAlloca.
- if (!StaticAllocaMap.count(C))
+ if (!FuncInfo.StaticAllocaMap.count(C))
return 0;
X86AddressMode AM;
@@ -1752,24 +1926,13 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
unsigned ResultReg = createResultReg(RC);
- addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
+ addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(Opc), ResultReg), AM);
return ResultReg;
}
namespace llvm {
- llvm::FastISel *X86::createFastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &vm,
- DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
- DenseMap<const AllocaInst *, int> &am,
- std::vector<std::pair<MachineInstr*, unsigned> > &pn
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &cil
-#endif
- ) {
- return new X86FastISel(mf, vm, bm, am, pn
-#ifndef NDEBUG
- , cil
-#endif
- );
+ llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
+ return new X86FastISel(funcInfo);
}
}
diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h
index a8117d4..96e0aae 100644
--- a/lib/Target/X86/X86FixupKinds.h
+++ b/lib/Target/X86/X86FixupKinds.h
@@ -17,6 +17,7 @@ namespace X86 {
enum Fixups {
reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch.
reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1
+ reloc_pcrel_2byte, // 16-bit pcrel, e.g. callw
reloc_riprel_4byte, // 32-bit rip-relative
reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq
};
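Any emitter consuming these fixup kinds has to map the new 16-bit entry to a 2-byte write; the in-tree MC backend keeps a similar size table. A sketch of such a mapping (an illustration, not the actual backend function):

    static unsigned getFixupKindSize(unsigned Kind) {
      switch (Kind) {
      default: assert(0 && "unknown fixup kind");
      case X86::reloc_pcrel_1byte:             return 1;
      case X86::reloc_pcrel_2byte:             return 2; // e.g. callw
      case X86::reloc_pcrel_4byte:
      case X86::reloc_riprel_4byte:
      case X86::reloc_riprel_4byte_movq_load:  return 4;
      }
    }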
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 93460ef..cee4ad7 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -133,7 +133,7 @@ namespace {
// Emit an fxch to update the runtime processors version of the state.
BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
- NumFXCH++;
+ ++NumFXCH;
}
void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
@@ -164,6 +164,8 @@ namespace {
void handleCompareFP(MachineBasicBlock::iterator &I);
void handleCondMovFP(MachineBasicBlock::iterator &I);
void handleSpecialFP(MachineBasicBlock::iterator &I);
+
+ bool translateCopy(MachineInstr*);
};
char FPS::ID = 0;
}
@@ -232,12 +234,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
MachineInstr *MI = I;
- unsigned Flags = MI->getDesc().TSFlags;
+ uint64_t Flags = MI->getDesc().TSFlags;
unsigned FPInstClass = Flags & X86II::FPTypeMask;
if (MI->isInlineAsm())
FPInstClass = X86II::SpecialFP;
-
+
+ if (MI->isCopy() && translateCopy(MI))
+ FPInstClass = X86II::SpecialFP;
+
if (FPInstClass == X86II::NotFP)
continue; // Efficiently ignore non-fp insts!
@@ -628,7 +633,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
MachineInstr *MI = I;
unsigned NumOps = MI->getDesc().getNumOperands();
- assert((NumOps == X86AddrNumOperands + 1 || NumOps == 1) &&
+ assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
"Can only handle fst* & ftst instructions!");
// Is this the last use of the source register?
@@ -1001,15 +1006,17 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
case X86::FpSET_ST0_32:
case X86::FpSET_ST0_64:
case X86::FpSET_ST0_80: {
+ // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm
+ // arguments that use an st constraint. We expect a sequence of
+ // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM
unsigned Op0 = getFPReg(MI->getOperand(0));
- // FpSET_ST0_80 is generated by copyRegToReg for both function return
- // and inline assembly with the "st" constrain. In the latter case,
- // it is possible for ST(0) to be alive after this instruction.
if (!MI->killsRegister(X86::FP0 + Op0)) {
- // Duplicate Op0
- duplicateToTop(0, 7 /*temp register*/, I);
+ // Duplicate Op0 into a temporary on the stack top.
+ // This actually assumes that FP7 is dead.
+ duplicateToTop(Op0, 7, I);
} else {
+ // Op0 is killed, so just swap it into position.
moveToTop(Op0, I);
}
--StackTop; // "Forget" we have something on the top of stack!
@@ -1017,17 +1024,29 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
}
case X86::FpSET_ST1_32:
case X86::FpSET_ST1_64:
- case X86::FpSET_ST1_80:
- // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
- if (StackTop == 1) {
- BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
- NumFXCH++;
- StackTop = 0;
- break;
+ case X86::FpSET_ST1_80: {
+ // Set up st(1) for inline asm. We are assuming that st(0) has already been
+ // set up by FpSET_ST0, and our StackTop is off by one because of it.
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+ // Restore the actual StackTop from before Fp_SET_ST0.
+ // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
+ // are not enforcing the constraint.
+ ++StackTop;
+ unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
+ if (!MI->killsRegister(X86::FP0 + Op0)) {
+ // Assume FP6 is not live, use it as a scratch register.
+ duplicateToTop(Op0, 6, I);
+ moveToTop(RegOnTop, I);
+ } else if (getSTReg(Op0) != X86::ST1) {
+ // We have the wrong value at st(1). Shuffle! Untested!
+ moveToTop(getStackEntry(1), I);
+ moveToTop(Op0, I);
+ moveToTop(RegOnTop, I);
}
- assert(StackTop == 2 && "Stack should have two element on it to return!");
- --StackTop; // "Forget" we have something on the top of stack!
+ assert(StackTop >= 2 && "Too few live registers");
+ StackTop -= 2; // "Forget" both st(0) and st(1).
break;
+ }
case X86::MOV_Fp3232:
case X86::MOV_Fp3264:
case X86::MOV_Fp6432:
@@ -1041,32 +1060,6 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
unsigned SrcReg = getFPReg(MO1);
const MachineOperand &MO0 = MI->getOperand(0);
- // These can be created due to inline asm. Two address pass can introduce
- // copies from RFP registers to virtual registers.
- if (MO0.getReg() == X86::ST0 && SrcReg == 0) {
- assert(MO1.isKill());
- // Treat %ST0<def> = MOV_Fp8080 %FP0<kill>
- // like FpSET_ST0_80 %FP0<kill>, %ST0<imp-def>
- assert((StackTop == 1 || StackTop == 2)
- && "Stack should have one or two element on it to return!");
- --StackTop; // "Forget" we have something on the top of stack!
- break;
- } else if (MO0.getReg() == X86::ST1 && SrcReg == 1) {
- assert(MO1.isKill());
- // Treat %ST1<def> = MOV_Fp8080 %FP1<kill>
- // like FpSET_ST1_80 %FP0<kill>, %ST1<imp-def>
- // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them.
- if (StackTop == 1) {
- BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1);
- NumFXCH++;
- StackTop = 0;
- break;
- }
- assert(StackTop == 2 && "Stack should have two element on it to return!");
- --StackTop; // "Forget" we have something on the top of stack!
- break;
- }
-
unsigned DestReg = getFPReg(MO0);
if (MI->killsRegister(X86::FP0+SrcReg)) {
// If the input operand is killed, we can just change the owner of the
@@ -1206,3 +1199,33 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
I = MBB->erase(I); // Remove the pseudo instruction
--I;
}
+
+// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
+bool FPS::translateCopy(MachineInstr *MI) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ if (DstReg == X86::ST0) {
+ MI->setDesc(TII->get(X86::FpSET_ST0_80));
+ MI->RemoveOperand(0);
+ return true;
+ }
+ if (DstReg == X86::ST1) {
+ MI->setDesc(TII->get(X86::FpSET_ST1_80));
+ MI->RemoveOperand(0);
+ return true;
+ }
+ if (SrcReg == X86::ST0) {
+ MI->setDesc(TII->get(X86::FpGET_ST0_80));
+ return true;
+ }
+ if (SrcReg == X86::ST1) {
+ MI->setDesc(TII->get(X86::FpGET_ST1_80));
+ return true;
+ }
+ if (X86::RFP80RegClass.contains(DstReg, SrcReg)) {
+ MI->setDesc(TII->get(X86::MOV_Fp8080));
+ return true;
+ }
+ return false;
+}
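translateCopy exists because this LLVM revision starts emitting target-independent COPY instructions where copyRegToReg used to emit FP pseudos; the stackifier still only understands the pseudos. The retagging is driven from processBasicBlock, as in the hunk further up:

    if (MI->isCopy() && translateCopy(MI))
      FPInstClass = X86II::SpecialFP;
    // e.g.  %ST0<def> = COPY %FP3<kill>  becomes  FpSET_ST0_80 %FP3<kill>
    //       %FP2<def> = COPY %ST0<kill>  becomes  %FP2<def> = FpGET_ST0_80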
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 747683d..2c98b96 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -72,18 +72,15 @@ static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
/// stack code, and thus needs an FP_REG_KILL.
static bool ContainsFPStackCode(MachineBasicBlock *MBB,
const MachineRegisterInfo &MRI) {
- // Scan the block, looking for instructions that define fp stack vregs.
+ // Scan the block, looking for instructions that define or use fp stack vregs.
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
- if (I->getNumOperands() == 0 || !I->getOperand(0).isReg())
- continue;
-
for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef())
+ if (!I->getOperand(op).isReg())
continue;
-
- if (isFPStackVReg(I->getOperand(op).getReg(), MRI))
- return true;
+ if (unsigned Reg = I->getOperand(op).getReg())
+ if (isFPStackVReg(Reg, MRI))
+ return true;
}
}
@@ -108,8 +105,8 @@ static bool ContainsFPStackCode(MachineBasicBlock *MBB,
bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
// If we are emitting FP stack code, scan the basic block to determine if this
- // block defines any FP values. If so, put an FP_REG_KILL instruction before
- // the terminator of the block.
+ // block defines or uses any FP values. If so, put an FP_REG_KILL instruction
+ // before the terminator of the block.
// Note that FP stack instructions are used in all modes for long double,
// so we always need to do this check.
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 0f64383..72f2bc1 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -137,21 +137,6 @@ namespace {
}
namespace {
- class X86ISelListener : public SelectionDAG::DAGUpdateListener {
- SmallSet<SDNode*, 4> Deletes;
- public:
- explicit X86ISelListener() {}
- virtual void NodeDeleted(SDNode *N, SDNode *E) {
- Deletes.insert(N);
- }
- virtual void NodeUpdated(SDNode *N) {
- // Ignore updates.
- }
- bool IsDeleted(SDNode *N) {
- return Deletes.count(N);
- }
- };
-
//===--------------------------------------------------------------------===//
/// ISel - X86 specific code to select X86 machine instructions for
/// SelectionDAG operations.
@@ -199,16 +184,17 @@ namespace {
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
- X86ISelListener &DeadNodes,
unsigned Depth);
bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
SDValue &Segment);
bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index, SDValue &Disp);
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
- SDValue &Scale, SDValue &Index, SDValue &Disp);
+ SDValue &Scale, SDValue &Index, SDValue &Disp,
+ SDValue &Segment);
bool SelectScalarSSELoad(SDNode *Root, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
@@ -239,7 +225,8 @@ namespace {
// These are 32-bit even in 64-bit mode since RIP relative offset
// is 32-bit.
if (AM.GV)
- Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
+ MVT::i32, AM.Disp,
AM.SymbolFlags);
else if (AM.CP)
Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
@@ -386,14 +373,14 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
}
for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
Ops.push_back(OrigChain.getOperand(i));
- CurDAG->UpdateNodeOperands(OrigChain, &Ops[0], Ops.size());
- CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
+ CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
Load.getOperand(1), Load.getOperand(2));
Ops.clear();
Ops.push_back(SDValue(Load.getNode(), 1));
for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
Ops.push_back(Call.getOperand(i));
- CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size());
+ CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
}
/// isCalleeLoad - Return true if call address is a load and it can be
@@ -515,7 +502,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
N->getOperand(0),
MemTmp, NULL, 0, MemVT,
false, false, 0);
- SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
+ SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
NULL, 0, MemVT, false, false, 0);
// We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
@@ -664,8 +651,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
/// returning true if it cannot be done. This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
- X86ISelListener DeadNodes;
- if (MatchAddressRecursively(N, AM, DeadNodes, 0))
+ if (MatchAddressRecursively(N, AM, 0))
return true;
// Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
@@ -713,7 +699,6 @@ static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
}
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
- X86ISelListener &DeadNodes,
unsigned Depth) {
bool is64Bit = Subtarget->is64Bit();
DebugLoc dl = N.getDebugLoc();
@@ -876,13 +861,13 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// other uses, since it avoids a two-address sub instruction, however
// it costs an additional mov if the index register has other uses.
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+
// Test if the LHS of the sub can be folded.
X86ISelAddressMode Backup = AM;
- if (MatchAddressRecursively(N.getNode()->getOperand(0), AM,
- DeadNodes, Depth+1) ||
- // If it is successful but the recursive update causes N to be deleted,
- // then it's not safe to continue.
- DeadNodes.IsDeleted(N.getNode())) {
+ if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
AM = Backup;
break;
}
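HandleSDNode is what replaces the deleted X86ISelListener: it holds an artificial use of the node, so if recursive matching triggers CSE or ReplaceAllUsesWith, Handle.getValue() still refers to the surviving node instead of a dangling SDValue. The usage pattern, condensed from this hunk and the ISD::ADD case below:

    HandleSDNode Handle(N);            // artificial use, RAUW-safe
    if (MatchAddressRecursively(N.getOperand(0), AM, Depth+1)) {
      AM = Backup;                     // matching failed, restore state
    }
    // Re-read operands through the handle; N may have been replaced.
    SDValue RHS = Handle.getValue().getNode()->getOperand(1);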
@@ -893,7 +878,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
}
int Cost = 0;
- SDValue RHS = N.getNode()->getOperand(1);
+ SDValue RHS = Handle.getValue().getNode()->getOperand(1);
// If the RHS involves a register with multiple uses, this
// transformation incurs an extra mov, due to the neg instruction
// clobbering its operand.
@@ -944,35 +929,27 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
}
case ISD::ADD: {
+ // Add an artificial use to this node so that we can keep track of
+ // it if it gets CSE'd with a different node.
+ HandleSDNode Handle(N);
+ SDValue LHS = Handle.getValue().getNode()->getOperand(0);
+ SDValue RHS = Handle.getValue().getNode()->getOperand(1);
+
X86ISelAddressMode Backup = AM;
- if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM,
- DeadNodes, Depth+1)) {
- if (DeadNodes.IsDeleted(N.getNode()))
- // If it is successful but the recursive update causes N to be deleted,
- // then it's not safe to continue.
- return true;
- if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM,
- DeadNodes, Depth+1))
- // If it is successful but the recursive update causes N to be deleted,
- // then it's not safe to continue.
- return DeadNodes.IsDeleted(N.getNode());
- }
+ if (!MatchAddressRecursively(LHS, AM, Depth+1) &&
+ !MatchAddressRecursively(RHS, AM, Depth+1))
+ return false;
+ AM = Backup;
+ LHS = Handle.getValue().getNode()->getOperand(0);
+ RHS = Handle.getValue().getNode()->getOperand(1);
// Try again after commuting the operands.
+ if (!MatchAddressRecursively(RHS, AM, Depth+1) &&
+ !MatchAddressRecursively(LHS, AM, Depth+1))
+ return false;
AM = Backup;
- if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM,
- DeadNodes, Depth+1)) {
- if (DeadNodes.IsDeleted(N.getNode()))
- // If it is successful but the recursive update causes N to be deleted,
- // then it's not safe to continue.
- return true;
- if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM,
- DeadNodes, Depth+1))
- // If it is successful but the recursive update causes N to be deleted,
- // then it's not safe to continue.
- return DeadNodes.IsDeleted(N.getNode());
- }
- AM = Backup;
+ LHS = Handle.getValue().getNode()->getOperand(0);
+ RHS = Handle.getValue().getNode()->getOperand(1);
// If we couldn't fold both operands into the address at the same time,
// see if we can just put each operand into a register and fold at least
@@ -980,8 +957,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (AM.BaseType == X86ISelAddressMode::RegBase &&
!AM.Base_Reg.getNode() &&
!AM.IndexReg.getNode()) {
- AM.Base_Reg = N.getNode()->getOperand(0);
- AM.IndexReg = N.getNode()->getOperand(1);
+ AM.Base_Reg = LHS;
+ AM.IndexReg = RHS;
AM.Scale = 1;
return false;
}
@@ -996,7 +973,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
uint64_t Offset = CN->getSExtValue();
// Start with the LHS as an addr mode.
- if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) &&
+ if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
// Address could not have picked a GV address for the displacement.
AM.GV == NULL &&
// On x86-64, the resultant disp must fit in 32-bits.
@@ -1073,7 +1050,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
CurDAG->RepositionNode(N.getNode(), Shl.getNode());
Shl.getNode()->setNodeId(N.getNode()->getNodeId());
}
- CurDAG->ReplaceAllUsesWith(N, Shl, &DeadNodes);
+ CurDAG->ReplaceAllUsesWith(N, Shl);
AM.IndexReg = And;
AM.Scale = (1 << ScaleLog);
return false;
@@ -1124,7 +1101,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
}
- CurDAG->ReplaceAllUsesWith(N, NewSHIFT, &DeadNodes);
+ CurDAG->ReplaceAllUsesWith(N, NewSHIFT);
AM.Scale = 1 << ShiftCst;
AM.IndexReg = NewAND;
@@ -1230,7 +1207,8 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
SDValue &Base, SDValue &Scale,
- SDValue &Index, SDValue &Disp) {
+ SDValue &Index, SDValue &Disp,
+ SDValue &Segment) {
X86ISelAddressMode AM;
// Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
@@ -1284,7 +1262,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
if (Complexity <= 2)
return false;
- SDValue Segment;
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1292,10 +1269,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
- SDValue &Disp) {
+ SDValue &Disp, SDValue &Segment) {
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
-
+
X86ISelAddressMode AM;
AM.GV = GA->getGlobal();
AM.Disp += GA->getOffset();
@@ -1309,7 +1286,6 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
}
- SDValue Segment;
getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
return true;
}
@@ -1672,6 +1648,26 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+ // Get the low part if needed. Don't use getCopyFromReg for aliasing
+ // registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX down 8 bits.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)), 0);
+ // Then truncate it down to i8.
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
// Copy the low half of the result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
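Both of the new blocks in this file work around the same encoding rule: AH, BH, CH, and DH cannot be addressed by any instruction that carries a REX prefix, so in 64-bit mode the high-byte result (AH) is recovered from AX instead. The shape of the fix, condensed (flag plumbing omitted):

    // Copy all of AX, shift the high byte down, then take the low 8 bits.
    SDValue AX = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                        X86::AX, MVT::i16, InFlag);
    SDValue Hi = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, AX,
                             CurDAG->getTargetConstant(8, MVT::i8)), 0);
    ReplaceUses(SDValue(Node, 1),
      CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Hi));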
@@ -1682,24 +1678,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)), 0);
- // Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
- MVT::i8, Result);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
@@ -1812,6 +1793,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
}
+ // Prevent use of AH in a REX instruction by referencing AX instead.
+ // Shift it down 8 bits.
+ if (HiReg == X86::AH && Subtarget->is64Bit() &&
+ !SDValue(Node, 1).use_empty()) {
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ X86::AX, MVT::i16, InFlag);
+ InFlag = Result.getValue(2);
+
+ // If we also need AL (the quotient), get it by extracting a subreg from
+ // Result. The fast register allocator does not like multiple CopyFromReg
+ // nodes using aliasing registers.
+ if (!SDValue(Node, 0).use_empty())
+ ReplaceUses(SDValue(Node, 0),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+
+ // Shift AX right by 8 bits instead of using AH.
+ Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
+ Result,
+ CurDAG->getTargetConstant(8, MVT::i8)),
+ 0);
+ ReplaceUses(SDValue(Node, 1),
+ CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
+ }
// Copy the division (low) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1822,25 +1826,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Copy the remainder (high) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- SDValue Result;
- if (HiReg == X86::AH && Subtarget->is64Bit()) {
- // Prevent use of AH in a REX instruction by referencing AX instead.
- // Shift it down 8 bits.
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- X86::AX, MVT::i16, InFlag);
- InFlag = Result.getValue(2);
- Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
- Result,
- CurDAG->getTargetConstant(8, MVT::i8)),
- 0);
- // Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
- MVT::i8, Result);
- } else {
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
- HiReg, NVT, InFlag);
- InFlag = Result.getValue(2);
- }
+ SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
+ HiReg, NVT, InFlag);
+ InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 1), Result);
DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b02c33d..1a63474 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -62,21 +62,19 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
SDValue V2);
static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
- switch (TM.getSubtarget<X86Subtarget>().TargetType) {
- default: llvm_unreachable("unknown subtarget type");
- case X86Subtarget::isDarwin:
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- return new X8664_MachoTargetObjectFile();
+
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
+
+ if (TM.getSubtarget<X86Subtarget>().isTargetDarwin()) {
+ if (is64Bit) return new X8664_MachoTargetObjectFile();
return new TargetLoweringObjectFileMachO();
- case X86Subtarget::isELF:
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- return new X8664_ELFTargetObjectFile(TM);
+ } else if (TM.getSubtarget<X86Subtarget>().isTargetELF()) {
+ if (is64Bit) return new X8664_ELFTargetObjectFile(TM);
return new X8632_ELFTargetObjectFile(TM);
- case X86Subtarget::isMingw:
- case X86Subtarget::isCygwin:
- case X86Subtarget::isWindows:
+ } else if (TM.getSubtarget<X86Subtarget>().isTargetCOFF()) {
return new TargetLoweringObjectFileCOFF();
- }
+ }
+ llvm_unreachable("unknown subtarget type");
}
X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
@@ -347,6 +345,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (!Subtarget->hasSSE2())
setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand);
+ // On X86 and X86-64, atomic operations are lowered to locked instructions.
+ // Locked instructions, in turn, have implicit fence semantics (all memory
+ // operations are flushed before issuing the locked instruction, and they
+ // are not buffered), so we can fold away the common pattern of
+ // fence-atomic-fence.
+ setShouldFoldAtomicFences(true);
// Expand certain atomics
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
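The fence-folding comment is easiest to see from the user side. An illustrative example, not from this commit: with setShouldFoldAtomicFences(true), a sequentially consistent RMW such as

    static int counter;
    void bump() { __sync_fetch_and_add(&counter, 1); }

should lower to a single "lock addl $1, counter" with no mfence on either side, because the lock prefix already orders all earlier and later memory operations.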
@@ -611,7 +615,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false);
addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false);
addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false);
- addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false);
+
addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false);
setOperationAction(ISD::ADD, MVT::v8i8, Legal);
@@ -657,14 +661,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64);
setOperationAction(ISD::LOAD, MVT::v2i32, Promote);
AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64);
- setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
- AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64);
setOperationAction(ISD::LOAD, MVT::v1i64, Legal);
setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
@@ -672,7 +673,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom);
@@ -691,7 +691,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom);
- setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom);
setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom);
}
}
@@ -792,9 +791,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
EVT VT = SVT;
// Do not attempt to promote non-128-bit vectors
- if (!VT.is128BitVector()) {
+ if (!VT.is128BitVector())
continue;
- }
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
@@ -825,6 +823,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
if (Subtarget->hasSSE41()) {
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+
// FIXME: Do we need to handle scalar-to-vector here?
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
@@ -965,15 +974,24 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Add/Sub/Mul with overflow operations are custom lowered.
setOperationAction(ISD::SADDO, MVT::i32, Custom);
- setOperationAction(ISD::SADDO, MVT::i64, Custom);
setOperationAction(ISD::UADDO, MVT::i32, Custom);
- setOperationAction(ISD::UADDO, MVT::i64, Custom);
setOperationAction(ISD::SSUBO, MVT::i32, Custom);
- setOperationAction(ISD::SSUBO, MVT::i64, Custom);
setOperationAction(ISD::USUBO, MVT::i32, Custom);
- setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
- setOperationAction(ISD::SMULO, MVT::i64, Custom);
+
+ // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
+ // handle type legalization for these operations here.
+ //
+ // FIXME: We really should do custom legalization for addition and
+ // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
+ // than generic legalization for 64-bit multiplication-with-overflow, though.
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::SADDO, MVT::i64, Custom);
+ setOperationAction(ISD::UADDO, MVT::i64, Custom);
+ setOperationAction(ISD::SSUBO, MVT::i64, Custom);
+ setOperationAction(ISD::USUBO, MVT::i64, Custom);
+ setOperationAction(ISD::SMULO, MVT::i64, Custom);
+ }
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
@@ -992,7 +1010,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::STORE);
- setTargetDAGCombine(ISD::MEMBARRIER);
setTargetDAGCombine(ISD::ZERO_EXTEND);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
@@ -1172,6 +1189,27 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4;
}
+bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
+ unsigned &Offset) const {
+ if (!Subtarget->isTargetLinux())
+ return false;
+
+ if (Subtarget->is64Bit()) {
+ // %fs:0x28, unless we're using the kernel code model, in which case it's %gs:0x28.
+ Offset = 0x28;
+ if (getTargetMachine().getCodeModel() == CodeModel::Kernel)
+ AddressSpace = 256;
+ else
+ AddressSpace = 257;
+ } else {
+ // %gs:0x14 on i386
+ Offset = 0x14;
+ AddressSpace = 256;
+ }
+ return true;
+}
+
+
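
The address-space values reported by getStackCookieLocation follow LLVM's x86 convention that address space 256 is %gs and 257 is %fs. What the back end ultimately reads can be sketched as standalone code (GCC/Clang extended asm on x86 Linux assumed; illustrative only):

    #include <cstdint>

    // Load the glibc stack-protector canary from its per-thread slot:
    // %fs:0x28 on x86-64, %gs:0x14 on i386 -- the same Offset/AddressSpace
    // pairs returned above.
    static inline uintptr_t read_stack_guard() {
        uintptr_t guard;
    #if defined(__x86_64__)
        __asm__("movq %%fs:0x28, %0" : "=r"(guard));
    #else
        __asm__("movl %%gs:0x14, %0" : "=r"(guard));
    #endif
        return guard;
    }
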
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -1180,19 +1218,19 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
bool
X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) const {
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
- return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86);
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_X86);
}
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
@@ -1220,7 +1258,7 @@ X86TargetLowering::LowerReturn(SDValue Chain,
for (unsigned i = 0; i != RVLocs.size(); ++i) {
CCValAssign &VA = RVLocs[i];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue ValToCopy = Outs[i].Val;
+ SDValue ValToCopy = OutVals[i];
// Returns in ST0/ST1 are handled specially: these are pushed as operands to
// the RET instruction and handled by the FP Stackifier.
@@ -1308,17 +1346,34 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
report_fatal_error("SSE register return with SSE disabled");
}
+ SDValue Val;
+
// If this is a call to a function that returns an fp value on the floating
- // point stack, but where we prefer to use the value in xmm registers, copy
- // it out as F80 and use a truncate to move it from fp stack reg to xmm reg.
- if ((VA.getLocReg() == X86::ST0 ||
- VA.getLocReg() == X86::ST1) &&
- isScalarFPTypeInSSEReg(VA.getValVT())) {
- CopyVT = MVT::f80;
- }
+ // point stack, we must guarantee that the value is popped from the stack, so
+ // a CopyFromReg is not good enough - the copy instruction may be eliminated
+ // if the return value is not used. We use the FpGET_ST0 instructions
+ // instead.
+ if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
+ // If we prefer to use the value in xmm registers, copy it out as f80 and
+ // use a truncate to move it from fp stack reg to xmm reg.
+ if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
+ bool isST0 = VA.getLocReg() == X86::ST0;
+ unsigned Opc = 0;
+ if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32;
+ if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
+ if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
+ SDValue Ops[] = { Chain, InFlag };
+ Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag,
+ Ops, 2), 1);
+ Val = Chain.getValue(0);
- SDValue Val;
- if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
+ // Round the f80 to the right size, which also moves it to the appropriate
+ // xmm register.
+ if (CopyVT != VA.getValVT())
+ Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
+ // This truncation won't change the value.
+ DAG.getIntPtrConstant(1));
+ } else if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) {
// For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64.
if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
@@ -1338,15 +1393,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Val = Chain.getValue(0);
}
InFlag = Chain.getValue(2);
-
- if (CopyVT != VA.getValVT()) {
- // Round the F80 the right size, which also moves to the appropriate xmm
- // register.
- Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
- // This truncation won't change the value.
- DAG.getIntPtrConstant(1));
- }
-
InVals.push_back(Val);
}
@@ -1383,29 +1429,6 @@ ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
return Ins[0].Flags.isSRet();
}
-/// IsCalleePop - Determines whether the callee is required to pop its
-/// own arguments. Callee pop is necessary to support tail calls.
-bool X86TargetLowering::IsCalleePop(bool IsVarArg,
- CallingConv::ID CallingConv) const {
- if (IsVarArg)
- return false;
-
- switch (CallingConv) {
- default:
- return false;
- case CallingConv::X86_StdCall:
- return !Subtarget->is64Bit();
- case CallingConv::X86_FastCall:
- return !Subtarget->is64Bit();
- case CallingConv::X86_ThisCall:
- return !Subtarget->is64Bit();
- case CallingConv::Fast:
- return GuaranteedTailCallOpt;
- case CallingConv::GHC:
- return GuaranteedTailCallOpt;
- }
-}
-
/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
@@ -1483,11 +1506,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
// could be overwritten by lowering of arguments in case of a tail call.
if (Flags.isByVal()) {
int FI = MFI->CreateFixedObject(Flags.getByValSize(),
- VA.getLocMemOffset(), isImmutable, false);
+ VA.getLocMemOffset(), isImmutable);
return DAG.getFrameIndex(FI, getPointerTy());
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), isImmutable, false);
+ VA.getLocMemOffset(), isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
return DAG.getLoad(ValVT, dl, Chain, FIN,
PseudoSourceValue::getFixedStack(FI), 0,
@@ -1615,8 +1638,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (isVarArg) {
if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall)) {
- FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,
- true, false));
+ FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
@@ -1722,7 +1744,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
// Some CCs need callee pop.
- if (IsCalleePop(isVarArg, CallConv)) {
+ if (Subtarget->IsCalleePop(isVarArg, CallConv)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
@@ -1788,7 +1810,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false);
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false);
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
@@ -1802,6 +1824,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -1814,7 +1837,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
- Outs, Ins, DAG);
+ Outs, OutVals, Ins, DAG);
// Sibcalls are automatically detected tailcalls which do not require
// ABI changes.
@@ -1874,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
EVT RegVT = VA.getLocVT();
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
bool isByVal = Flags.isByVal();
@@ -2013,12 +2036,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (VA.isRegLoc())
continue;
assert(VA.isMemLoc());
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
- FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false);
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal()) {
@@ -2059,7 +2082,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
FPDiff, dl);
}
- bool WasGlobalOrExternal = false;
if (getTargetMachine().getCodeModel() == CodeModel::Large) {
assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
// In the 64-bit large code model, we have to make all calls
@@ -2067,7 +2089,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// pc-relative offset may not be large enough to hold the whole
// address.
} else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- WasGlobalOrExternal = true;
// If the callee is a GlobalAddress node (quite common, every direct call
// is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
// it.
@@ -2095,11 +2116,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
OpFlags = X86II::MO_DARWIN_STUB;
}
- Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(),
+ Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
G->getOffset(), OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
- WasGlobalOrExternal = true;
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@@ -2153,17 +2173,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Ops.push_back(InFlag);
if (isTailCall) {
- // If this is the first return lowered for this function, add the regs
- // to the liveout set for the function.
- if (MF.getRegInfo().liveout_empty()) {
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs,
- *DAG.getContext());
- CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
- for (unsigned i = 0; i != RVLocs.size(); ++i)
- if (RVLocs[i].isRegLoc())
- MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
- }
+ // We used to do:
+ //// If this is the first return lowered for this function, add the regs
+ //// to the liveout set for the function.
+ // This isn't right, although it's probably harmless on x86; liveouts
+ // should be computed from returns, not tail calls. Consider a void
+ // function making a tail call to a function returning int.
return DAG.getNode(X86ISD::TC_RETURN, dl,
NodeTys, &Ops[0], Ops.size());
}
@@ -2173,7 +2188,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
- if (IsCalleePop(isVarArg, CallConv))
+ if (Subtarget->IsCalleePop(isVarArg, CallConv))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
// If this is a call to a struct-return function, the callee
@@ -2314,6 +2329,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
bool isCalleeStructRet,
bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
if (!IsTailCallConvention(CalleeCC) &&
@@ -2332,8 +2348,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
}
- // Look for obvious safe cases to perform tail call optimization that does not
- // requite ABI changes. This is what gcc calls sibcall.
+ // Look for obvious safe cases to perform tail call optimization that do not
+ // require ABI changes. This is what gcc calls sibcall.
// Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
// emit a special epilogue.
@@ -2427,8 +2443,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
((X86TargetMachine&)getTargetMachine()).getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
if (VA.getLocInfo() == CCValAssign::Indirect)
return false;
@@ -2439,26 +2454,32 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
}
+
+ // If the tailcall address may be in a register, then make sure it's
+ // possible to register allocate for it. In 32-bit, the call address can
+ // only target EAX, EDX, or ECX since the tail call must be scheduled after
+ // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI,
+ // RDI, R8, R9, R11.
+ if (!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) {
+ unsigned Limit = Subtarget->is64Bit() ? 8 : 3;
+ unsigned NumInRegs = 0;
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ if (VA.isRegLoc()) {
+ if (++NumInRegs == Limit)
+ return false;
+ }
+ }
+ }
}
return true;
}
FastISel *
-X86TargetLowering::createFastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &vm,
- DenseMap<const BasicBlock*, MachineBasicBlock*> &bm,
- DenseMap<const AllocaInst *, int> &am,
- std::vector<std::pair<MachineInstr*, unsigned> > &pn
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &cil
-#endif
- ) const {
- return X86::createFastISel(mf, vm, bm, am, pn
-#ifndef NDEBUG
- , cil
-#endif
- );
+X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
+ return X86::createFastISel(funcInfo);
}
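
The register-pressure check added to IsEligibleForTailCallOptimization above boils down to a simple count. As a standalone restatement (illustrative; argInReg stands in for the CCValAssign walk):

    #include <vector>

    // An indirect tail call needs one register left over for the call
    // address after callee-saved registers are restored: EAX/ECX/EDX on
    // x86-32 (limit 3), RAX/RCX/RDX/RSI/RDI/R8/R9/R11 on x86-64 (limit 8).
    static bool canRegAllocIndirectTailCall(const std::vector<bool> &argInReg,
                                            bool is64Bit) {
        const unsigned Limit = is64Bit ? 8 : 3;
        unsigned NumInRegs = 0;
        for (bool inReg : argInReg)
            if (inReg && ++NumInRegs == Limit)
                return false;
        return true;
    }
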
@@ -2476,7 +2497,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
- false, false);
+ false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -3175,7 +3196,7 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
return ((isa<ConstantSDNode>(Elt) &&
- cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
+ cast<ConstantSDNode>(Elt)->isNullValue()) ||
(isa<ConstantFPSDNode>(Elt) &&
cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}
@@ -4433,7 +4454,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
}
/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
-/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be
+/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15>
@@ -4447,7 +4468,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
unsigned NumElems = VT.getVectorNumElements();
unsigned NewWidth = (NumElems == 4) ? 2 : 4;
EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
- EVT MaskEltVT = MaskVT.getVectorElementType();
EVT NewVT = MaskVT;
switch (VT.getSimpleVT().SimpleTy) {
default: assert(false && "Unexpected!");
@@ -5059,13 +5079,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const {
SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
- if (Op.getValueType() == MVT::v2f32)
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32,
- DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32,
- DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
- Op.getOperand(0))));
-
- if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64)
+
+ if (Op.getValueType() == MVT::v1i64 &&
+ Op.getOperand(0).getValueType() == MVT::i64)
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
@@ -5230,10 +5246,10 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
if (OpFlags == X86II::MO_NO_FLAG &&
X86::isOffsetSuitableForCodeModel(Offset, M)) {
// A direct static reference to a global.
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset);
Offset = 0;
} else {
- Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
+ Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
}
if (Subtarget->isPICStyleRIPRel() &&
@@ -5278,7 +5294,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
DebugLoc dl = GA->getDebugLoc();
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
@@ -5351,7 +5367,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
+ GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
@@ -5366,33 +5383,78 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
- // TODO: implement the "local dynamic" model
- // TODO: implement the "initial exec"model for pic executables
- assert(Subtarget->isTargetELF() &&
- "TLS not implemented for non-ELF targets");
+
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
const GlobalValue *GV = GA->getGlobal();
- // If GV is an alias then use the aliasee for determining
- // thread-localness.
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GV = GA->resolveAliasedGlobal(false);
-
- TLSModel::Model model = getTLSModel(GV,
- getTargetMachine().getRelocationModel());
-
- switch (model) {
- case TLSModel::GeneralDynamic:
- case TLSModel::LocalDynamic: // not implemented
- if (Subtarget->is64Bit())
- return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
- return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+ if (Subtarget->isTargetELF()) {
+ // TODO: implement the "local dynamic" model
+ // TODO: implement the "initial exec"model for pic executables
+
+ // If GV is an alias then use the aliasee for determining
+ // thread-localness.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->resolveAliasedGlobal(false);
+
+ TLSModel::Model model
+ = getTLSModel(GV, getTargetMachine().getRelocationModel());
+
+ switch (model) {
+ case TLSModel::GeneralDynamic:
+ case TLSModel::LocalDynamic: // not implemented
+ if (Subtarget->is64Bit())
+ return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
+ return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+
+ case TLSModel::InitialExec:
+ case TLSModel::LocalExec:
+ return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
+ Subtarget->is64Bit());
+ }
+ } else if (Subtarget->isTargetDarwin()) {
+ // Darwin only has one model of TLS. Lower to that.
+ unsigned char OpFlag = 0;
+ unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ?
+ X86ISD::WrapperRIP : X86ISD::Wrapper;
+
+ // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+ // global base reg.
+ bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) &&
+ !Subtarget->is64Bit();
+ if (PIC32)
+ OpFlag = X86II::MO_TLVP_PIC_BASE;
+ else
+ OpFlag = X86II::MO_TLVP;
+ DebugLoc DL = Op.getDebugLoc();
+ SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL,
+ getPointerTy(),
+ GA->getOffset(), OpFlag);
+ SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+
+ // With PIC32, the address is actually $g + Offset.
+ if (PIC32)
+ Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+ DAG.getNode(X86ISD::GlobalBaseReg,
+ DebugLoc(), getPointerTy()),
+ Offset);
+
+ // Lowering the machine ISD node will make sure everything ends up in
+ // the right location.
+ SDValue Args[] = { Offset };
+ SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1);
+
+ // TLSCALL will be codegen'ed as a call. Inform MFI that the function has calls.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setAdjustsStack(true);
- case TLSModel::InitialExec:
- case TLSModel::LocalExec:
- return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
- Subtarget->is64Bit());
+ // And our return value (tls address) is in the standard call return value
+ // location.
+ unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
+ return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy());
}
+
+ assert(false &&
+ "TLS not implemented for this target.");
llvm_unreachable("Unreachable");
return SDValue();
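
Darwin's single TLS model works by calling through a per-variable descriptor that dyld fills in; the TLSCALL node introduced above is that call. A rough sketch of the descriptor convention being targeted (field names are illustrative, based on the dyld TLV scheme; not part of this patch):

    // Each __thread variable is reached via a descriptor; calling its
    // thunk returns the per-thread address of the variable.
    struct tlv_descriptor {
        void *(*thunk)(tlv_descriptor *self); // resolved lazily by dyld
        unsigned long key;                    // per-image pthread key
        unsigned long offset;                 // offset inside the TLV block
    };

    static inline void *tlv_get_addr(tlv_descriptor *d) {
        return d->thunk(d); // what X86ISD::TLSCALL is selected into
    }
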
@@ -5715,7 +5777,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
// Load the value out, extending it from f32 to f80.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(),
FudgePtr, PseudoSourceValue::getConstantPool(),
0, MVT::f32, false, false, 4);
// Extend everything to 80 bits to force it to be done on x87.
@@ -5964,6 +6026,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
bool NeedCF = false;
bool NeedOF = false;
switch (X86CC) {
+ default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
NeedCF = true;
@@ -5973,120 +6036,129 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
case X86::COND_O: case X86::COND_NO:
NeedOF = true;
break;
- default: break;
}
// See if we can use the EFLAGS value from the operand instead of
// doing a separate TEST. TEST always sets OF and CF to 0, so unless
// we prove that the arithmetic won't overflow, we can't use OF or CF.
- if (Op.getResNo() == 0 && !NeedOF && !NeedCF) {
- unsigned Opcode = 0;
- unsigned NumOperands = 0;
- switch (Op.getNode()->getOpcode()) {
- case ISD::ADD:
- // Due to an isel shortcoming, be conservative if this add is
- // likely to be selected as part of a load-modify-store
- // instruction. When the root node in a match is a store, isel
- // doesn't know how to remap non-chain non-flag uses of other
- // nodes in the match, such as the ADD in this case. This leads
- // to the ADD being left around and reselected, with the result
- // being two adds in the output. Alas, even if none our users
- // are stores, that doesn't prove we're O.K. Ergo, if we have
- // any parents that aren't CopyToReg or SETCC, eschew INC/DEC.
- // A better fix seems to require climbing the DAG back to the
- // root, and it doesn't seem to be worth the effort.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
- goto default_case;
- if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
- // An add of one will be selected as an INC.
- if (C->getAPIntValue() == 1) {
- Opcode = X86ISD::INC;
- NumOperands = 1;
- break;
- }
- // An add of negative one (subtract of one) will be selected as a DEC.
- if (C->getAPIntValue().isAllOnesValue()) {
- Opcode = X86ISD::DEC;
- NumOperands = 1;
- break;
- }
+ if (Op.getResNo() != 0 || NeedOF || NeedCF)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ unsigned Opcode = 0;
+ unsigned NumOperands = 0;
+ switch (Op.getNode()->getOpcode()) {
+ case ISD::ADD:
+ // Due to an isel shortcoming, be conservative if this add is likely to be
+ // selected as part of a load-modify-store instruction. When the root node
+ // in a match is a store, isel doesn't know how to remap non-chain non-flag
+ // uses of other nodes in the match, such as the ADD in this case. This
+ // leads to the ADD being left around and reselected, with the result being
+ // two adds in the output. Alas, even if none of our users are stores, that
+ // doesn't prove we're O.K. Ergo, if we have any parents that aren't
+ // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require
+ // climbing the DAG back to the root, and it doesn't seem to be worth the
+ // effort.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI)
+ if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC)
+ goto default_case;
+
+ if (ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
+ // An add of one will be selected as an INC.
+ if (C->getAPIntValue() == 1) {
+ Opcode = X86ISD::INC;
+ NumOperands = 1;
+ break;
}
- // Otherwise use a regular EFLAGS-setting add.
- Opcode = X86ISD::ADD;
- NumOperands = 2;
- break;
- case ISD::AND: {
- // If the primary and result isn't used, don't bother using X86ISD::AND,
- // because a TEST instruction will be better.
- bool NonFlagUse = false;
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
- UE = Op.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
- unsigned UOpNo = UI.getOperandNo();
- if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
- // Look pass truncate.
- UOpNo = User->use_begin().getOperandNo();
- User = *User->use_begin();
- }
- if (User->getOpcode() != ISD::BRCOND &&
- User->getOpcode() != ISD::SETCC &&
- (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
- NonFlagUse = true;
- break;
- }
+
+ // An add of negative one (subtract of one) will be selected as a DEC.
+ if (C->getAPIntValue().isAllOnesValue()) {
+ Opcode = X86ISD::DEC;
+ NumOperands = 1;
+ break;
}
- if (!NonFlagUse)
+ }
+
+ // Otherwise use a regular EFLAGS-setting add.
+ Opcode = X86ISD::ADD;
+ NumOperands = 2;
+ break;
+ case ISD::AND: {
+ // If the primary result of the AND isn't used, don't bother using X86ISD::AND,
+ // because a TEST instruction will be better.
+ bool NonFlagUse = false;
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ UE = Op.getNode()->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+ unsigned UOpNo = UI.getOperandNo();
+ if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
+ // Look past the truncate.
+ UOpNo = User->use_begin().getOperandNo();
+ User = *User->use_begin();
+ }
+
+ if (User->getOpcode() != ISD::BRCOND &&
+ User->getOpcode() != ISD::SETCC &&
+ (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
+ NonFlagUse = true;
break;
+ }
}
+
+ if (!NonFlagUse)
+ break;
+ }
// FALL THROUGH
- case ISD::SUB:
- case ISD::OR:
- case ISD::XOR:
- // Due to the ISEL shortcoming noted above, be conservative if this op is
- // likely to be selected as part of a load-modify-store instruction.
- for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
+ case ISD::SUB:
+ case ISD::OR:
+ case ISD::XOR:
+ // Due to the ISEL shortcoming noted above, be conservative if this op is
+ // likely to be selected as part of a load-modify-store instruction.
+ for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
- if (UI->getOpcode() == ISD::STORE)
- goto default_case;
- // Otherwise use a regular EFLAGS-setting instruction.
- switch (Op.getNode()->getOpcode()) {
- case ISD::SUB: Opcode = X86ISD::SUB; break;
- case ISD::OR: Opcode = X86ISD::OR; break;
- case ISD::XOR: Opcode = X86ISD::XOR; break;
- case ISD::AND: Opcode = X86ISD::AND; break;
- default: llvm_unreachable("unexpected operator!");
- }
- NumOperands = 2;
- break;
- case X86ISD::ADD:
- case X86ISD::SUB:
- case X86ISD::INC:
- case X86ISD::DEC:
- case X86ISD::OR:
- case X86ISD::XOR:
- case X86ISD::AND:
- return SDValue(Op.getNode(), 1);
- default:
- default_case:
- break;
- }
- if (Opcode != 0) {
- SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0; i != NumOperands; ++i)
- Ops.push_back(Op.getOperand(i));
- SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
- DAG.ReplaceAllUsesWith(Op, New);
- return SDValue(New.getNode(), 1);
+ if (UI->getOpcode() == ISD::STORE)
+ goto default_case;
+
+ // Otherwise use a regular EFLAGS-setting instruction.
+ switch (Op.getNode()->getOpcode()) {
+ default: llvm_unreachable("unexpected operator!");
+ case ISD::SUB: Opcode = X86ISD::SUB; break;
+ case ISD::OR: Opcode = X86ISD::OR; break;
+ case ISD::XOR: Opcode = X86ISD::XOR; break;
+ case ISD::AND: Opcode = X86ISD::AND; break;
}
+
+ NumOperands = 2;
+ break;
+ case X86ISD::ADD:
+ case X86ISD::SUB:
+ case X86ISD::INC:
+ case X86ISD::DEC:
+ case X86ISD::OR:
+ case X86ISD::XOR:
+ case X86ISD::AND:
+ return SDValue(Op.getNode(), 1);
+ default:
+ default_case:
+ break;
}
- // Otherwise just emit a CMP with 0, which is the TEST pattern.
- return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
- DAG.getConstant(0, Op.getValueType()));
+ if (Opcode == 0)
+ // Emit a CMP with 0, which is the TEST pattern.
+ return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
+ DAG.getConstant(0, Op.getValueType()));
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
+ SmallVector<SDValue, 4> Ops;
+ for (unsigned i = 0; i != NumOperands; ++i)
+ Ops.push_back(Op.getOperand(i));
+
+ SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
+ DAG.ReplaceAllUsesWith(Op, New);
+ return SDValue(New.getNode(), 1);
}
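
The restructured EmitTest keeps the same selection policy, just with early exits: if the value's flags cannot be reused, fall back to the TEST pattern, which is nothing more than a compare against zero. A compact restatement of the ADD classification (illustrative only):

    // add of +1 -> INC, add of -1 -> DEC, otherwise an EFLAGS-setting ADD;
    // anything with non-flag users falls back to CMP-with-0 (i.e. TEST).
    enum FlagOp { OP_INC, OP_DEC, OP_ADD, OP_TEST };

    static FlagOp classifyAdd(long long addend, bool flagsOnlyUsers) {
        if (!flagsOnlyUsers) return OP_TEST;
        if (addend == 1)     return OP_INC;
        if (addend == -1)    return OP_DEC;
        return OP_ADD;
    }
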
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
@@ -6113,15 +6185,21 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
Op1 = Op1.getOperand(0);
SDValue LHS, RHS;
- if (Op1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0)))
- if (And10C->getZExtValue() == 1) {
- LHS = Op0;
- RHS = Op1.getOperand(1);
- }
- } else if (Op0.getOpcode() == ISD::SHL) {
+ if (Op1.getOpcode() == ISD::SHL)
+ std::swap(Op0, Op1);
+ if (Op0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
if (And00C->getZExtValue() == 1) {
+ // If we looked past a truncate, check that it's only truncating away
+ // known zeros.
+ unsigned BitWidth = Op0.getValueSizeInBits();
+ unsigned AndBitWidth = And.getValueSizeInBits();
+ if (BitWidth > AndBitWidth) {
+ APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones;
+ DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones);
+ if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth)
+ return SDValue();
+ }
LHS = Op1;
RHS = Op0.getOperand(1);
}
@@ -6172,7 +6250,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
if (Op0.getOpcode() == ISD::AND &&
Op0.hasOneUse() &&
Op1.getOpcode() == ISD::Constant &&
- cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
+ cast<ConstantSDNode>(Op1)->isNullValue() &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
if (NewSetCC.getNode())
@@ -6552,15 +6630,16 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
(X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
CCode = X86::GetOppositeBranchCondition(CCode);
CC = DAG.getConstant(CCode, MVT::i8);
- SDValue User = SDValue(*Op.getNode()->use_begin(), 0);
+ SDNode *User = *Op.getNode()->use_begin();
// Look for an unconditional branch following this conditional branch.
// We need this because we need to reverse the successors in order
// to implement FCMP_OEQ.
- if (User.getOpcode() == ISD::BR) {
- SDValue FalseBB = User.getOperand(1);
- SDValue NewBR =
- DAG.UpdateNodeOperands(User, User.getOperand(0), Dest);
+ if (User->getOpcode() == ISD::BR) {
+ SDValue FalseBB = User->getOperand(1);
+ SDNode *NewBR =
+ DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
assert(NewBR == User);
+ (void)NewBR;
Dest = FalseBB;
Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
@@ -6632,7 +6711,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Flag;
- EVT IntPtr = getPointerTy();
EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
@@ -6685,7 +6763,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
Store = DAG.getStore(Op.getOperand(0), dl,
DAG.getConstant(FuncInfo->getVarArgsFPOffset(),
MVT::i32),
- FIN, SV, 0, false, false, 0);
+ FIN, SV, 4, false, false, 0);
MemOps.push_back(Store);
// Store ptr to overflow_arg_area
@@ -6693,7 +6771,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
FIN, DAG.getIntPtrConstant(4));
SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0,
+ Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8,
false, false, 0);
MemOps.push_back(Store);
@@ -6702,7 +6780,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
FIN, DAG.getIntPtrConstant(8));
SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
getPointerTy());
- Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0,
+ Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16,
false, false, 0);
MemOps.push_back(Store);
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
@@ -6712,9 +6790,6 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
// X86-64 va_list is a struct { i32, i32, i8*, i8* }.
assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
- SDValue Chain = Op.getOperand(0);
- SDValue SrcPtr = Op.getOperand(1);
- SDValue SrcSV = Op.getOperand(2);
report_fatal_error("VAArgInst is not yet implemented for x86-64!");
return SDValue();
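
The new source-value offsets in LowerVASTART (0, 4, 8, 16) simply name the four distinct fields of the x86-64 SysV va_list instead of aliasing everything at offset 0. The layout in question, written out for reference (per the SysV ABI definition quoted in the assert above):

    #include <cstdint>

    struct va_list_x86_64 {
        uint32_t gp_offset;         // offset 0: next GP register slot
        uint32_t fp_offset;         // offset 4: next XMM register slot
        void    *overflow_arg_area; // offset 8: arguments passed on the stack
        void    *reg_save_area;     // offset 16: spilled register arguments
    };
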
@@ -7733,6 +7808,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
+ case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";
@@ -7944,8 +8020,11 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors to thisMBB to nextMBB
- nextMBB->transferSuccessors(thisMBB);
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
@@ -7955,17 +8034,17 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
newMBB->addSuccessor(newMBB);
// Insert instructions into newMBB based on incoming instruction
- assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 &&
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
"unexpected number of operands");
DebugLoc dl = bInstr->getDebugLoc();
MachineOperand& destOper = bInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86AddrNumOperands];
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
int numArgs = bInstr->getNumOperands() - 1;
for (int i=0; i < numArgs; ++i)
argOpers[i] = &bInstr->getOperand(i+1);
// x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
int valArgIndx = lastAddrIndx + 1;
unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
@@ -8008,7 +8087,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
- F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
@@ -8053,8 +8132,11 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors to thisMBB to nextMBB
- nextMBB->transferSuccessors(thisMBB);
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(bInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
@@ -8066,12 +8148,12 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
DebugLoc dl = bInstr->getDebugLoc();
// Insert instructions into newMBB based on incoming instruction
// There are 8 "real" operands plus 9 implicit def/uses, ignored here.
- assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 &&
+ assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 &&
"unexpected number of operands");
MachineOperand& dest1Oper = bInstr->getOperand(0);
MachineOperand& dest2Oper = bInstr->getOperand(1);
- MachineOperand* argOpers[2 + X86AddrNumOperands];
- for (int i=0; i < 2 + X86AddrNumOperands; ++i) {
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
+ for (int i=0; i < 2 + X86::AddrNumOperands; ++i) {
argOpers[i] = &bInstr->getOperand(i+2);
// We use some of the operands multiple times, so conservatively just
@@ -8081,7 +8163,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
}
// x86 address has 5 operands: base, index, scale, displacement, and segment.
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
@@ -8171,7 +8253,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
- F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now.
+ bInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
@@ -8205,8 +8287,11 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
F->insert(MBBIter, newMBB);
F->insert(MBBIter, nextMBB);
- // Move all successors of thisMBB to nextMBB
- nextMBB->transferSuccessors(thisMBB);
+ // Transfer the remainder of thisMBB and its successor edges to nextMBB.
+ nextMBB->splice(nextMBB->begin(), thisMBB,
+ llvm::next(MachineBasicBlock::iterator(mInstr)),
+ thisMBB->end());
+ nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB);
// Update thisMBB to fall through to newMBB
thisMBB->addSuccessor(newMBB);
@@ -8217,16 +8302,16 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
DebugLoc dl = mInstr->getDebugLoc();
// Insert instructions into newMBB based on incoming instruction
- assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 &&
+ assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 &&
"unexpected number of operands");
MachineOperand& destOper = mInstr->getOperand(0);
- MachineOperand* argOpers[2 + X86AddrNumOperands];
+ MachineOperand* argOpers[2 + X86::AddrNumOperands];
int numArgs = mInstr->getNumOperands() - 1;
for (int i=0; i < numArgs; ++i)
argOpers[i] = &mInstr->getOperand(i+1);
// x86 address has 4 operands: base, index, scale, and displacement
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
+ int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3]
int valArgIndx = lastAddrIndx + 1;
unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
@@ -8274,7 +8359,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
// insert branch
BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);
- F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now.
+ mInstr->eraseFromParent(); // The pseudo instruction is gone now.
return nextMBB;
}
@@ -8284,7 +8369,6 @@ MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
unsigned numArgs, bool memArg) const {
- MachineFunction *F = BB->getParent();
DebugLoc dl = MI->getDebugLoc();
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
@@ -8306,7 +8390,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
.addReg(X86::XMM0);
- F->DeleteMachineInstr(MI);
+ MI->eraseFromParent();
return BB;
}
@@ -8335,9 +8419,12 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
F->insert(MBBIter, XMMSaveMBB);
F->insert(MBBIter, EndMBB);
- // Set up the CFG.
- // Move any original successors of MBB to the end block.
- EndMBB->transferSuccessors(MBB);
+ // Transfer the remainder of MBB and its successor edges to EndMBB.
+ EndMBB->splice(EndMBB->begin(), MBB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ MBB->end());
+ EndMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
// The original block will now fall through to the XMM save block.
MBB->addSuccessor(XMMSaveMBB);
// The XMMSaveMBB will fall through to the end block.
@@ -8376,7 +8463,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
.addMemOperand(MMO);
}
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return EndMBB;
}
@@ -8405,24 +8492,39 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- unsigned Opc =
- X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
- BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+
+ // If the EFLAGS register isn't dead in the terminator, then claim that it's
+ // live into the sink and copy blocks.
+ const MachineFunction *MF = BB->getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ BitVector ReservedRegs = TRI->getReservedRegs(*MF);
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+ if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg != X86::EFLAGS) continue;
+ copy0MBB->addLiveIn(Reg);
+ sinkMBB->addLiveIn(Reg);
+ }
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
// Add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
+ // Create the conditional branch instruction.
+ unsigned Opc =
+ X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
+ BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -8431,11 +8533,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
// sinkMBB:
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
- BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg())
+ BuildMI(*sinkMBB, sinkMBB->begin(), DL,
+ TII->get(X86::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return sinkMBB;
}
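
The block diagram in the comments is an ordinary select lowered to a triangle of blocks, with the PHI inserted at the top of sinkMBB joining the two incoming values. The same shape in plain C++ (illustrative):

    // thisMBB:  conditional branch to sinkMBB when CC holds;
    // copy0MBB: computes %FalseValue and falls through;
    // sinkMBB:  %Result = phi [FalseValue, copy0MBB], [TrueValue, thisMBB].
    static int lowered_select(bool cc, int trueVal, int falseVal) {
        int result = trueVal;
        if (!cc)
            result = falseVal;
        return result;
    }
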
@@ -8444,21 +8547,70 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
- MachineFunction *F = BB->getParent();
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP.
// FIXME: The code should be tweaked as soon as we try to do codegen for
// mingw-w64.
- BuildMI(BB, DL, TII->get(X86::CALLpcrel32))
+ BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
.addExternalSymbol("_alloca")
.addReg(X86::EAX, RegState::Implicit)
.addReg(X86::ESP, RegState::Implicit)
.addReg(X86::EAX, RegState::Define | RegState::Implicit)
.addReg(X86::ESP, RegState::Define | RegState::Implicit);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+}
+
+MachineBasicBlock *
+X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const {
+ // This is pretty easy. We're taking the value that we received from
+ // our load from the relocation, sticking it in either RDI (x86-64)
+ // or EAX and doing an indirect call. The return value will then
+ // be in the normal return register.
+ const X86InstrInfo *TII
+ = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo());
+ DebugLoc DL = MI->getDebugLoc();
+ MachineFunction *F = BB->getParent();
+
+ assert(MI->getOperand(3).isGlobal() && "This should be a global");
+
+ if (Subtarget->is64Bit()) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV64rm), X86::RDI)
+ .addReg(X86::RIP)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
+ addDirectMem(MIB, X86::RDI);
+ } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(0)
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ } else {
+ MachineInstrBuilder MIB = BuildMI(*BB, MI, DL,
+ TII->get(X86::MOV32rm), X86::EAX)
+ .addReg(TII->getGlobalBaseReg(F))
+ .addImm(0).addReg(0)
+ .addGlobalAddress(MI->getOperand(3).getGlobal(), 0,
+ MI->getOperand(3).getTargetFlags())
+ .addReg(0);
+ MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
+ addDirectMem(MIB, X86::EAX);
+ }
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -8469,6 +8621,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
default: assert(false && "Unexpected instr type to insert");
case X86::MINGW_ALLOCA:
return EmitLoweredMingwAlloca(MI, BB);
+ case X86::TLSCall_32:
+ case X86::TLSCall_64:
+ return EmitLoweredTLSCall(MI, BB);
case X86::CMOV_GR8:
case X86::CMOV_V1I64:
case X86::CMOV_FR32:
@@ -8499,23 +8654,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned OldCW =
F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
- addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW),
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
CWFrameIdx);
// Set the high part to be round to zero...
- addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
.addImm(0xC7F);
// Reload the modified control word now...
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
// Restore the memory image of control word to original value
- addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
+ addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
.addReg(OldCW);
// Get the X86 opcode to use.
@@ -8554,13 +8711,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
} else {
AM.Disp = Op.getImm();
}
- addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM)
- .addReg(MI->getOperand(X86AddrNumOperands).getReg());
+ addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
+ .addReg(MI->getOperand(X86::AddrNumOperands).getReg());
// Reload the original control word now.
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);
+ addFrameReference(BuildMI(*BB, MI, DL,
+ TII->get(X86::FLDCW16m)), CWFrameIdx);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
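
The FNSTCW/FLDCW sequence above exists because x87 FIST honors the current rounding mode, while FP-to-integer conversion must truncate. A standalone sketch of the same save/force/convert/restore dance (GCC/Clang extended asm assumed; illustrative only):

    #include <cstdint>

    static int32_t x87_trunc(double x) {
        uint16_t cw, cw_rz;
        int32_t out;
        __asm__ volatile("fnstcw %0" : "=m"(cw));   // save control word
        cw_rz = cw | 0x0C00;                        // RC = 11b: round to zero
        __asm__ volatile("fldcw %0" : : "m"(cw_rz));
        __asm__ volatile("fldl %1\n\tfistpl %0"     // load, convert, pop
                         : "=m"(out) : "m"(x));
        __asm__ volatile("fldcw %0" : : "m"(cw));   // restore original mode
        return out;
    }
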
// String/text processing lowering.
@@ -9513,8 +9671,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
- if (SumC->getSExtValue() == Bits &&
- ShAmt1.getOperand(1) == ShAmt0)
+ SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
+ if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
+ ShAmt1Op1 = ShAmt1Op1.getOperand(0);
+ if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
return DAG.getNode(Opc, DL, VT,
Op0, Op1,
DAG.getNode(ISD::TRUNCATE, DL,
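
This combine recognizes the double-shift idiom — or (shl X, A), (srl Y, Bits - A) — and now also looks through a truncate of the subtracted shift amount. When X and Y are the same value the idiom is just a rotate (illustrative, with the shift amounts masked to stay well-defined):

    #include <cstdint>

    // Selected as ROL (or SHLD when the two inputs differ) on x86.
    static uint32_t rotl32(uint32_t x, unsigned amt) {
        amt &= 31;
        return (x << amt) | (x >> ((32 - amt) & 31));
    }
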
@@ -9710,58 +9870,6 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-// On X86 and X86-64, atomic operations are lowered to locked instructions.
-// Locked instructions, in turn, have implicit fence semantics (all memory
-// operations are flushed before issuing the locked instruction, and the
-// are not buffered), so we can fold away the common pattern of
-// fence-atomic-fence.
-static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
- SDValue atomic = N->getOperand(0);
- switch (atomic.getOpcode()) {
- case ISD::ATOMIC_CMP_SWAP:
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- break;
- default:
- return SDValue();
- }
-
- SDValue fence = atomic.getOperand(0);
- if (fence.getOpcode() != ISD::MEMBARRIER)
- return SDValue();
-
- switch (atomic.getOpcode()) {
- case ISD::ATOMIC_CMP_SWAP:
- return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
- atomic.getOperand(1), atomic.getOperand(2),
- atomic.getOperand(3));
- case ISD::ATOMIC_SWAP:
- case ISD::ATOMIC_LOAD_ADD:
- case ISD::ATOMIC_LOAD_SUB:
- case ISD::ATOMIC_LOAD_AND:
- case ISD::ATOMIC_LOAD_OR:
- case ISD::ATOMIC_LOAD_XOR:
- case ISD::ATOMIC_LOAD_NAND:
- case ISD::ATOMIC_LOAD_MIN:
- case ISD::ATOMIC_LOAD_MAX:
- case ISD::ATOMIC_LOAD_UMIN:
- case ISD::ATOMIC_LOAD_UMAX:
- return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
- atomic.getOperand(1), atomic.getOperand(2));
- default:
- return SDValue();
- }
-}
-
static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
// (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
// (and (i32 x86isd::setcc_carry), 1)
@@ -9809,7 +9917,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
- case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG);
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG);
}
@@ -9932,8 +10039,8 @@ static bool LowerToBSwap(CallInst *CI) {
// so don't worry about this.
// Verify this is a simple bswap.
- if (CI->getNumOperands() != 2 ||
- CI->getType() != CI->getOperand(1)->getType() ||
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
!CI->getType()->isIntegerTy())
return false;
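(Aside, for orientation: getArgOperand(0) names the first actual argument regardless of where the callee sits in the CallInst operand list, which is why the count check above drops from two operands to one argument. For %r = call i32 @llvm.bswap.i32(i32 %x), getArgOperand(0) is %x.)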
@@ -9946,7 +10053,7 @@ static bool LowerToBSwap(CallInst *CI) {
Module *M = CI->getParent()->getParent()->getParent();
Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Op = CallInst::Create(Int, Op, CI->getName(), CI);
CI->replaceAllUsesWith(Op);
@@ -10079,7 +10186,6 @@ LowerXConstraint(EVT ConstraintVT) const {
/// vector. If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char Constraint,
- bool hasMemory,
std::vector<SDValue>&Ops,
SelectionDAG &DAG) const {
SDValue Result(0, 0);
@@ -10121,9 +10227,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'e': {
// 32-bit signed value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- const ConstantInt *CI = C->getConstantIntValue();
- if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
- C->getSExtValue())) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getSExtValue())) {
// Widen to 64 bits here to get it sign extended.
Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
break;
@@ -10136,9 +10241,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'Z': {
// 32-bit unsigned value
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
- const ConstantInt *CI = C->getConstantIntValue();
- if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
- C->getZExtValue())) {
+ if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
+ C->getZExtValue())) {
Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
break;
}
@@ -10155,6 +10259,12 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
break;
}
+ // In any sort of PIC mode addresses need to be computed at runtime by
+ // adding in a register or some sort of table lookup. These can't
+ // be used as immediates.
+ if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC())
+ return;
+
// If we are in non-pic codegen mode, we allow the address of a global (with
// an optional displacement) to be used with 'i'.
GlobalAddressSDNode *GA = 0;
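To make the new PIC bail-out concrete, here is a minimal, hypothetical C++ user of the 'i' constraint (not taken from this patch). Compiled without PIC it is accepted; with -fPIC the operand below is no longer a link-time constant, and the lowering above now declines it instead of producing a bogus immediate:

    // Hypothetical illustration: under PIC the address of 'g' is computed at
    // runtime, so it cannot satisfy the 'i' (immediate) constraint.
    extern int g;
    void take_address_as_immediate() {
      asm volatile("# would need imm: %0" ::"i"(&g));
    }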
@@ -10190,11 +10300,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
getTargetMachine())))
return;
- if (hasMemory)
- Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
- else
- Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
- Result = Op;
+ Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+ GA->getValueType(0), Offset);
break;
}
}
@@ -10203,8 +10310,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
Ops.push_back(Result);
return;
}
- return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
- Ops, DAG);
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
std::vector<unsigned> X86TargetLowering::
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 1ef1a7b..2d28e5c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -196,6 +196,10 @@ namespace llvm {
// TLSADDR - Thread Local Storage.
TLSADDR,
+
+ // TLSCALL - Thread Local Storage. A call to an OS-provided thunk
+ // located at the address from an earlier relocation.
+ TLSCALL,
// SegmentBaseAddress - The address segment:0
SegmentBaseAddress,
@@ -496,7 +500,6 @@ namespace llvm {
/// being processed is 'm'.
virtual void LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter,
- bool hasMemory,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
@@ -576,20 +579,17 @@ namespace llvm {
/// createFastISel - This method returns a target specific FastISel object,
/// or null if the target does not support "fast" ISel.
- virtual FastISel *
- createFastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &,
- DenseMap<const BasicBlock *, MachineBasicBlock *> &,
- DenseMap<const AllocaInst *, int> &,
- std::vector<std::pair<MachineInstr*, unsigned> > &
-#ifndef NDEBUG
- , SmallSet<const Instruction *, 8> &
-#endif
- ) const;
+ virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const;
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ /// getStackCookieLocation - Return true if the target stores stack
+ /// protector cookies at a fixed offset in some non-standard address
+ /// space, and populates the address space and offset as
+ /// appropriate.
+ virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
+
private:
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
@@ -643,6 +643,7 @@ namespace llvm {
bool isCalleeStructRet,
bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
@@ -725,6 +726,7 @@ namespace llvm {
LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -733,13 +735,13 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) const;
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const;
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp) const;
@@ -794,6 +796,9 @@ namespace llvm {
MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI,
MachineBasicBlock *BB) const;
+
+ MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
+ MachineBasicBlock *BB) const;
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent, for use with the given x86 condition code.
@@ -806,15 +811,7 @@ namespace llvm {
};
namespace X86 {
- FastISel *createFastISel(MachineFunction &mf,
- DenseMap<const Value *, unsigned> &,
- DenseMap<const BasicBlock *, MachineBasicBlock *> &,
- DenseMap<const AllocaInst *, int> &,
- std::vector<std::pair<MachineInstr*, unsigned> > &
-#ifndef NDEBUG
- , SmallSet<const Instruction*, 8> &
-#endif
- );
+ FastISel *createFastISel(FunctionLoweringInfo &funcInfo);
}
}
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 97eb17c..42d0e7f 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -35,6 +35,14 @@ def i64i8imm : Operand<i64> {
let ParserMatchClass = ImmSExti64i8AsmOperand;
}
+def lea64_32mem : Operand<i32> {
+ let PrintMethod = "printi32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
+ let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+
// Special i64mem for addresses of load folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
// after callee-saved register are popped.
@@ -44,29 +52,16 @@ def i64mem_TC : Operand<i64> {
let ParserMatchClass = X86MemAsmOperand;
}
-def lea64mem : Operand<i64> {
- let PrintMethod = "printlea64mem";
- let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm);
- let ParserMatchClass = X86NoSegMemAsmOperand;
-}
-
-def lea64_32mem : Operand<i32> {
- let PrintMethod = "printlea64_32mem";
- let AsmOperandLowerMethod = "lower_lea64_32mem";
- let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
- let ParserMatchClass = X86NoSegMemAsmOperand;
-}
-
//===----------------------------------------------------------------------===//
// Complex Pattern Definitions.
//
-def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
+def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex,
X86WrapperRIP], []>;
-def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
+def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
-
+
//===----------------------------------------------------------------------===//
// Pattern fragments.
//
@@ -289,11 +284,11 @@ def LEA64_32r : I<0x8D, MRMSrcMem,
[(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>;
let isReMaterializable = 1 in
-def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src),
+def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"lea{q}\t{$src|$dst}, {$dst|$src}",
[(set GR64:$dst, lea64addr:$src)]>;
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
"bswap{q}\t$dst",
[(set GR64:$dst, (bswap GR64:$src))]>, TB;
@@ -521,7 +516,7 @@ let Defs = [EFLAGS] in {
def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src),
"add{q}\t{$src, %rax|%rax, $src}", []>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isConvertibleToThreeAddress = 1 in {
let isCommutable = 1 in
// Register-Register Addition
@@ -559,7 +554,7 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86add_flag GR64:$src1, (load addr:$src2)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
// Memory-Register Addition
def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
@@ -580,7 +575,7 @@ let Uses = [EFLAGS] in {
def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src),
"adc{q}\t{$src, %rax|%rax, $src}", []>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -606,7 +601,7 @@ def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"adc{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"adc{q}\t{$src2, $dst|$dst, $src2}",
@@ -621,7 +616,7 @@ def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
addr:$dst)]>;
} // Uses = [EFLAGS]
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
// Register-Register Subtraction
def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -653,7 +648,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
"sub{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src),
"sub{q}\t{$src, %rax|%rax, $src}", []>;
@@ -677,7 +672,7 @@ def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
(implicit EFLAGS)]>;
let Uses = [EFLAGS] in {
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"sbb{q}\t{$src2, $dst|$dst, $src2}",
@@ -702,7 +697,7 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"sbb{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src),
"sbb{q}\t{$src, %rax|%rax, $src}", []>;
@@ -736,7 +731,7 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src),
}
let Defs = [EFLAGS] in {
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
// Register-Register Signed Integer Multiplication
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
@@ -751,7 +746,7 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
"imul{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
// Surprisingly enough, these are not two address instructions!
@@ -803,7 +798,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
// Unary instructions
let Defs = [EFLAGS], CodeSize = 2 in {
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst",
[(set GR64:$dst, (ineg GR64:$src)),
(implicit EFLAGS)]>;
@@ -811,14 +806,14 @@ def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
[(store (ineg (loadi64 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>;
-let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>;
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)]>;
-let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
[(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>;
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
@@ -826,7 +821,7 @@ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
(implicit EFLAGS)]>;
// In 64-bit mode, single byte INC and DEC cannot be encoded.
-let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in {
+let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in {
// Can transform into LEA.
def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src),
"inc{w}\t$dst",
@@ -844,38 +839,36 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src),
"dec{l}\t$dst",
[(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
Requires<[In64BitMode]>;
-} // isConvertibleToThreeAddress
+} // Constraints = "$src = $dst", isConvertibleToThreeAddress
// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
// how to unfold them.
-let isTwoAddress = 0, CodeSize = 2 in {
- def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
- [(store (add (loadi16 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In64BitMode]>;
- def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
- [(store (add (loadi32 addr:$dst), 1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In64BitMode]>;
- def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
- [(store (add (loadi16 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- OpSize, Requires<[In64BitMode]>;
- def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
- [(store (add (loadi32 addr:$dst), -1), addr:$dst),
- (implicit EFLAGS)]>,
- Requires<[In64BitMode]>;
-}
+def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), 1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
+def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
+ [(store (add (loadi16 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ OpSize, Requires<[In64BitMode]>;
+def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
+ [(store (add (loadi32 addr:$dst), -1), addr:$dst),
+ (implicit EFLAGS)]>,
+ Requires<[In64BitMode]>;
} // Defs = [EFLAGS], CodeSize
let Defs = [EFLAGS] in {
// Shift instructions
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in
-def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src),
+def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (shl GR64:$src, CL))]>;
+ [(set GR64:$dst, (shl GR64:$src1, CL))]>;
let isConvertibleToThreeAddress = 1 in // Can transform into LEA.
def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
@@ -885,7 +878,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst),
// 'add reg,reg' is cheaper.
def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
"shl{q}\t$dst", []>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in
def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
@@ -898,18 +891,18 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
"shl{q}\t$dst",
[(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in
-def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src),
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
"shr{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (srl GR64:$src, CL))]>;
+ [(set GR64:$dst, (srl GR64:$src1, CL))]>;
def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
"shr{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
def SHR64r1 : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
"shr{q}\t$dst",
[(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in
def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
@@ -922,11 +915,11 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst),
"shr{q}\t$dst",
[(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in
-def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src),
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
"sar{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (sra GR64:$src, CL))]>;
+ [(set GR64:$dst, (sra GR64:$src1, CL))]>;
def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"sar{q}\t{$src2, $dst|$dst, $src2}",
@@ -934,7 +927,7 @@ def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
def SAR64r1 : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
"sar{q}\t$dst",
[(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src = $dst"
let Uses = [CL] in
def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
@@ -949,7 +942,7 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
// Rotate instructions
-let isTwoAddress = 1 in {
+let Constraints = "$src = $dst" in {
def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
"rcl{q}\t{1, $dst|$dst, 1}", []>;
def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
@@ -966,9 +959,8 @@ def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
"rcr{q}\t{%cl, $dst|$dst, CL}", []>;
}
-}
+} // Constraints = "$src = $dst"
-let isTwoAddress = 0 in {
def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
"rcl{q}\t{1, $dst|$dst, 1}", []>;
def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
@@ -984,13 +976,12 @@ def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
"rcr{q}\t{%cl, $dst|$dst, CL}", []>;
}
-}
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in
-def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
"rol{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotl GR64:$src, CL))]>;
+ [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"rol{q}\t{$src2, $dst|$dst, $src2}",
@@ -998,7 +989,7 @@ def ROL64ri : RIi8<0xC1, MRM0r, (outs GR64:$dst),
def ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
"rol{q}\t$dst",
[(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in
def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst),
@@ -1011,11 +1002,11 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst),
"rol{q}\t$dst",
[(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in
-def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src),
+def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t{%cl, $dst|$dst, %CL}",
- [(set GR64:$dst, (rotr GR64:$src, CL))]>;
+ [(set GR64:$dst, (rotr GR64:$src1, CL))]>;
def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i8imm:$src2),
"ror{q}\t{$src2, $dst|$dst, $src2}",
@@ -1023,7 +1014,7 @@ def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t$dst",
[(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in
def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst),
@@ -1037,7 +1028,7 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst),
[(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
// Double shift instructions (generalizations of rotate)
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let Uses = [CL] in {
def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -1067,7 +1058,7 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg,
(i8 imm:$src3)))]>,
TB;
} // isCommutable
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
let Uses = [CL] in {
def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
@@ -1097,7 +1088,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem,
// Logical Instructions...
//
-let isTwoAddress = 1 , AddedComplexity = 15 in
+let Constraints = "$src = $dst" , AddedComplexity = 15 in
def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst",
[(set GR64:$dst, (not GR64:$src))]>;
def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
@@ -1107,7 +1098,7 @@ let Defs = [EFLAGS] in {
def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
"and{q}\t{$src, %rax|%rax, $src}", []>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def AND64rr : RI<0x21, MRMDestReg,
(outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -1134,7 +1125,7 @@ def AND64ri32 : RIi32<0x81, MRM4r,
"and{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86and_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def AND64mr : RI<0x21, MRMDestMem,
(outs), (ins i64mem:$dst, GR64:$src),
@@ -1152,7 +1143,7 @@ def AND64mi32 : RIi32<0x81, MRM4m,
[(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -1179,7 +1170,7 @@ def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
"or{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86or_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"or{q}\t{$src, $dst|$dst, $src}",
@@ -1197,7 +1188,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
"or{q}\t{$src, %rax|%rax, $src}", []>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let isCommutable = 1 in
def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -1224,7 +1215,7 @@ def XOR64ri32 : RIi32<0x81, MRM6r,
"xor{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86xor_flag GR64:$src1, i64immSExt32:$src2))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"xor{q}\t{$src, $dst|$dst, $src}",
@@ -1366,7 +1357,7 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2),
} // Defs = [EFLAGS]
// Conditional moves
-let Uses = [EFLAGS], isTwoAddress = 1 in {
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
let isCommutable = 1 in {
def CMOVB64rr : RI<0x42, MRMSrcReg, // if <u, GR64 = GR64
(outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
@@ -1530,7 +1521,7 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64]
"cmovno{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (X86cmov GR64:$src1, (loadi64 addr:$src2),
X86_COND_NO, EFLAGS))]>, TB;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
// Use sbb to materialize carry flag into a GPR.
// FIXME: This are pseudo ops that should be replaced with Pat<> patterns.
@@ -1588,7 +1579,7 @@ def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
"cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
@@ -1601,7 +1592,7 @@ def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
[(set VR128:$dst,
(int_x86_sse2_cvtsi642sd VR128:$src1,
(loadi64 addr:$src2)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
// Signed i64 -> f32
def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
@@ -1611,7 +1602,7 @@ def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
"cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
"cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
@@ -1625,7 +1616,7 @@ let isTwoAddress = 1 in {
[(set VR128:$dst,
(int_x86_sse_cvtsi642ss VR128:$src1,
(loadi64 addr:$src2)))]>;
-}
+} // Constraints = "$src1 = $dst"
// f32 -> signed i64
def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
@@ -1691,6 +1682,7 @@ def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
// Thread Local Storage Instructions
//===----------------------------------------------------------------------===//
+// ELF TLS Support
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
@@ -1700,7 +1692,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [RSP] in
-def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
".byte\t0x66; "
"leaq\t$sym(%rip), %rdi; "
".word\t0x6666; "
@@ -1709,6 +1701,17 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
[(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
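(Hedged aside: the .byte 0x66 / .word 0x6666 / rex64 padding above keeps the general-dynamic TLS call sequence at the fixed length linkers rely on when relaxing it to the initial-exec or local-exec models.)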
+// Darwin TLS Support
+// For x86_64, the address of the thunk is passed in %rdi; on return,
+// the address of the variable is in %rax. All other registers are preserved.
+let Defs = [RAX],
+ Uses = [RDI],
+ usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+ "# TLSCall_64",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In64BitMode]>;
+
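(Likewise hedged, since the expansion lives in the custom inserter declared earlier: the expected Darwin sequence for this pseudo is movq _var@TLVP(%rip), %rdi followed by callq *(%rdi), with the variable's address coming back in %rax exactly as the comment states.)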
let AddedComplexity = 5, isCodeGenOnly = 1 in
def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"movq\t%gs:$src, $dst",
@@ -1964,6 +1967,17 @@ def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
(TCRETURNdi64 texternalsym:$dst, imm:$off)>,
Requires<[In64BitMode]>;
+// tls has some funny stuff here...
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+ (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+ (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+// This corresponds to mov foo@tpoff(%rbx), %eax
+def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
+ (MOV64rm tglobaltlsaddr :$dst)>;
+
// Comparisons.
// TEST R,R is smaller than CMP R,0
@@ -2332,45 +2346,3 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
-//===----------------------------------------------------------------------===//
-// X86-64 SSE4.1 Instructions
-//===----------------------------------------------------------------------===//
-
-/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
-multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set GR64:$dst,
- (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
- def mr : SS4AIi8<opc, MRMDestMem, (outs),
- (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
- addr:$dst)]>, OpSize, REX_W;
-}
-
-defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
-
-let isTwoAddress = 1 in {
- multiclass SS41I_insert64<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
- OpSize, REX_W;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
- imm:$src3)))]>, OpSize, REX_W;
- }
-}
-
-defm PINSRQ : SS41I_insert64<0x22, "pinsrq">;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 5a82a7b..2a6a71d 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -64,19 +64,15 @@ struct X86AddressMode {
///
static inline const MachineInstrBuilder &
addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
- // Because memory references are always represented with four
- // values, this adds: Reg, [1, NoReg, 0] to the instruction.
- return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0);
+ // Because memory references are always represented with five
+ // values, this adds: Reg, 1, NoReg, 0, NoReg to the instruction.
+ return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0);
}
-static inline const MachineInstrBuilder &
-addLeaOffset(const MachineInstrBuilder &MIB, int Offset) {
- return MIB.addImm(1).addReg(0).addImm(Offset);
-}
static inline const MachineInstrBuilder &
addOffset(const MachineInstrBuilder &MIB, int Offset) {
- return addLeaOffset(MIB, Offset).addReg(0);
+ return MIB.addImm(1).addReg(0).addImm(Offset).addReg(0);
}
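A hedged sketch of how the five-value form is consumed (the opcode, registers, and surrounding locals are illustrative, not from this header):

    // Load 32 bits from [RSI] into EAX; addDirectMem appends the memory
    // reference as Base, Scale, Index, Disp, Segment = RSI, 1, NoReg, 0, NoReg.
    MachineInstrBuilder MIB =
        BuildMI(MBB, InsertPt, DL, TII->get(X86::MOV32rm), X86::EAX);
    addDirectMem(MIB, X86::RSI);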
/// addRegOffset - This function is used to add a memory reference of the form
@@ -89,25 +85,20 @@ addRegOffset(const MachineInstrBuilder &MIB,
return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
}
-static inline const MachineInstrBuilder &
-addLeaRegOffset(const MachineInstrBuilder &MIB,
- unsigned Reg, bool isKill, int Offset) {
- return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset);
-}
-
/// addRegReg - This function is used to add a memory reference of the form:
/// [Reg + Reg].
static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB,
unsigned Reg1, bool isKill1,
unsigned Reg2, bool isKill2) {
return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1)
- .addReg(Reg2, getKillRegState(isKill2)).addImm(0);
+ .addReg(Reg2, getKillRegState(isKill2)).addImm(0).addReg(0);
}
static inline const MachineInstrBuilder &
-addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) {
- assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
-
+addFullAddress(const MachineInstrBuilder &MIB,
+ const X86AddressMode &AM) {
+ assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8);
+
if (AM.BaseType == X86AddressMode::RegBase)
MIB.addReg(AM.Base.Reg);
else if (AM.BaseType == X86AddressMode::FrameIndexBase)
@@ -116,15 +107,11 @@ addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) {
assert (0);
MIB.addImm(AM.Scale).addReg(AM.IndexReg);
if (AM.GV)
- return MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
+ MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
else
- return MIB.addImm(AM.Disp);
-}
-
-static inline const MachineInstrBuilder &
-addFullAddress(const MachineInstrBuilder &MIB,
- const X86AddressMode &AM) {
- return addLeaAddress(MIB, AM).addReg(0);
+ MIB.addImm(AM.Disp);
+
+ return MIB.addReg(0);
}
/// addFrameReference - This function is used to add a reference to the base of
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index 0aae4a8..da93de9 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -371,7 +371,7 @@ multiclass FPCMov<PatLeaf cc> {
Requires<[HasCMov]>;
}
-let Uses = [EFLAGS], isTwoAddress = 1 in {
+let Uses = [EFLAGS], Constraints = "$src1 = $dst" in {
defm CMOVB : FPCMov<X86_COND_B>;
defm CMOVBE : FPCMov<X86_COND_BE>;
defm CMOVE : FPCMov<X86_COND_E>;
@@ -380,7 +380,7 @@ defm CMOVNB : FPCMov<X86_COND_AE>;
defm CMOVNBE: FPCMov<X86_COND_A>;
defm CMOVNE : FPCMov<X86_COND_NE>;
defm CMOVNP : FPCMov<X86_COND_NP>;
-}
+} // Uses = [EFLAGS], Constraints = "$src1 = $dst"
let Predicates = [HasCMov] in {
// These are not factored because there's no clean way to pass DA/DB.
@@ -680,19 +680,19 @@ def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
-def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>,
+def : Pat<(f64 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
Requires<[FPStackf32]>;
-def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>,
+def : Pat<(f80 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
Requires<[FPStackf32]>;
-def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>,
+def : Pat<(f80 (fextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
Requires<[FPStackf64]>;
// FP truncations map onto simple pseudo-value conversions if they are to/from
// the FP stack. We have validated that only value-preserving truncations make
// it through isel.
-def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>,
+def : Pat<(f32 (fround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
Requires<[FPStackf32]>;
-def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>,
+def : Pat<(f32 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
Requires<[FPStackf32]>;
-def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>,
+def : Pat<(f64 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
Requires<[FPStackf64]>;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index c4522f3..97578af 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -50,9 +50,10 @@ def NoImm : ImmType<0>;
def Imm8 : ImmType<1>;
def Imm8PCRel : ImmType<2>;
def Imm16 : ImmType<3>;
-def Imm32 : ImmType<4>;
-def Imm32PCRel : ImmType<5>;
-def Imm64 : ImmType<6>;
+def Imm16PCRel : ImmType<4>;
+def Imm32 : ImmType<5>;
+def Imm32PCRel : ImmType<6>;
+def Imm64 : ImmType<7>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
@@ -101,6 +102,10 @@ class XS { bits<4> Prefix = 12; }
class T8 { bits<4> Prefix = 13; }
class TA { bits<4> Prefix = 14; }
class TF { bits<4> Prefix = 15; }
+class VEX { bit hasVEXPrefix = 1; }
+class VEX_W { bit hasVEX_WPrefix = 1; }
+class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
@@ -128,6 +133,11 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
Domain ExeDomain = d;
+ bit hasVEXPrefix = 0; // Does this inst require a VEX prefix?
+ bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
+ bit hasVEX_4VPrefix = 0; // Does this inst require the VEX.VVVV field?
+ bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
+ // to be encoded in an immediate field?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -141,6 +151,10 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{21-20} = SegOvrBits;
let TSFlags{23-22} = ExeDomain.Value;
let TSFlags{31-24} = Opcode;
+ let TSFlags{32} = hasVEXPrefix;
+ let TSFlags{33} = hasVEX_WPrefix;
+ let TSFlags{34} = hasVEX_4VPrefix;
+ let TSFlags{35} = hasVEX_i8ImmReg;
}
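Since the comment above insists the TSFlags layout stay in sync with X86InstrInfo.h, here is a hedged C++ sketch of the consumer side; the accessor names are assumptions for illustration, not part of this patch:

    #include <cstdint>

    // Assumed helpers: test the VEX-related bits placed at TSFlags{32-35}.
    static inline bool hasVEXPrefix(uint64_t TSFlags)    { return TSFlags & (1ULL << 32); }
    static inline bool hasVEX_WPrefix(uint64_t TSFlags)  { return TSFlags & (1ULL << 33); }
    static inline bool hasVEX_4VPrefix(uint64_t TSFlags) { return TSFlags & (1ULL << 34); }
    static inline bool hasVEX_i8ImmReg(uint64_t TSFlags) { return TSFlags & (1ULL << 35); }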
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
@@ -174,6 +188,13 @@ class Ii32<bits<8> o, Format f, dag outs, dag ins, string asm,
let CodeSize = 3;
}
+class Ii16PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : X86Inst<o, f, Imm16PCRel, outs, ins, asm> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
+
class Ii32PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern>
: X86Inst<o, f, Imm32PCRel, outs, ins, asm> {
@@ -211,11 +232,56 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
let CodeSize = 3;
}
+// SI - SSE 1 & 2 scalar instructions
+class SI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
+ : I<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// SIi8 - SSE 1 & 2 scalar instructions
+class SIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : Ii8<o, F, outs, ins, asm, pattern> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PI - SSE 1 & 2 packed instructions
+class PI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern,
+ Domain d>
+ : I<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm);
+}
+
+// PIi8 - SSE 1 & 2 packed instructions with immediate
+class PIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d>
+ : Ii8<o, F, outs, ins, asm, pattern, d> {
+ let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX],
+ !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1]));
+
+ // AVX instructions have a 'v' prefix in the mnemonic
+ let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm);
+}
+
// SSE1 Instruction Templates:
//
// SSI - SSE1 instructions with XS prefix.
// PSI - SSE1 instructions with TB prefix.
// PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+// VSSI - SSE1 instructions with XS prefix in AVX form.
+// VPSI - SSE1 instructions with TB prefix in AVX form.
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
@@ -229,6 +295,14 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
Requires<[HasSSE1]>;
+class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS,
+ Requires<[HasAVX]>;
+class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>,
+ Requires<[HasAVX]>;
// SSE2 Instruction Templates:
//
@@ -237,6 +311,8 @@ class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
// SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
// PDI - SSE2 instructions with TB and OpSize prefixes.
// PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+// VSDI - SSE2 instructions with XD prefix in AVX form.
+// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form.
class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
@@ -253,6 +329,14 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
Requires<[HasSSE2]>;
+class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD,
+ Requires<[HasAVX]>;
+class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern>
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
+ OpSize, Requires<[HasAVX]>;
// SSE3 Instruction Templates:
//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 6b9478d..71c4e8b 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -60,3 +60,339 @@ def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
}], MMX_SHUFFLE_get_shuf_imm>;
+
+//===----------------------------------------------------------------------===//
+// SSE specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
+ SDTCisFP<0>, SDTCisInt<2> ]>;
+def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
+ SDTCisFP<1>, SDTCisVT<3, i8>]>;
+
+def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
+def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
+def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]>;
+def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
+def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
+def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
+def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
+def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
+def X86pshufb : SDNode<"X86ISD::PSHUFB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
+def X86pextrb : SDNode<"X86ISD::PEXTRB",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pextrw : SDNode<"X86ISD::PEXTRW",
+ SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
+def X86pinsrb : SDNode<"X86ISD::PINSRB",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86pinsrw : SDNode<"X86ISD::PINSRW",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+def X86insrtps : SDNode<"X86ISD::INSERTPS",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
+def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
+ SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
+def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad]>;
+def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
+def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
+def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
+def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
+def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
+def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
+def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
+def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
+def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
+
+def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, v4f32>,
+ SDTCisVT<2, v4f32>]>;
+def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
+
+//===----------------------------------------------------------------------===//
+// SSE Complex Patterns
+//===----------------------------------------------------------------------===//
+
+// These are 'extloads' from a scalar to the low element of a vector, zeroing
+// the top elements. These are used for the SSE 'ss' and 'sd' instruction
+// forms.
+def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad]>;
+def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
+ [SDNPHasChain, SDNPMayLoad]>;
+
+def ssmem : Operand<v4f32> {
+ let PrintMethod = "printf32mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+def sdmem : Operand<v2f64> {
+ let PrintMethod = "printf64mem";
+ let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
+ let ParserMatchClass = X86MemAsmOperand;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE pattern fragments
+//===----------------------------------------------------------------------===//
+
+def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
+def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
+def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
+def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
+
+// FIXME: move this to a more appropriate place after all AVX is done.
+def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
+def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
+def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>;
+def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
+
+// Like 'store', but always requires vector alignment.
+def alignedstore : PatFrag<(ops node:$val, node:$ptr),
+ (store node:$val, node:$ptr), [{
+ return cast<StoreSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+// Like 'load', but always requires vector alignment.
+def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
+
+def alignedloadfsf32 : PatFrag<(ops node:$ptr),
+ (f32 (alignedload node:$ptr))>;
+def alignedloadfsf64 : PatFrag<(ops node:$ptr),
+ (f64 (alignedload node:$ptr))>;
+def alignedloadv4f32 : PatFrag<(ops node:$ptr),
+ (v4f32 (alignedload node:$ptr))>;
+def alignedloadv2f64 : PatFrag<(ops node:$ptr),
+ (v2f64 (alignedload node:$ptr))>;
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+ (v4i32 (alignedload node:$ptr))>;
+def alignedloadv2i64 : PatFrag<(ops node:$ptr),
+ (v2i64 (alignedload node:$ptr))>;
+
+// FIXME: move this to a more appropriate place after all AVX is done.
+def alignedloadv8f32 : PatFrag<(ops node:$ptr),
+ (v8f32 (alignedload node:$ptr))>;
+def alignedloadv4f64 : PatFrag<(ops node:$ptr),
+ (v4f64 (alignedload node:$ptr))>;
+def alignedloadv8i32 : PatFrag<(ops node:$ptr),
+ (v8i32 (alignedload node:$ptr))>;
+def alignedloadv4i64 : PatFrag<(ops node:$ptr),
+ (v4i64 (alignedload node:$ptr))>;
+
+// Like 'load', but uses special alignment checks suitable for use in
+// memory operands in most SSE instructions, which are required to
+// be naturally aligned on some targets but not on others. If the subtarget
+// allows unaligned accesses, match any load, though this may require
+// setting a feature bit in the processor (on startup, for example).
+// Opteron 10h and later implement such a feature.
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return Subtarget->hasVectorUAMem()
+ || cast<LoadSDNode>(N)->getAlignment() >= 16;
+}]>;
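(Usage note, lightly hedged: a pattern such as (v4f32 (memopv4f32 addr:$src)) lets a packed instruction fold its load only when the 16-byte alignment check passes or the subtarget's hasVectorUAMem() feature vouches for unaligned access.)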
+
+def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
+def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
+def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
+def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
+def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
+def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
+def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
+
+// FIXME: move this to a more appropriate place after all AVX is done.
+def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>;
+def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>;
+
+// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
+// 16-byte boundary.
+// FIXME: 8 byte alignment for mmx reads is not required
+def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ return cast<LoadSDNode>(N)->getAlignment() >= 8;
+}]>;
+
+def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
+def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
+def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
+def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+
+// MOVNT Support
+// Like 'store', but requires the non-temporal bit to be set
+def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal();
+ return false;
+}]>;
+
+def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() && !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED &&
+ ST->getAlignment() >= 16;
+ return false;
+}]>;
+
+def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() &&
+ ST->getAlignment() < 16;
+ return false;
+}]>;
+
+def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
+def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
+def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
+def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
+def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
+def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
+
+def vzmovl_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
+def vzmovl_v4i32 : PatFrag<(ops node:$src),
+ (bitconvert (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
+
+def vzload_v2i64 : PatFrag<(ops node:$src),
+ (bitconvert (v2i64 (X86vzload node:$src)))>;
+
+
+def fp32imm0 : PatLeaf<(f32 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
+// BYTE_imm - Transform bit immediates into byte immediates.
+def BYTE_imm : SDNodeXForm<imm, [{
+ // Transformation function: imm >> 3
+ return getI32Imm(N->getZExtValue() >> 3);
+}]>;
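(Worked example: a shift amount arriving as 32 bits becomes 32 >> 3 = 4, the byte count that the PSLLDQ/PSRLDQ-style byte-shift encodings actually take.)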
+
+// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
+// SHUFP* etc. imm.
+def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShuffleSHUFImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
+// PSHUFHW imm.
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
+}]>;
+
+// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
+// PSHUFLW imm.
+def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
+}]>;
+
+// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
+// a PALIGNR imm.
+def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
+ return getI8Imm(X86::getShufflePALIGNRImmediate(N));
+}]>;
+
+def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
+}]>;
+
+def movddup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movlp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
+}]>;
+
+def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def shufp : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_shuf_imm>;
+
+def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshufhw_imm>;
+
+def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_pshuflw_imm>;
+
+def palign : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
+}], SHUFFLE_get_palign_imm>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 34e12ca..ce471ea 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -784,7 +784,9 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MOV8rm:
case X86::MOV16rm:
case X86::MOV32rm:
+ case X86::MOV32rm_TC:
case X86::MOV64rm:
+ case X86::MOV64rm_TC:
case X86::LD_Fp64m:
case X86::MOVSSrm:
case X86::MOVSDrm:
@@ -805,7 +807,9 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::MOV8mr:
case X86::MOV16mr:
case X86::MOV32mr:
+ case X86::MOV32mr_TC:
case X86::MOV64mr:
+ case X86::MOV64mr_TC:
case X86::ST_FpP64m:
case X86::MOVSSmr:
case X86::MOVSDmr:
@@ -863,7 +867,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
int &FrameIndex) const {
if (isFrameStoreOpcode(MI->getOpcode()))
if (isFrameOperand(MI, 0, FrameIndex))
- return MI->getOperand(X86AddrNumOperands).getReg();
+ return MI->getOperand(X86::AddrNumOperands).getReg();
return 0;
}
@@ -1064,14 +1068,9 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo &TRI) const {
DebugLoc DL = Orig->getDebugLoc();
- if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = TRI->getSubReg(DestReg, SubIdx);
- SubIdx = 0;
- }
-
// MOV32r0 etc. are implemented with xor which clobbers condition code.
// Re-materialize them as movri instructions to avoid side effects.
bool Clone = true;
@@ -1098,14 +1097,13 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
if (Clone) {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
MBB.insert(I, MI);
} else {
- BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
+ BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
}
MachineInstr *NewMI = prior(I);
- NewMI->getOperand(0).setSubReg(SubIdx);
+ NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
}
/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
@@ -1151,10 +1149,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
// least on modern x86 machines).
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
MachineInstr *InsMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
- .addReg(leaInReg)
- .addReg(Src, getKillRegState(isKill))
- .addImm(X86::sub_16bit);
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg, RegState::Define, X86::sub_16bit)
+ .addReg(Src, getKillRegState(isKill));
MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
get(Opc), leaOutReg);
@@ -1165,20 +1162,20 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
case X86::SHL16ri: {
unsigned ShAmt = MI->getOperand(2).getImm();
MIB.addReg(0).addImm(1 << ShAmt)
- .addReg(leaInReg, RegState::Kill).addImm(0);
+ .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
break;
}
case X86::INC16r:
case X86::INC64_16r:
- addLeaRegOffset(MIB, leaInReg, true, 1);
+ addRegOffset(MIB, leaInReg, true, 1);
break;
case X86::DEC16r:
case X86::DEC64_16r:
- addLeaRegOffset(MIB, leaInReg, true, -1);
+ addRegOffset(MIB, leaInReg, true, -1);
break;
case X86::ADD16ri:
case X86::ADD16ri8:
- addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
break;
case X86::ADD16rr: {
unsigned Src2 = MI->getOperand(2).getReg();
@@ -1195,10 +1192,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
// well be shifting and then extracting the lower 16-bits.
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
InsMI2 =
- BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
- .addReg(leaInReg2)
- .addReg(Src2, getKillRegState(isKill2))
- .addImm(X86::sub_16bit);
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+ .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
+ .addReg(Src2, getKillRegState(isKill2));
addRegReg(MIB, leaInReg, true, leaInReg2, true);
}
if (LV && isKill2 && InsMI2)
@@ -1209,10 +1205,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
MachineInstr *NewMI = MIB;
MachineInstr *ExtMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(leaOutReg, RegState::Kill)
- .addImm(X86::sub_16bit);
+ .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
if (LV) {
// Update live variables
@@ -1283,7 +1278,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
.addReg(Src, getKillRegState(isKill))
- .addImm(0);
+ .addImm(0).addReg(0);
break;
}
case X86::SHL32ri: {
@@ -1297,7 +1292,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill)).addImm(0);
+ .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
break;
}
case X86::SHL16ri: {
@@ -1313,7 +1308,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
.addReg(Src, getKillRegState(isKill))
- .addImm(0);
+ .addImm(0).addReg(0);
break;
}
default: {
@@ -1331,7 +1326,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, 1);
@@ -1353,7 +1348,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
: (is64Bit ? X86::LEA64_32r : X86::LEA32r);
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, -1);
@@ -1401,7 +1396,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD64ri32:
case X86::ADD64ri8:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
@@ -1410,7 +1405,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD32ri8: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
@@ -1421,7 +1416,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (DisableLEA16)
return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
@@ -1845,9 +1840,8 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const {
- // FIXME this should probably have a DebugLoc operand
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const {
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -1856,7 +1850,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
if (Cond.empty()) {
// Unconditional branch?
assert(!FBB && "Unconditional branch with multiple successors!");
- BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
return 1;
}
@@ -1866,27 +1860,27 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
switch (CC) {
case X86::COND_NP_OR_E:
// Synthesize NP_OR_E with two branches.
- BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
++Count;
- BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
++Count;
break;
case X86::COND_NE_OR_P:
// Synthesize NE_OR_P with two branches.
- BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
++Count;
- BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
++Count;
break;
default: {
unsigned Opc = GetCondBranchFromCond(CC);
- BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
++Count;
}
}
if (FBB) {
// Two-way Conditional branch. Insert the second branch.
- BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
++Count;
}
return Count;
@@ -1897,237 +1891,153 @@ static bool isHReg(unsigned Reg) {
return X86::GR8_ABCD_HRegClass.contains(Reg);
}
-bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
-
- // Determine if DstRC and SrcRC have a common superclass in common.
- const TargetRegisterClass *CommonRC = DestRC;
- if (DestRC == SrcRC)
- /* Source and destination have the same register class. */;
- else if (CommonRC->hasSuperClass(SrcRC))
- CommonRC = SrcRC;
- else if (!DestRC->hasSubClass(SrcRC)) {
- // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other,
- // but we want to copy them as GR64. Similarly, for GR32_NOREX and
- // GR32_NOSP, copy as GR32.
- if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
- DestRC->hasSuperClass(&X86::GR64RegClass))
- CommonRC = &X86::GR64RegClass;
- else if (SrcRC->hasSuperClass(&X86::GR32RegClass) &&
- DestRC->hasSuperClass(&X86::GR32RegClass))
- CommonRC = &X86::GR32RegClass;
+void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ // First deal with the normal symmetric copies.
+ unsigned Opc = 0;
+ if (X86::GR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV64rr;
+ else if (X86::GR32RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV32rr;
+ else if (X86::GR16RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOV16rr;
+ else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
+ // Copying to or from a physical H register on x86-64 requires a NOREX
+ // move. Otherwise use a normal move.
+ if ((isHReg(DestReg) || isHReg(SrcReg)) &&
+ TM.getSubtarget<X86Subtarget>().is64Bit())
+ Opc = X86::MOV8rr_NOREX;
else
- CommonRC = 0;
- }
-
- if (CommonRC) {
- unsigned Opc;
- if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) {
- Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32RegClass ||
- CommonRC == &X86::GR32_NOSPRegClass) {
- Opc = X86::MOV32rr;
- } else if (CommonRC == &X86::GR16RegClass) {
- Opc = X86::MOV16rr;
- } else if (CommonRC == &X86::GR8RegClass) {
- // Copying to or from a physical H register on x86-64 requires a NOREX
- // move. Otherwise use a normal move.
- if ((isHReg(DestReg) || isHReg(SrcReg)) &&
- TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rr_NOREX;
- else
- Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR64_ABCDRegClass) {
- Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32_ABCDRegClass) {
- Opc = X86::MOV32rr;
- } else if (CommonRC == &X86::GR16_ABCDRegClass) {
- Opc = X86::MOV16rr;
- } else if (CommonRC == &X86::GR8_ABCD_LRegClass) {
Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR8_ABCD_HRegClass) {
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rr_NOREX;
- else
- Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR64_NOREXRegClass ||
- CommonRC == &X86::GR64_NOREX_NOSPRegClass) {
- Opc = X86::MOV64rr;
- } else if (CommonRC == &X86::GR32_NOREXRegClass) {
- Opc = X86::MOV32rr;
- } else if (CommonRC == &X86::GR16_NOREXRegClass) {
- Opc = X86::MOV16rr;
- } else if (CommonRC == &X86::GR8_NOREXRegClass) {
- Opc = X86::MOV8rr;
- } else if (CommonRC == &X86::GR64_TCRegClass) {
- Opc = X86::MOV64rr_TC;
- } else if (CommonRC == &X86::GR32_TCRegClass) {
- Opc = X86::MOV32rr_TC;
- } else if (CommonRC == &X86::RFP32RegClass) {
- Opc = X86::MOV_Fp3232;
- } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
- Opc = X86::MOV_Fp6464;
- } else if (CommonRC == &X86::RFP80RegClass) {
- Opc = X86::MOV_Fp8080;
- } else if (CommonRC == &X86::FR32RegClass) {
- Opc = X86::FsMOVAPSrr;
- } else if (CommonRC == &X86::FR64RegClass) {
- Opc = X86::FsMOVAPDrr;
- } else if (CommonRC == &X86::VR128RegClass) {
- Opc = X86::MOVAPSrr;
- } else if (CommonRC == &X86::VR64RegClass) {
- Opc = X86::MMX_MOVQ64rr;
- } else {
- return false;
- }
- BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg);
- return true;
+ } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MOVAPSrr;
+ else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+ Opc = X86::MMX_MOVQ64rr;
+
+ if (Opc) {
+ BuildMI(MBB, MI, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
// Moving EFLAGS to / from another register requires a push and a pop.
- if (SrcRC == &X86::CCRRegClass) {
- if (SrcReg != X86::EFLAGS)
- return false;
- if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
+ if (SrcReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF64));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
- return true;
- } else if (DestRC == &X86::GR32RegClass ||
- DestRC == &X86::GR32_NOSPRegClass) {
+ return;
+ } else if (X86::GR32RegClass.contains(DestReg)) {
BuildMI(MBB, MI, DL, get(X86::PUSHF32));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
- return true;
+ return;
}
- } else if (DestRC == &X86::CCRRegClass) {
- if (DestReg != X86::EFLAGS)
- return false;
- if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
+ }
+ if (DestReg == X86::EFLAGS) {
+ if (X86::GR64RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH64r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(X86::POPF64));
- return true;
- } else if (SrcRC == &X86::GR32RegClass ||
- DestRC == &X86::GR32_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
+ return;
+ } else if (X86::GR32RegClass.contains(SrcReg)) {
+ BuildMI(MBB, MI, DL, get(X86::PUSH32r))
+ .addReg(SrcReg, getKillRegState(KillSrc));
BuildMI(MBB, MI, DL, get(X86::POPF32));
- return true;
- }
- }
-
- // Moving from ST(0) turns into FpGET_ST0_32 etc.
- if (SrcRC == &X86::RSTRegClass) {
- // Copying from ST(0)/ST(1).
- if (SrcReg != X86::ST0 && SrcReg != X86::ST1)
- // Can only copy from ST(0)/ST(1) right now
- return false;
- bool isST0 = SrcReg == X86::ST0;
- unsigned Opc;
- if (DestRC == &X86::RFP32RegClass)
- Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32;
- else if (DestRC == &X86::RFP64RegClass)
- Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64;
- else {
- if (DestRC != &X86::RFP80RegClass)
- return false;
- Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80;
+ return;
}
- BuildMI(MBB, MI, DL, get(Opc), DestReg);
- return true;
}
- // Moving to ST(0) turns into FpSET_ST0_32 etc.
- if (DestRC == &X86::RSTRegClass) {
- // Copying to ST(0) / ST(1).
- if (DestReg != X86::ST0 && DestReg != X86::ST1)
- // Can only copy to TOS right now
- return false;
- bool isST0 = DestReg == X86::ST0;
- unsigned Opc;
- if (SrcRC == &X86::RFP32RegClass)
- Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32;
- else if (SrcRC == &X86::RFP64RegClass)
- Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64;
- else {
- if (SrcRC != &X86::RFP80RegClass)
- return false;
- Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80;
- }
- BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg);
- return true;
- }
-
- // Not yet supported!
- return false;
+ DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+ << " to " << RI.getName(DestReg) << '\n');
+ llvm_unreachable("Cannot emit physreg copy instruction");
}
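
// Illustrative usage (not from this patch): callers of the new hook request a
// physical-register copy directly, without supplying register classes:
//
//   TII->copyPhysReg(MBB, InsertPt, DL, X86::EAX, X86::EBX,
//                    /*KillSrc=*/true);  // emits: movl %ebx, %eax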
-static unsigned getStoreRegOpcode(unsigned SrcReg,
- const TargetRegisterClass *RC,
- bool isStackAligned,
- TargetMachine &TM) {
- unsigned Opc = 0;
- if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
- Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
- Opc = X86::MOV32mr;
- } else if (RC == &X86::GR16RegClass) {
- Opc = X86::MOV16mr;
- } else if (RC == &X86::GR8RegClass) {
+static unsigned getLoadStoreRegOpcode(unsigned Reg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM,
+ bool load) {
+ switch (RC->getID()) {
+ default:
+ llvm_unreachable("Unknown regclass");
+ case X86::GR64RegClassID:
+ case X86::GR64_NOSPRegClassID:
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ case X86::GR32RegClassID:
+ case X86::GR32_NOSPRegClassID:
+ case X86::GR32_ADRegClassID:
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ case X86::GR16RegClassID:
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case X86::GR8RegClassID:
// Copying to or from a physical H register on x86-64 requires a NOREX
// move. Otherwise use a normal move.
- if (isHReg(SrcReg) &&
+ if (isHReg(Reg) &&
TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
else
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR64_ABCDRegClass) {
- Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32_ABCDRegClass) {
- Opc = X86::MOV32mr;
- } else if (RC == &X86::GR16_ABCDRegClass) {
- Opc = X86::MOV16mr;
- } else if (RC == &X86::GR8_ABCD_LRegClass) {
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR8_ABCD_HRegClass) {
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case X86::GR64_ABCDRegClassID:
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ case X86::GR32_ABCDRegClassID:
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ case X86::GR16_ABCDRegClassID:
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case X86::GR8_ABCD_LRegClassID:
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case X86::GR8_ABCD_HRegClassID:
if (TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8mr_NOREX;
+ return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
else
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR64_NOREXRegClass ||
- RC == &X86::GR64_NOREX_NOSPRegClass) {
- Opc = X86::MOV64mr;
- } else if (RC == &X86::GR32_NOREXRegClass) {
- Opc = X86::MOV32mr;
- } else if (RC == &X86::GR16_NOREXRegClass) {
- Opc = X86::MOV16mr;
- } else if (RC == &X86::GR8_NOREXRegClass) {
- Opc = X86::MOV8mr;
- } else if (RC == &X86::GR64_TCRegClass) {
- Opc = X86::MOV64mr_TC;
- } else if (RC == &X86::GR32_TCRegClass) {
- Opc = X86::MOV32mr_TC;
- } else if (RC == &X86::RFP80RegClass) {
- Opc = X86::ST_FpP80m; // pops
- } else if (RC == &X86::RFP64RegClass) {
- Opc = X86::ST_Fp64m;
- } else if (RC == &X86::RFP32RegClass) {
- Opc = X86::ST_Fp32m;
- } else if (RC == &X86::FR32RegClass) {
- Opc = X86::MOVSSmr;
- } else if (RC == &X86::FR64RegClass) {
- Opc = X86::MOVSDmr;
- } else if (RC == &X86::VR128RegClass) {
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case X86::GR64_NOREXRegClassID:
+ case X86::GR64_NOREX_NOSPRegClassID:
+ return load ? X86::MOV64rm : X86::MOV64mr;
+ case X86::GR32_NOREXRegClassID:
+ return load ? X86::MOV32rm : X86::MOV32mr;
+ case X86::GR16_NOREXRegClassID:
+ return load ? X86::MOV16rm : X86::MOV16mr;
+ case X86::GR8_NOREXRegClassID:
+ return load ? X86::MOV8rm : X86::MOV8mr;
+ case X86::GR64_TCRegClassID:
+ return load ? X86::MOV64rm_TC : X86::MOV64mr_TC;
+ case X86::GR32_TCRegClassID:
+ return load ? X86::MOV32rm_TC : X86::MOV32mr_TC;
+ case X86::RFP80RegClassID:
+ return load ? X86::LD_Fp80m : X86::ST_FpP80m;
+ case X86::RFP64RegClassID:
+ return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+ case X86::RFP32RegClassID:
+ return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+ case X86::FR32RegClassID:
+ return load ? X86::MOVSSrm : X86::MOVSSmr;
+ case X86::FR64RegClassID:
+ return load ? X86::MOVSDrm : X86::MOVSDmr;
+ case X86::VR128RegClassID:
// If stack is realigned we can use aligned stores.
- Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr;
- } else if (RC == &X86::VR64RegClass) {
- Opc = X86::MMX_MOVQ64mr;
- } else {
- llvm_unreachable("Unknown regclass");
+ if (isStackAligned)
+ return load ? X86::MOVAPSrm : X86::MOVAPSmr;
+ else
+ return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+ case X86::VR64RegClassID:
+ return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
}
+}
+
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ TargetMachine &TM) {
+ return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
+}
- return Opc;
+
+static unsigned getLoadRegOpcode(unsigned DestReg,
+ const TargetRegisterClass *RC,
+ bool isStackAligned,
+ const TargetMachine &TM) {
+ return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
}
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -2150,7 +2060,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (*MMOBegin)->getAlignment() >= 16;
+ bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
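// With an empty memoperand list there is no alignment information, so the
// unaligned store opcode is conservatively chosen below.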
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -2161,72 +2071,6 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
NewMIs.push_back(MIB);
}
-static unsigned getLoadRegOpcode(unsigned DestReg,
- const TargetRegisterClass *RC,
- bool isStackAligned,
- const TargetMachine &TM) {
- unsigned Opc = 0;
- if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
- Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
- Opc = X86::MOV32rm;
- } else if (RC == &X86::GR16RegClass) {
- Opc = X86::MOV16rm;
- } else if (RC == &X86::GR8RegClass) {
- // Copying to or from a physical H register on x86-64 requires a NOREX
- // move. Otherwise use a normal move.
- if (isHReg(DestReg) &&
- TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rm_NOREX;
- else
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR64_ABCDRegClass) {
- Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32_ABCDRegClass) {
- Opc = X86::MOV32rm;
- } else if (RC == &X86::GR16_ABCDRegClass) {
- Opc = X86::MOV16rm;
- } else if (RC == &X86::GR8_ABCD_LRegClass) {
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR8_ABCD_HRegClass) {
- if (TM.getSubtarget<X86Subtarget>().is64Bit())
- Opc = X86::MOV8rm_NOREX;
- else
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR64_NOREXRegClass ||
- RC == &X86::GR64_NOREX_NOSPRegClass) {
- Opc = X86::MOV64rm;
- } else if (RC == &X86::GR32_NOREXRegClass) {
- Opc = X86::MOV32rm;
- } else if (RC == &X86::GR16_NOREXRegClass) {
- Opc = X86::MOV16rm;
- } else if (RC == &X86::GR8_NOREXRegClass) {
- Opc = X86::MOV8rm;
- } else if (RC == &X86::GR64_TCRegClass) {
- Opc = X86::MOV64rm_TC;
- } else if (RC == &X86::GR32_TCRegClass) {
- Opc = X86::MOV32rm_TC;
- } else if (RC == &X86::RFP80RegClass) {
- Opc = X86::LD_Fp80m;
- } else if (RC == &X86::RFP64RegClass) {
- Opc = X86::LD_Fp64m;
- } else if (RC == &X86::RFP32RegClass) {
- Opc = X86::LD_Fp32m;
- } else if (RC == &X86::FR32RegClass) {
- Opc = X86::MOVSSrm;
- } else if (RC == &X86::FR64RegClass) {
- Opc = X86::MOVSDrm;
- } else if (RC == &X86::VR128RegClass) {
- // If stack is realigned we can use aligned loads.
- Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm;
- } else if (RC == &X86::VR64RegClass) {
- Opc = X86::MMX_MOVQ64rm;
- } else {
- llvm_unreachable("Unknown regclass");
- }
-
- return Opc;
-}
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -2246,7 +2090,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (*MMOBegin)->getAlignment() >= 16;
+ bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL;
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2277,18 +2121,17 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
if (Reg == FPReg)
// X86RegisterInfo::emitPrologue will handle spilling of frame register.
continue;
- if (RegClass != &X86::VR128RegClass && !isWin64) {
+ if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
CalleeFrameSize += SlotSize;
BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass,
- &RI);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ &X86::VR128RegClass, &RI);
}
}
@@ -2315,11 +2158,11 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (Reg == FPReg)
// X86RegisterInfo::emitEpilogue will handle restoring of frame register.
continue;
- const TargetRegisterClass *RegClass = CSI[i].getRegClass();
- if (RegClass != &X86::VR128RegClass && !isWin64) {
+ if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ &X86::VR128RegClass, &RI);
}
}
return true;
@@ -2492,7 +2335,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
}
// No fusion
- if (PrintFailedFusing)
+ if (PrintFailedFusing && !MI->isCopy())
dbgs() << "We failed to fuse operand " << i << " in " << *MI;
return NULL;
}
@@ -2610,7 +2453,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
} else if (Ops.size() != 1)
return NULL;
- SmallVector<MachineOperand,X86AddrNumOperands> MOs;
+ SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
case X86::V_SET0PS:
case X86::V_SET0PD:
@@ -2632,7 +2475,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
if (TM.getSubtarget<X86Subtarget>().is64Bit())
PICBase = X86::RIP;
else
- // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+ // FIXME: PICBase = getGlobalBaseReg(&MF);
// This doesn't work for several reasons.
// 1. GlobalBaseReg may have been spilled.
// 2. It may not be live at MI.
@@ -2664,7 +2507,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
default: {
// Folding a normal load. Just copy the load's address operands.
unsigned NumOps = LoadMI->getDesc().getNumOperands();
- for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
+ for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
MOs.push_back(LoadMI->getOperand(i));
break;
}
@@ -2727,7 +2570,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
if (I != OpcodeTablePtr->end())
return true;
}
- return false;
+ return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
}
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
@@ -2751,13 +2594,20 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
const TargetInstrDesc &TID = get(Opc);
const TargetOperandInfo &TOI = TID.OpInfo[Index];
const TargetRegisterClass *RC = TOI.getRegClass(&RI);
- SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
+ if (!MI->hasOneMemOperand() &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
+ // conservatively assume the address is unaligned. That's bad for
+ // performance.
+ return false;
+ SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
SmallVector<MachineOperand,2> BeforeOps;
SmallVector<MachineOperand,2> AfterOps;
SmallVector<MachineOperand,4> ImpOps;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI->getOperand(i);
- if (i >= Index && i < Index + X86AddrNumOperands)
+ if (i >= Index && i < Index + X86::AddrNumOperands)
AddrOps.push_back(Op);
else if (Op.isReg() && Op.isImplicit())
ImpOps.push_back(Op);
@@ -2776,7 +2626,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
if (UnfoldStore) {
// Address operands cannot be marked isKill.
- for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
+ for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
MachineOperand &MO = NewMIs[0]->getOperand(i);
if (MO.isReg())
MO.setIsKill(false);
@@ -2873,7 +2723,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
unsigned NumOps = N->getNumOperands();
for (unsigned i = 0; i != NumOps-1; ++i) {
SDValue Op = N->getOperand(i);
- if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands)
+ if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
AddrOps.push_back(Op);
else if (i < Index-NumDefs)
BeforeOps.push_back(Op);
@@ -2892,7 +2742,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
MachineInstr::mmo_iterator> MMOs =
MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
cast<MachineSDNode>(N)->memoperands_end());
- bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned load.
+ return false;
+ bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
@@ -2929,7 +2784,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
MachineInstr::mmo_iterator> MMOs =
MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
cast<MachineSDNode>(N)->memoperands_end());
- bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+ if (!(*MMOs.first) &&
+ RC == &X86::VR128RegClass &&
+ !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+ // Do not introduce a slow unaligned store.
+ return false;
+ bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
dl, MVT::Other,
@@ -3065,16 +2925,16 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
EVT VT = Load1->getValueType(0);
switch (VT.getSimpleVT().SimpleTy) {
- default: {
+ default:
// XMM registers. In 64-bit mode we can be a bit more aggressive since we
// have 16 of them to play with.
if (TM.getSubtargetImpl()->is64Bit()) {
if (NumLoads >= 3)
return false;
- } else if (NumLoads)
+ } else if (NumLoads) {
return false;
+ }
break;
- }
case MVT::i8:
case MVT::i16:
case MVT::i32:
@@ -3083,6 +2943,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
case MVT::f64:
if (NumLoads)
return false;
+ break;
}
return true;
@@ -3123,6 +2984,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11:
case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+ case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11:
+ case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
return true;
}
return false;
@@ -3194,7 +3057,7 @@ unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
case X86II::MRMDestMem: {
- unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
i = isTwoAddr ? 1 : 0;
if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
REX |= 1 << 2;
@@ -3546,7 +3409,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
case X86II::MRMDestMem: {
++FinalSize;
FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86AddrNumOperands + 1;
+ CurOp += X86::AddrNumOperands + 1;
if (CurOp != NumOps) {
++CurOp;
FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
@@ -3565,16 +3428,9 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
break;
case X86II::MRMSrcMem: {
- int AddrOperands;
- if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- AddrOperands = X86AddrNumOperands - 1; // No segment register
- else
- AddrOperands = X86AddrNumOperands;
-
++FinalSize;
FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
- CurOp += AddrOperands + 1;
+ CurOp += X86::AddrNumOperands + 1;
if (CurOp != NumOps) {
++CurOp;
FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
@@ -3628,7 +3484,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
++FinalSize;
FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
- CurOp += X86AddrNumOperands;
+ CurOp += X86::AddrNumOperands;
if (CurOp != NumOps) {
const MachineOperand &MO = MI.getOperand(CurOp++);
@@ -3694,6 +3550,8 @@ unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
/// the global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
///
+/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
+///
unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
"X86-64 PIC uses RIP relative addressing");
@@ -3703,30 +3561,10 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
if (GlobalBaseReg != 0)
return GlobalBaseReg;
- // Insert the set of GlobalBaseReg into the first MBB of the function
- MachineBasicBlock &FirstMBB = MF->front();
- MachineBasicBlock::iterator MBBI = FirstMBB.begin();
- DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ // Create the register. The code to initialize it is inserted
+ // later, by the CGBR pass (below).
MachineRegisterInfo &RegInfo = MF->getRegInfo();
- unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
-
- const TargetInstrInfo *TII = TM.getInstrInfo();
- // Operand of MovePCtoStack is completely ignored by asm printer. It's
- // only used in JIT code emission as displacement to pc.
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
- // If we're using vanilla 'GOT' PIC style, we should use relative addressing
- // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
- if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
- GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
- // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
- BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
- .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
- X86II::MO_GOT_ABSOLUTE_ADDRESS);
- } else {
- GlobalBaseReg = PC;
- }
-
+ GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
X86FI->setGlobalBaseReg(GlobalBaseReg);
return GlobalBaseReg;
}
@@ -3784,3 +3622,65 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
+namespace {
+ /// CGBR - Create Global Base Reg pass. This initializes the PIC
+ /// global base register for x86-32.
+ struct CGBR : public MachineFunctionPass {
+ static char ID;
+ CGBR() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ const X86TargetMachine *TM =
+ static_cast<const X86TargetMachine *>(&MF.getTarget());
+
+ assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
+ "X86-64 PIC uses RIP relative addressing");
+
+ // Only emit a global base reg in PIC mode.
+ if (TM->getRelocationModel() != Reloc::PIC_)
+ return false;
+
+ // Insert the code that sets GlobalBaseReg into the function's first MBB.
+ MachineBasicBlock &FirstMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ const X86InstrInfo *TII = TM->getInstrInfo();
+
+ unsigned PC;
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
+ PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+ else
+ PC = TII->getGlobalBaseReg(&MF);
+
+ // The operand of MovePCtoStack is completely ignored by the asm printer;
+ // it is only used in JIT code emission as a pc-relative displacement.
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+ // If we're using vanilla 'GOT' PIC style, we should use relative addressing
+ // not to the pc, but to the external symbol _GLOBAL_OFFSET_TABLE_.
+ if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+ unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF);
+ // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+ BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+ .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+ X86II::MO_GOT_ABSOLUTE_ADDRESS);
+ }
+
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "X86 PIC Global Base Reg Initialization";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+}
+
+char CGBR::ID = 0;
+FunctionPass*
+llvm::createGlobalBaseRegPass() { return new CGBR(); }
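
// Illustrative (not from this patch): the X86 target would schedule this pass
// in its codegen pipeline, presumably along the lines of:
//
//   PM.add(llvm::createGlobalBaseRegPass());  // x86-32 PIC only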
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 62d7c74..f762b58 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -24,6 +24,24 @@ namespace llvm {
class X86TargetMachine;
namespace X86 {
+ // Enums for memory operand decoding. Each memory operand is represented as
+ // a five-operand sequence of the form:
+ // [BaseReg, ScaleAmt, IndexReg, Disp, Segment]
+ // These enums help decode this.
+ enum {
+ AddrBaseReg = 0,
+ AddrScaleAmt = 1,
+ AddrIndexReg = 2,
+ AddrDisp = 3,
+
+ /// AddrSegmentReg - The operand # of the segment in the memory operand.
+ AddrSegmentReg = 4,
+
+ /// AddrNumOperands - Total number of operands in a memory reference.
+ AddrNumOperands = 5
+ };
+
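// Illustrative sketch (not part of this patch): given the index of the first
// address operand (MemOpNo, a hypothetical value obtained elsewhere), the
// enums decode a memory reference like so:
//
//   unsigned Base  = MI->getOperand(MemOpNo + X86::AddrBaseReg).getReg();
//   int64_t  Scale = MI->getOperand(MemOpNo + X86::AddrScaleAmt).getImm();
//   unsigned Index = MI->getOperand(MemOpNo + X86::AddrIndexReg).getReg();
//   const MachineOperand &Disp = MI->getOperand(MemOpNo + X86::AddrDisp);
//   unsigned Seg   = MI->getOperand(MemOpNo + X86::AddrSegmentReg).getReg();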
+
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
enum CondCode {
@@ -173,7 +191,19 @@ namespace X86II {
/// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
/// which is a PIC-base-relative reference to a hidden dyld lazy pointer
/// stub.
- MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE
+ MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
+
+ /// MO_TLVP - On a symbol operand this indicates that the immediate is
+ /// some TLS offset.
+ ///
+ /// This is the TLS offset for the Darwin TLS mechanism.
+ MO_TLVP,
+
+ /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
+ /// is some TLS offset from the picbase.
+ ///
+ /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
+ MO_TLVP_PIC_BASE
};
}
@@ -203,6 +233,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
case X86II::MO_PIC_BASE_OFFSET: // Darwin local global.
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global.
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global.
+ case X86II::MO_TLVP: // ??? Pretty sure..
return true;
default:
return false;
@@ -347,9 +378,10 @@ namespace X86II {
Imm8 = 1 << ImmShift,
Imm8PCRel = 2 << ImmShift,
Imm16 = 3 << ImmShift,
- Imm32 = 4 << ImmShift,
- Imm32PCRel = 5 << ImmShift,
- Imm64 = 6 << ImmShift,
+ Imm16PCRel = 4 << ImmShift,
+ Imm32 = 5 << ImmShift,
+ Imm32PCRel = 6 << ImmShift,
+ Imm64 = 7 << ImmShift,
//===------------------------------------------------------------------===//
// FP Instruction Classification... Zero is non-fp instruction.
@@ -403,28 +435,47 @@ namespace X86II {
SSEDomainShift = 22,
OpcodeShift = 24,
- OpcodeMask = 0xFF << OpcodeShift
+ OpcodeMask = 0xFF << OpcodeShift,
+
+ //===------------------------------------------------------------------===//
+ // VEX - The opcode prefix used by AVX instructions
+ VEX = 1ULL << 32,
+
+ // VEX_W - Has opcode-specific meaning, but is used in the same
+ // way as REX_W is for regular SSE instructions.
+ VEX_W = 1ULL << 33,
+
+ // VEX_4V - Used to specify an additional AVX/SSE register. Several
+ // two-address instructions in SSE are represented as three-address ones
+ // in AVX, with the additional register encoded in the VEX_VVVV prefix.
+ VEX_4V = 1ULL << 34,
+
+ // VEX_I8IMM - Specifies that the last register used in an AVX instruction
+ // must be encoded in the i8 immediate field. This usually happens in
+ // instructions with 4 operands.
+ VEX_I8IMM = 1ULL << 35
};
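
// Note (not part of the patch): the VEX bits live above bit 31, which is why
// the TSFlags accessors below now take uint64_t instead of unsigned.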
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified machine instruction.
//
- static inline unsigned char getBaseOpcodeFor(unsigned TSFlags) {
+ static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
return TSFlags >> X86II::OpcodeShift;
}
- static inline bool hasImm(unsigned TSFlags) {
+ static inline bool hasImm(uint64_t TSFlags) {
return (TSFlags & X86II::ImmMask) != 0;
}
/// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
/// of the specified instruction.
- static inline unsigned getSizeOfImm(unsigned TSFlags) {
+ static inline unsigned getSizeOfImm(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
default: assert(0 && "Unknown immediate size");
case X86II::Imm8:
case X86II::Imm8PCRel: return 1;
- case X86II::Imm16: return 2;
+ case X86II::Imm16:
+ case X86II::Imm16PCRel: return 2;
case X86II::Imm32:
case X86II::Imm32PCRel: return 4;
case X86II::Imm64: return 8;
@@ -433,23 +484,77 @@ namespace X86II {
/// isImmPCRel - Return true if the immediate of the specified instruction's
/// TSFlags indicates that it is pc relative.
- static inline unsigned isImmPCRel(unsigned TSFlags) {
+ static inline unsigned isImmPCRel(uint64_t TSFlags) {
switch (TSFlags & X86II::ImmMask) {
- default: assert(0 && "Unknown immediate size");
- case X86II::Imm8PCRel:
- case X86II::Imm32PCRel:
- return true;
- case X86II::Imm8:
- case X86II::Imm16:
- case X86II::Imm32:
- case X86II::Imm64:
- return false;
+ default: assert(0 && "Unknown immediate size");
+ case X86II::Imm8PCRel:
+ case X86II::Imm16PCRel:
+ case X86II::Imm32PCRel:
+ return true;
+ case X86II::Imm8:
+ case X86II::Imm16:
+ case X86II::Imm32:
+ case X86II::Imm64:
+ return false;
+ }
+ }
+
+ /// getMemoryOperandNo - The function returns the MCInst operand # for the
+ /// first field of the memory operand. If the instruction doesn't have a
+ /// memory operand, this returns -1.
+ ///
+ /// Note that this ignores tied operands. If there is a tied register which
+ /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only
+ /// counted as one operand.
+ ///
+ static inline int getMemoryOperandNo(uint64_t TSFlags) {
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form");
+ default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!");
+ case X86II::Pseudo:
+ case X86II::RawFrm:
+ case X86II::AddRegFrm:
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ return -1;
+ case X86II::MRMDestMem:
+ return 0;
+ case X86II::MRMSrcMem: {
+ bool HasVEX_4V = TSFlags & X86II::VEX_4V;
+ unsigned FirstMemOp = 1;
+ if (HasVEX_4V)
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+
+ // FIXME: Maybe lea should have its own form? This is a horrible hack.
+ //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
+ // Opcode == X86::LEA16r || Opcode == X86::LEA32r)
+ return FirstMemOp;
}
- }
+ case X86II::MRM0r: case X86II::MRM1r:
+ case X86II::MRM2r: case X86II::MRM3r:
+ case X86II::MRM4r: case X86II::MRM5r:
+ case X86II::MRM6r: case X86II::MRM7r:
+ return -1;
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ return 0;
+ case X86II::MRM_C1:
+ case X86II::MRM_C2:
+ case X86II::MRM_C3:
+ case X86II::MRM_C4:
+ case X86II::MRM_C8:
+ case X86II::MRM_C9:
+ case X86II::MRM_E8:
+ case X86II::MRM_F0:
+ case X86II::MRM_F8:
+ case X86II::MRM_F9:
+ return -1;
+ }
+ }
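
// Illustrative (not from this patch): an instruction encoder can locate the
// address operands by combining this with the X86::Addr* enums; CurOp is a
// hypothetical index of the first operand being encoded:
//
//   int MemOp = X86II::getMemoryOperandNo(TSFlags);
//   if (MemOp != -1) {
//     unsigned FirstAddrOp = CurOp + MemOp;
//     const MCOperand &Disp = MI.getOperand(FirstAddrOp + X86::AddrDisp);
//   }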
}
-const int X86AddrNumOperands = 5;
-
inline static bool isScale(const MachineOperand &MO) {
return MO.isImm() &&
(MO.getImm() == 1 || MO.getImm() == 2 ||
@@ -555,7 +660,7 @@ public:
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
- const TargetRegisterInfo *TRI) const;
+ const TargetRegisterInfo &TRI) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -585,13 +690,12 @@ public:
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 0d59c42..1efef5a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -72,6 +72,8 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+
def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>;
def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
@@ -182,6 +184,9 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag]>;
+
+def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL,
+ []>;
//===----------------------------------------------------------------------===//
// X86 Operand Definitions.
@@ -197,13 +202,9 @@ def X86MemAsmOperand : AsmOperandClass {
let Name = "Mem";
let SuperClasses = [];
}
-def X86NoSegMemAsmOperand : AsmOperandClass {
- let Name = "NoSegMem";
- let SuperClasses = [X86MemAsmOperand];
-}
def X86AbsMemAsmOperand : AsmOperandClass {
let Name = "AbsMem";
- let SuperClasses = [X86NoSegMemAsmOperand];
+ let SuperClasses = [X86MemAsmOperand];
}
class X86MemOperand<string printMethod> : Operand<iPTR> {
let PrintMethod = printMethod;
@@ -226,7 +227,7 @@ def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
def f128mem : X86MemOperand<"printf128mem">;
-//def f256mem : X86MemOperand<"printf256mem">;
+def f256mem : X86MemOperand<"printf256mem">;
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
// plain GR64, so that it doesn't potentially require a REX prefix.
@@ -245,15 +246,11 @@ def i32mem_TC : Operand<i32> {
let ParserMatchClass = X86MemAsmOperand;
}
-def lea32mem : Operand<i32> {
- let PrintMethod = "printlea32mem";
- let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
- let ParserMatchClass = X86NoSegMemAsmOperand;
-}
let ParserMatchClass = X86AbsMemAsmOperand,
PrintMethod = "print_pcrel_imm" in {
def i32imm_pcrel : Operand<i32>;
+def i16imm_pcrel : Operand<i16>;
def offset8 : Operand<i64>;
def offset16 : Operand<i64>;
@@ -283,26 +280,31 @@ class ImmSExtAsmOperandClass : AsmOperandClass {
// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
// (which will be a -1ULL), and "0xFF" (-1 in 16-bits).
-// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x7FFFFFFF] |
+// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti64i32";
}
-// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti16i8";
let SuperClasses = [ImmSExti64i32AsmOperand];
}
-// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti32i8";
}
-// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+// [0, 0x0000007F] |
+// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
let Name = "ImmSExti64i8";
- let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand];
+ let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand,
+ ImmSExti64i32AsmOperand];
}
// A couple of more descriptive operand definitions.
@@ -321,10 +323,10 @@ def i32i8imm : Operand<i32> {
// Define X86 specific addressing mode.
def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
-def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
+def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex],
[]>;
-def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr",
+def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
//===----------------------------------------------------------------------===//
@@ -704,6 +706,12 @@ let isCall = 1 in
"lcall{w}\t{*}$dst", []>, OpSize;
def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst),
"lcall{l}\t{*}$dst", []>;
+
+ // callw for 16-bit code, provided for the assembler only.
+ let isAsmParserOnly = 1 in
+ def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm,
+ (outs), (ins i16imm_pcrel:$dst, variable_ops),
+ "callw\t$dst", []>, OpSize;
}
// Constructing a stack frame.
@@ -737,18 +745,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
"jmp\t$dst # TAILCALL",
[]>;
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
- "jmp{l}\t{*}$dst # TAILCALL",
- []>;
+ "", []>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
"jmp{l}\t{*}$dst # TAILCALL", []>;
-
- // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL
- // marker on instructions, while still being able to relax.
- let isCodeGenOnly = 1 in {
- def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
- "jmp\t$dst # TAILCALL", []>;
- }
}
//===----------------------------------------------------------------------===//
@@ -815,7 +815,18 @@ def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
Requires<[In32BitMode]>;
}
-let isTwoAddress = 1 in // GR32 = bswap GR32
+let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP],
+ mayLoad=1, neverHasSideEffects=1 in {
+def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>,
+ Requires<[In32BitMode]>;
+}
+let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP],
+ mayStore=1, neverHasSideEffects=1 in {
+def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>,
+ Requires<[In32BitMode]>;
+}
+
+let Uses = [EFLAGS], Constraints = "$src = $dst" in // GR32 = bswap GR32
def BSWAP32r : I<0xC8, AddRegFrm,
(outs GR32:$dst), (ins GR32:$src),
"bswap{l}\t$dst",
@@ -855,11 +866,11 @@ def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
- (outs GR16:$dst), (ins lea32mem:$src),
+ (outs GR16:$dst), (ins i32mem:$src),
"lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
let isReMaterializable = 1 in
def LEA32r : I<0x8D, MRMSrcMem,
- (outs GR32:$dst), (ins lea32mem:$src),
+ (outs GR32:$dst), (ins i32mem:$src),
"lea{l}\t{$src|$dst}, {$dst|$src}",
[(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>;
@@ -1239,7 +1250,7 @@ def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src),
//===----------------------------------------------------------------------===//
// Two address Instructions.
//
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
// Conditional moves
let Uses = [EFLAGS] in {
@@ -1640,7 +1651,7 @@ def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32]
// i8 register pressure. Note that CMOV_GR8 is conservatively considered to
// clobber EFLAGS, because if one of the operands is zero, the expansion
// could involve an xor.
-let usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in {
+let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in {
def CMOV_GR8 : I<0, Pseudo,
(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond),
"#CMOV_GR8 PSEUDO!",
@@ -1659,86 +1670,106 @@ def CMOV_GR16 : I<0, Pseudo,
[(set GR16:$dst,
(X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>;
def CMOV_RFP32 : I<0, Pseudo,
- (outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
+ (outs RFP32:$dst),
+ (ins RFP32:$src1, RFP32:$src2, i8imm:$cond),
"#CMOV_RFP32 PSEUDO!",
- [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
+ [(set RFP32:$dst,
+ (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond,
EFLAGS))]>;
def CMOV_RFP64 : I<0, Pseudo,
- (outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
+ (outs RFP64:$dst),
+ (ins RFP64:$src1, RFP64:$src2, i8imm:$cond),
"#CMOV_RFP64 PSEUDO!",
- [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
+ [(set RFP64:$dst,
+ (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond,
EFLAGS))]>;
def CMOV_RFP80 : I<0, Pseudo,
- (outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
+ (outs RFP80:$dst),
+ (ins RFP80:$src1, RFP80:$src2, i8imm:$cond),
"#CMOV_RFP80 PSEUDO!",
- [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
+ [(set RFP80:$dst,
+ (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond,
EFLAGS))]>;
} // Predicates = [NoCMov]
-} // UsesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS]
+} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS]
} // Uses = [EFLAGS]
// unary instructions
let CodeSize = 2 in {
let Defs = [EFLAGS] in {
-def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst",
- [(set GR8:$dst, (ineg GR8:$src)),
+def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "neg{b}\t$dst",
+ [(set GR8:$dst, (ineg GR8:$src1)),
(implicit EFLAGS)]>;
-def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst",
- [(set GR16:$dst, (ineg GR16:$src)),
+def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
+ "neg{w}\t$dst",
+ [(set GR16:$dst, (ineg GR16:$src1)),
(implicit EFLAGS)]>, OpSize;
-def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst",
- [(set GR32:$dst, (ineg GR32:$src)),
+def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
+ "neg{l}\t$dst",
+ [(set GR32:$dst, (ineg GR32:$src1)),
(implicit EFLAGS)]>;
-let isTwoAddress = 0 in {
- def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst",
+
+let Constraints = "" in {
+ def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst),
+ "neg{b}\t$dst",
[(store (ineg (loadi8 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>;
- def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst",
+ def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst),
+ "neg{w}\t$dst",
[(store (ineg (loadi16 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>, OpSize;
- def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst",
+ def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
+ "neg{l}\t$dst",
[(store (ineg (loadi32 addr:$dst)), addr:$dst),
(implicit EFLAGS)]>;
-}
+} // Constraints = ""
} // Defs = [EFLAGS]
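// Note on the `let Constraints = "" in { ... }` blocks used throughout these
// hunks: each one overrides the enclosing "$src1 = $dst" tie for the memory
// forms, which update memory in place and have no tied register pair. A
// minimal sketch (hypothetical instructions, not part of this patch):
let Constraints = "$src1 = $dst" in {
  def FOOr : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
               "foo\t$dst", []>;        // tied two-address register form
  let Constraints = "" in
  def FOOm : I<0xF7, MRM3m, (outs), (ins i32mem:$dst),
               "foo\t$dst", []>;        // untied memory form
} // Constraints = "$src1 = $dst"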
// Match xor -1 to not. Favors these over a move imm + xor to save code size.
let AddedComplexity = 15 in {
-def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst",
- [(set GR8:$dst, (not GR8:$src))]>;
-def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst",
- [(set GR16:$dst, (not GR16:$src))]>, OpSize;
-def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst",
- [(set GR32:$dst, (not GR32:$src))]>;
+def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "not{b}\t$dst",
+ [(set GR8:$dst, (not GR8:$src1))]>;
+def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
+ "not{w}\t$dst",
+ [(set GR16:$dst, (not GR16:$src1))]>, OpSize;
+def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
+ "not{l}\t$dst",
+ [(set GR32:$dst, (not GR32:$src1))]>;
}
-let isTwoAddress = 0 in {
- def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst",
+let Constraints = "" in {
+ def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst),
+ "not{b}\t$dst",
[(store (not (loadi8 addr:$dst)), addr:$dst)]>;
- def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst",
+ def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst),
+ "not{w}\t$dst",
[(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize;
- def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst",
+ def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst),
+ "not{l}\t$dst",
[(store (not (loadi32 addr:$dst)), addr:$dst)]>;
-}
+} // Constraints = ""
} // CodeSize
// TODO: inc/dec is slow for P4, but fast for Pentium-M.
let Defs = [EFLAGS] in {
let CodeSize = 2 in
-def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src))]>;
+def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "inc{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
+def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"inc{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>,
OpSize, Requires<[In32BitMode]>;
-def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
+def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"inc{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>,
Requires<[In32BitMode]>;
}
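// When the destination cannot be allocated to the same register as the tied
// source, instructions marked isConvertibleToThreeAddress are rewritten by
// the two-address pass instead of being preceded by a copy; e.g. (a sketch):
//
//   incl %eax          ; result wanted in a different register
//     ==>  leal 1(%eax), %ecx
//
// This is also why INC8r stays outside the block: there is no i8 LEA form.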
-let isTwoAddress = 0, CodeSize = 2 in {
+let Constraints = "", CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)]>;
@@ -1750,23 +1781,24 @@ let isTwoAddress = 0, CodeSize = 2 in {
[(store (add (loadi32 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)]>,
Requires<[In32BitMode]>;
-}
+} // Constraints = "", CodeSize = 2
let CodeSize = 2 in
-def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst",
- [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src))]>;
+def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
+ "dec{b}\t$dst",
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
+def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1),
"dec{w}\t$dst",
- [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>,
OpSize, Requires<[In32BitMode]>;
-def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
+def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
"dec{l}\t$dst",
- [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>,
Requires<[In32BitMode]>;
-}
+} // isConvertibleToThreeAddress = 1, CodeSize = 1
-let isTwoAddress = 0, CodeSize = 2 in {
+let Constraints = "", CodeSize = 2 in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)]>;
@@ -1778,7 +1810,7 @@ let isTwoAddress = 0, CodeSize = 2 in {
[(store (add (loadi32 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)]>,
Requires<[In32BitMode]>;
-}
+} // Constraints = "", CodeSize = 2
} // Defs = [EFLAGS]
// Logical operators...
@@ -1857,7 +1889,7 @@ def AND32ri8 : Ii8<0x83, MRM4r,
[(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def AND8mr : I<0x20, MRMDestMem,
(outs), (ins i8mem :$dst, GR8 :$src),
"and{b}\t{$src, $dst|$dst, $src}",
@@ -1909,7 +1941,7 @@ let isTwoAddress = 0 in {
def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src),
"and{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
@@ -1983,7 +2015,7 @@ def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst),
"or{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"or{b}\t{$src, $dst|$dst, $src}",
[(store (or (load addr:$dst), GR8:$src), addr:$dst),
@@ -2025,7 +2057,7 @@ let isTwoAddress = 0 in {
"or{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src),
"or{l}\t{$src, %eax|%eax, $src}", []>;
-} // isTwoAddress = 0
+} // Constraints = ""
let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
@@ -2102,7 +2134,7 @@ def XOR32ri8 : Ii8<0x83, MRM6r,
[(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def XOR8mr : I<0x30, MRMDestMem,
(outs), (ins i8mem :$dst, GR8 :$src),
"xor{b}\t{$src, $dst|$dst, $src}",
@@ -2153,26 +2185,27 @@ let isTwoAddress = 0 in {
"xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
"xor{l}\t{$src, %eax|%eax, $src}", []>;
-} // isTwoAddress = 0
+} // Constraints = ""
} // Defs = [EFLAGS]
// Shift instructions
let Defs = [EFLAGS] in {
let Uses = [CL] in {
-def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
+def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1),
"shl{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (shl GR8:$src, CL))]>;
-def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (shl GR8:$src1, CL))]>;
+def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1),
"shl{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize;
-def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize;
+def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
"shl{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (shl GR32:$src, CL))]>;
+ [(set GR32:$dst, (shl GR32:$src1, CL))]>;
} // Uses = [CL]
def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
"shl{b}\t{$src2, $dst|$dst, $src2}",
[(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>;
+
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2),
"shl{w}\t{$src2, $dst|$dst, $src2}",
@@ -2193,7 +2226,7 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1),
} // isConvertibleToThreeAddress = 1
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
"shl{b}\t{%cl, $dst|$dst, CL}",
@@ -2227,18 +2260,18 @@ let isTwoAddress = 0 in {
def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst),
"shl{l}\t$dst",
[(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
let Uses = [CL] in {
-def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
+def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1),
"shr{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (srl GR8:$src, CL))]>;
-def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (srl GR8:$src1, CL))]>;
+def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1),
"shr{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize;
-def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize;
+def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
"shr{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (srl GR32:$src, CL))]>;
+ [(set GR32:$dst, (srl GR32:$src1, CL))]>;
}
def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
@@ -2262,7 +2295,7 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
"shr{l}\t$dst",
[(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
"shr{b}\t{%cl, $dst|$dst, CL}",
@@ -2296,18 +2329,18 @@ let isTwoAddress = 0 in {
def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst),
"shr{l}\t$dst",
[(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
let Uses = [CL] in {
-def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
+def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1),
"sar{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (sra GR8:$src, CL))]>;
-def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (sra GR8:$src1, CL))]>;
+def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1),
"sar{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize;
-def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize;
+def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
"sar{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (sra GR32:$src, CL))]>;
+ [(set GR32:$dst, (sra GR32:$src1, CL))]>;
}
def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2332,7 +2365,7 @@ def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
"sar{l}\t$dst",
[(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
"sar{b}\t{%cl, $dst|$dst, CL}",
@@ -2366,65 +2399,65 @@ let isTwoAddress = 0 in {
def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst),
"sar{l}\t$dst",
[(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
// Rotate instructions
-def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src),
+def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1),
"rcl{b}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
"rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize;
let Uses = [CL] in {
-def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src),
+def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1),
"rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
}
-def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
"rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"rcl{l}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src),
+def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1),
"rcl{l}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
"rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src),
+def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1),
"rcr{b}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt),
+def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt),
"rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>;
-def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize;
let Uses = [CL] in {
-def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src),
+def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1),
"rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize;
}
-def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt),
+def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt),
"rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize;
-def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t{1, $dst|$dst, 1}", []>;
let Uses = [CL] in {
-def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src),
+def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1),
"rcr{l}\t{%cl, $dst|$dst, CL}", []>;
}
-def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt),
+def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt),
"rcr{l}\t{$cnt, $dst|$dst, $cnt}", []>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst),
"rcl{b}\t{1, $dst|$dst, 1}", []>;
def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt),
@@ -2464,19 +2497,19 @@ def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst),
def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst),
"rcr{l}\t{%cl, $dst|$dst, CL}", []>;
}
-}
+} // Constraints = ""
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
-def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
+def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1),
"rol{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotl GR8:$src, CL))]>;
-def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (rotl GR8:$src1, CL))]>;
+def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1),
"rol{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize;
-def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize;
+def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"rol{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotl GR32:$src, CL))]>;
+ [(set GR32:$dst, (rotl GR32:$src1, CL))]>;
}
def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2501,7 +2534,7 @@ def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
"rol{l}\t$dst",
[(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
"rol{b}\t{%cl, $dst|$dst, CL}",
@@ -2535,18 +2568,18 @@ let isTwoAddress = 0 in {
def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst),
"rol{l}\t$dst",
[(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
+} // Constraints = ""
let Uses = [CL] in {
-def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
+def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t{%cl, $dst|$dst, CL}",
- [(set GR8:$dst, (rotr GR8:$src, CL))]>;
-def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
+ [(set GR8:$dst, (rotr GR8:$src1, CL))]>;
+def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"ror{w}\t{%cl, $dst|$dst, CL}",
- [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize;
-def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
+ [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize;
+def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t{%cl, $dst|$dst, CL}",
- [(set GR32:$dst, (rotr GR32:$src, CL))]>;
+ [(set GR32:$dst, (rotr GR32:$src1, CL))]>;
}
def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2),
@@ -2571,7 +2604,7 @@ def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t$dst",
[(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
"ror{b}\t{%cl, $dst|$dst, CL}",
@@ -2605,8 +2638,7 @@ let isTwoAddress = 0 in {
def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst),
"ror{l}\t$dst",
[(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>;
-}
-
+} // Constraints = ""
// Double shift instructions (generalizations of rotate)
@@ -2662,7 +2694,7 @@ def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
TB, OpSize;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
let Uses = [CL] in {
def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
"shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
@@ -2708,7 +2740,7 @@ let isTwoAddress = 0 in {
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2,
(i8 imm:$src3)), addr:$dst)]>,
TB, OpSize;
-}
+} // Constraints = ""
} // Defs = [EFLAGS]
@@ -2794,7 +2826,7 @@ def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
(X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
}
-let isTwoAddress = 0 in {
+let Constraints = "" in {
// Memory-Register Addition
def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
@@ -2838,7 +2870,7 @@ let isTwoAddress = 0 in {
"add{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src),
"add{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let Uses = [EFLAGS] in {
let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y
@@ -2900,7 +2932,7 @@ def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst),
"adc{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"adc{b}\t{$src2, $dst|$dst, $src2}",
[(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>;
@@ -2935,7 +2967,7 @@ let isTwoAddress = 0 in {
"adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src),
"adc{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
} // Uses = [EFLAGS]
// Register-Register Subtraction
@@ -3007,7 +3039,7 @@ def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
[(set GR32:$dst, EFLAGS,
(X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
// Memory-Register Subtraction
def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
@@ -3052,7 +3084,7 @@ let isTwoAddress = 0 in {
"sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src),
"sub{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let Uses = [EFLAGS] in {
def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst),
@@ -3068,7 +3100,7 @@ def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst),
"sbb{l}\t{$src2, $dst|$dst, $src2}",
[(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>;
-let isTwoAddress = 0 in {
+let Constraints = "" in {
def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}",
[(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>;
@@ -3103,7 +3135,7 @@ let isTwoAddress = 0 in {
"sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src),
"sbb{l}\t{$src, %eax|%eax, $src}", []>;
-}
+} // Constraints = ""
let isCodeGenOnly = 1 in {
def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
@@ -3811,6 +3843,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
// Thread Local Storage Instructions
//
+// ELF TLS Support
// All calls clobber the non-callee saved registers. ESP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead.
@@ -3819,12 +3852,24 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym),
+def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"leal\t$sym, %eax; "
"call\t___tls_get_addr@PLT",
[(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
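// For reference, a general-dynamic access to a variable declared as
// `__thread int x;` is matched by the X86tlsaddr node above and lowered,
// roughly (a sketch of the usual i386 ELF sequence), to:
//
//   leal  x@TLSGD(,%ebx), %eax
//   call  ___tls_get_addr@PLT      ; &x is returned in %eax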
+// Darwin TLS Support
+// For i386, the address of the thunk is passed on the stack; on return, the
+// address of the variable is in %eax. %ecx is trashed during the function
+// call. All other registers are preserved.
+let Defs = [EAX, ECX],
+ Uses = [ESP],
+ usesCustomInserter = 1 in
+def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
+ "# TLSCall_32",
+ [(X86TLSCall addr:$sym)]>,
+ Requires<[In32BitMode]>;
+
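// Sketch of what the custom inserter produces for the TLSCall_32 pseudo
// above, following the Darwin TLV convention just described (illustrative,
// not the verbatim expansion):
//
//   movl  _x@TLVP, %eax      ; %eax = address of the variable's TLV descriptor
//   call  *(%eax)            ; call its thunk; &x comes back in %eax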
let AddedComplexity = 5, isCodeGenOnly = 1 in
def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"movl\t%gs:$src, $dst",
@@ -4783,14 +4828,14 @@ def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
// Patterns for nodes that do not produce flags, for instructions that do.
// Increment reg.
-def : Pat<(add GR8:$src , 1), (INC8r GR8:$src)>;
-def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR8:$src1 , 1), (INC8r GR8:$src1)>;
+def : Pat<(add GR16:$src1, 1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src1, 1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>;
// Decrement reg.
-def : Pat<(add GR8:$src , -1), (DEC8r GR8:$src)>;
-def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR8:$src1 , -1), (DEC8r GR8:$src1)>;
+def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>;
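// Sketch: a plain (add GR32:$x, 1) node carries no flag result, yet the
// patterns above still map it onto INC32r, whose EFLAGS def is simply left
// dead; so "x + 1" compiles to "incl" rather than "addl $1" in 32-bit mode.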
// or reg/reg.
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 0952fc8..6cf7ac8 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -513,30 +513,20 @@ def : Pat<(store (v4i16 VR64:$src), addr:$dst),
(MMX_MOVQ64mr addr:$dst, VR64:$src)>;
def : Pat<(store (v2i32 VR64:$src), addr:$dst),
(MMX_MOVQ64mr addr:$dst, VR64:$src)>;
-def : Pat<(store (v2f32 VR64:$src), addr:$dst),
- (MMX_MOVQ64mr addr:$dst, VR64:$src)>;
def : Pat<(store (v1i64 VR64:$src), addr:$dst),
(MMX_MOVQ64mr addr:$dst, VR64:$src)>;
// Bit convert.
def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>;
def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>;
-def : Pat<(v8i8 (bitconvert (v2f32 VR64:$src))), (v8i8 VR64:$src)>;
def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>;
def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>;
-def : Pat<(v4i16 (bitconvert (v2f32 VR64:$src))), (v4i16 VR64:$src)>;
def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2i32 (bitconvert (v2f32 VR64:$src))), (v2i32 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>;
def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v1i64 VR64:$src))), (v2f32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v2i32 VR64:$src))), (v2f32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v4i16 VR64:$src))), (v2f32 VR64:$src)>;
-def : Pat<(v2f32 (bitconvert (v8i8 VR64:$src))), (v2f32 VR64:$src)>;
def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>;
-def : Pat<(v1i64 (bitconvert (v2f32 VR64:$src))), (v1i64 VR64:$src)>;
def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>;
def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>;
@@ -545,8 +535,6 @@ def : Pat<(v1i64 (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(v2i32 (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
-def : Pat<(v2f32 (bitconvert (i64 GR64:$src))),
- (MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(v4i16 (bitconvert (i64 GR64:$src))),
(MMX_MOVD64to64rr GR64:$src)>;
def : Pat<(v8i8 (bitconvert (i64 GR64:$src))),
@@ -555,8 +543,6 @@ def : Pat<(i64 (bitconvert (v1i64 VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(i64 (bitconvert (v2i32 VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
-def : Pat<(i64 (bitconvert (v2f32 VR64:$src))),
- (MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(i64 (bitconvert (v4i16 VR64:$src))),
(MMX_MOVD64from64rr VR64:$src)>;
def : Pat<(i64 (bitconvert (v8i8 VR64:$src))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 5580ba7..ab0005b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -15,322 +15,6 @@
//===----------------------------------------------------------------------===//
-// SSE specific DAG Nodes.
-//===----------------------------------------------------------------------===//
-
-def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
- SDTCisFP<0>, SDTCisInt<2> ]>;
-def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
- SDTCisFP<1>, SDTCisVT<3, i8>]>;
-
-def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
-def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
-def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]>;
-def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]>;
-def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]>;
-def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
-def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
-def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
-def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
-def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86pshufb : SDNode<"X86ISD::PSHUFB",
- SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisSameAs<0,2>]>>;
-def X86pextrb : SDNode<"X86ISD::PEXTRB",
- SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
-def X86pextrw : SDNode<"X86ISD::PEXTRW",
- SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
-def X86pinsrb : SDNode<"X86ISD::PINSRB",
- SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86pinsrw : SDNode<"X86ISD::PINSRW",
- SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
- SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86insrtps : SDNode<"X86ISD::INSERTPS",
- SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
- SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
-def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
- SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
-def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
- [SDNPHasChain, SDNPMayLoad]>;
-def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
-def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
-def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
-def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
-def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
-def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
-def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
-def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
-
-def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
- SDTCisVT<1, v4f32>,
- SDTCisVT<2, v4f32>]>;
-def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
-
-//===----------------------------------------------------------------------===//
-// SSE Complex Patterns
-//===----------------------------------------------------------------------===//
-
-// These are 'extloads' from a scalar to the low element of a vector, zeroing
-// the top elements. These are used for the SSE 'ss' and 'sd' instruction
-// forms.
-def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad]>;
-def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
- [SDNPHasChain, SDNPMayLoad]>;
-
-def ssmem : Operand<v4f32> {
- let PrintMethod = "printf32mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
-}
-def sdmem : Operand<v2f64> {
- let PrintMethod = "printf64mem";
- let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
- let ParserMatchClass = X86MemAsmOperand;
-}
-
-//===----------------------------------------------------------------------===//
-// SSE pattern fragments
-//===----------------------------------------------------------------------===//
-
-def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
-def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
-def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
-def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
-
-// Like 'store', but always requires vector alignment.
-def alignedstore : PatFrag<(ops node:$val, node:$ptr),
- (store node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-// Like 'load', but always requires vector alignment.
-def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def alignedloadfsf32 : PatFrag<(ops node:$ptr),
- (f32 (alignedload node:$ptr))>;
-def alignedloadfsf64 : PatFrag<(ops node:$ptr),
- (f64 (alignedload node:$ptr))>;
-def alignedloadv4f32 : PatFrag<(ops node:$ptr),
- (v4f32 (alignedload node:$ptr))>;
-def alignedloadv2f64 : PatFrag<(ops node:$ptr),
- (v2f64 (alignedload node:$ptr))>;
-def alignedloadv4i32 : PatFrag<(ops node:$ptr),
- (v4i32 (alignedload node:$ptr))>;
-def alignedloadv2i64 : PatFrag<(ops node:$ptr),
- (v2i64 (alignedload node:$ptr))>;
-
-// Like 'load', but uses special alignment checks suitable for use in
-// memory operands in most SSE instructions, which are required to
-// be naturally aligned on some targets but not on others. If the subtarget
-// allows unaligned accesses, match any load, though this may require
-// setting a feature bit in the processor (on startup, for example).
-// Opteron 10h and later implement such a feature.
-def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return Subtarget->hasVectorUAMem()
- || cast<LoadSDNode>(N)->getAlignment() >= 16;
-}]>;
-
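// Usage sketch (assumed pattern shape, not taken from this patch): a
// load-folding instruction pattern such as
//
//   [(set VR128:$dst, (fadd VR128:$src1, (memopv4f32 addr:$src2)))]
//
// only matches when the predicate above accepts the load, so an unaligned
// load folds into an SSE op solely on subtargets with hasVectorUAMem().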
-def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
-def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
-def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
-def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
-def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
-def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
-def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
-
-// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
-// 16-byte boundary.
-// FIXME: 8 byte alignment for mmx reads is not required
-def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- return cast<LoadSDNode>(N)->getAlignment() >= 8;
-}]>;
-
-def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
-def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
-def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
-def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
-
-// MOVNT Support
-// Like 'store', but requires the non-temporal bit to be set
-def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal();
- return false;
-}]>;
-
-def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal() && !ST->isTruncatingStore() &&
- ST->getAddressingMode() == ISD::UNINDEXED &&
- ST->getAlignment() >= 16;
- return false;
-}]>;
-
-def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
- return ST->isNonTemporal() &&
- ST->getAlignment() < 16;
- return false;
-}]>;
-
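// Usage sketch: the MOVNT definitions elsewhere in this file pattern-match
// these fragments along the lines of (illustrative)
//
//   [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]
//
// so the aligned MOVNTPS encoding is only selected once the 16-byte
// alignment check above succeeds.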
-def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
-def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
-def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
-def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
-def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
-def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
-
-def vzmovl_v2i64 : PatFrag<(ops node:$src),
- (bitconvert (v2i64 (X86vzmovl
- (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
-def vzmovl_v4i32 : PatFrag<(ops node:$src),
- (bitconvert (v4i32 (X86vzmovl
- (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
-
-def vzload_v2i64 : PatFrag<(ops node:$src),
- (bitconvert (v2i64 (X86vzload node:$src)))>;
-
-
-def fp32imm0 : PatLeaf<(f32 fpimm), [{
- return N->isExactlyValue(+0.0);
-}]>;
-
-// BYTE_imm - Transform bit immediates into byte immediates.
-def BYTE_imm : SDNodeXForm<imm, [{
- // Transformation function: imm >> 3
- return getI32Imm(N->getZExtValue() >> 3);
-}]>;
-
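// Worked example: PSLLDQ/PSRLDQ take a byte count while the DAG expresses
// these shifts in bits, so a 40-bit shift is matched with imm = 40 and the
// xform above emits the byte immediate 40 >> 3 = 5.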
-// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
-// SHUFP* etc. imm.
-def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShuffleSHUFImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
-// PSHUFHW imm.
-def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
-// PSHUFLW imm.
-def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
-}]>;
-
-// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
-// a PALIGNR imm.
-def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
- return getI8Imm(X86::getShufflePALIGNRImmediate(N));
-}]>;
-
-def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
-}]>;
-
-def movddup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movlp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
-
-def shufp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
-
-def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_pshufhw_imm>;
-
-def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_pshuflw_imm>;
-
-def palign : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_palign_imm>;
-
-//===----------------------------------------------------------------------===//
// SSE scalar FP Instructions
//===----------------------------------------------------------------------===//
@@ -368,857 +52,642 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
}
//===----------------------------------------------------------------------===//
-// SSE1 Instructions
+// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//
-// Move Instructions. Register-to-register movss is not used for FR32
-// register copies because it's a partial register update; FsMOVAPSrr is
-// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG
-// because INSERT_SUBREG requires that the insert be implementable in terms of
-// a copy, and just mentioned, we don't use movss for copies.
-let Constraints = "$src1 = $dst" in
-def MOVSSrr : SSI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}",
- [(set (v4f32 VR128:$dst),
- (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
+/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
+multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, X86MemOperand x86memop,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in {
+ def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, RC:$src2))]>;
+ }
+ def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))]>;
+}
+
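// Illustrative instantiation (hypothetical defm, not part of this patch),
// showing how the Is2Addr bit selects between the SSE and AVX assembly forms:
defm VADDSS_SKETCH : sse12_fp_scalar<0x58, "addss", fadd, FR32, f32mem, 0>,
                     XS, VEX_4V;
// With Is2Addr = 0 the !if above picks the three-operand AVX string
// "addss\t{$src2, $src1, $dst|$dst, $src1, $src2}"; the default Is2Addr = 1
// yields the tied two-operand SSE form instead.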
+/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
+multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ Operand memopr, ComplexPattern mem_cpat,
+ bit Is2Addr = 1> {
+ def rr_Int : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, RC:$src2))]>;
+ def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, mem_cpat:$src2))]>;
+}
+
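// Worked example of the intrinsic-name construction above (hypothetical
// parameter values): with SSEVer = "2", OpcodeStr = "add" and
// FPSizeStr = "_sd", the nested !strconcat calls build the string
// "int_x86_sse2_add_sd", which !nameconcat<Intrinsic> resolves to the
// int_x86_sse2_add_sd intrinsic record.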
+/// sse12_fp_packed - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ RegisterClass RC, ValueType vt,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ let mayLoad = 1 in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
+multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
+ string OpcodeStr, X86MemOperand x86memop,
+ list<dag> pat_rr, list<dag> pat_rm,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rr, d>;
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ pat_rm, d>;
+}
+
+/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class
+multiclass sse12_fp_packed_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ string asm, string SSEVer, string FPSizeStr,
+ X86MemOperand x86memop, PatFrag mem_frag,
+ Domain d, bit Is2Addr = 1> {
+ def rr_Int : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, RC:$src2))], d>;
+ def rm_Int : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1,x86memop:$src2),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (!nameconcat<Intrinsic>("int_x86_sse",
+ !strconcat(SSEVer, !strconcat("_",
+ !strconcat(OpcodeStr, FPSizeStr))))
+ RC:$src1, (mem_frag addr:$src2)))], d>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Move Instructions
+//===----------------------------------------------------------------------===//
+
+class sse12_move_rr<RegisterClass RC, ValueType vt, string asm> :
+ SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm,
+ [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>;
+
+// Loading from memory, automatically zeroing the upper bits.
+class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
+ PatFrag mem_pat, string OpcodeStr> :
+ SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (mem_pat addr:$src))]>;
+
+// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
+// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr
+// is used instead. Register-to-register movss/movsd is not modeled as an
+// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
+// in terms of a copy, and, as just mentioned, we don't use movss/movsd for
+// copies.
+let isAsmParserOnly = 1 in {
+ def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+ def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+
+ let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+
+ let AddedComplexity = 20 in
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
+ }
+}
+
+let Constraints = "$src1 = $dst" in {
+ def MOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $dst|$dst, $src2}">, XS;
+ def MOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $dst|$dst, $src2}">, XD;
+}
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def MOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;
+ let AddedComplexity = 20 in
+ def MOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD;
+}
+
+let AddedComplexity = 15 in {
// Extract the low 32-bit value from one vector and insert it into another.
-let AddedComplexity = 15 in
def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
(MOVSSrr (v4f32 VR128:$src1),
(EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
+// Extract the low 64-bit value from one vector and insert it into another.
+def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr (v2f64 VR128:$src1),
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
+}
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-// Loading from memory automatically zeroing upper bits.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (loadf32 addr:$src))]>;
-
+let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG.
-let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
+// MOVSDrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
}
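// Worked sketch: for IR such as
//
//   %f = load float* %p
//   %v = insertelement <4 x float> zeroinitializer, float %f, i32 0
//
// the patterns above select a single MOVSSrm; SUBREG_TO_REG merely records
// that bits 32-127 of the xmm register are already zero after the scalar
// load, so no extra zeroing instruction is emitted.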
// Store scalar value to memory.
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>;
+def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>;
+
+let isAsmParserOnly = 1 in {
+def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
+ "movss\t{$src, $dst|$dst, $src}",
+ [(store FR32:$src, addr:$dst)]>, XS, VEX_4V;
+def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
+ "movsd\t{$src, $dst|$dst, $src}",
+ [(store FR64:$src, addr:$dst)]>, XD, VEX_4V;
+}
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSSmr addr:$dst,
(EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-// Conversion instructions
-def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint FR32:$src))]>;
-def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>;
-def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
- "cvtsi2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp GR32:$src))]>;
-def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
- "cvtsi2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
-
-// Match intrinsics which expect XMM operand(s).
-def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
- "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>;
-def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>;
-
-def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvtss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>;
-def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvtss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse_cvtss2si
- (load addr:$src)))]>;
-
-// Match intrinsics which expect MM and XMM operand(s).
-def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
-def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvtps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtps2pi
- (load addr:$src)))]>;
-def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>;
-def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
- "cvttps2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttps2pi
- (load addr:$src)))]>;
-let Constraints = "$src1 = $dst" in {
- def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR64:$src2),
- "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
- VR64:$src2))]>;
- def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2),
- "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1,
- (load addr:$src2)))]>;
-}
-
-// Aliases for intrinsics
-def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
- (int_x86_sse_cvttss2si VR128:$src))]>;
-def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
- "cvttss2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
- (int_x86_sse_cvttss2si(load addr:$src)))]>;
-
-let Constraints = "$src1 = $dst" in {
- def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
- GR32:$src2))]>;
- def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
- "cvtsi2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1,
- (loadi32 addr:$src2)))]>;
-}
-
-// Comparison instructions
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- def CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
- def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
-
- // Accept explicit immediate argument form instead of comparison code.
-let isAsmParserOnly = 1 in {
- def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-let mayLoad = 1 in
- def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
- "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-}
-}
-
-let Defs = [EFLAGS] in {
-def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>;
-def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>;
-
-def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}", []>;
-def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}", []>;
-
-} // Defs = [EFLAGS]
-
-// Aliases to match intrinsics which expect XMM operand(s).
-let Constraints = "$src1 = $dst" in {
- def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ss
- VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src, SSECC:$cc),
- "cmp${cc}ss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
- (load addr:$src), imm:$cc))]>;
-}
-
-let Defs = [EFLAGS] in {
-def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1),
- VR128:$src2))]>;
-def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
- "ucomiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1),
- (load addr:$src2)))]>;
-
-def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v4f32 VR128:$src1),
- VR128:$src2))]>;
-def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comiss\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v4f32 VR128:$src1),
- (load addr:$src2)))]>;
-} // Defs = [EFLAGS]
-
-// Aliases of packed SSE1 instructions for scalar use. These all have names
-// that start with 'Fs'.
-
-// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in
- // FIXME: Set encoding to pseudo!
-def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>,
- Requires<[HasSSE1]>, TB, OpSize;
-
-// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are
-// disregarded.
+// Move Aligned/Unaligned floating point values
+multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
+ X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d,
+ bit IsReMaterializable = 1> {
let neverHasSideEffects = 1 in
-def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
-
-// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
-// disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
-
-// Alias bitwise logical operations using SSE logical ops on packed FP values.
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in {
- def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst),
- (ins FR32:$src1, FR32:$src2),
- "andps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>;
- def FsORPSrr : PSI<0x56, MRMSrcReg, (outs FR32:$dst),
- (ins FR32:$src1, FR32:$src2),
- "orps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>;
- def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst),
- (ins FR32:$src1, FR32:$src2),
- "xorps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>;
-}
-
-def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f128mem:$src2),
- "andps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fand FR32:$src1,
- (memopfsf32 addr:$src2)))]>;
-def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f128mem:$src2),
- "orps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86for FR32:$src1,
- (memopfsf32 addr:$src2)))]>;
-def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f128mem:$src2),
- "xorps\t{$src2, $dst|$dst, $src2}",
- [(set FR32:$dst, (X86fxor FR32:$src1,
- (memopfsf32 addr:$src2)))]>;
-
-let neverHasSideEffects = 1 in {
-def FsANDNPSrr : PSI<0x55, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- "andnps\t{$src2, $dst|$dst, $src2}", []>;
-let mayLoad = 1 in
-def FsANDNPSrm : PSI<0x55, MRMSrcMem,
- (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
- "andnps\t{$src2, $dst|$dst, $src2}", []>;
-}
-}
-
-/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms.
-///
-/// In addition, we also have a special variant of the scalar form here to
-/// represent the associated intrinsic operation. This form is unlike the
-/// plain scalar form, in that it takes an entire vector (instead of a scalar)
-/// and leaves the top elements unmodified (therefore these cannot be commuted).
-///
-/// These three forms can each be reg+reg or reg+mem, so there are a total of
-/// six "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass basic_sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F32Int,
- bit Commutable = 0> {
- // Scalar operation, reg+reg.
- def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, reg+mem.
- def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f32mem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
-
- // Vector operation, reg+reg.
- def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, reg+mem.
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
-
- // Intrinsic operation, reg+reg.
- def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]>;
-
- // Intrinsic operation, reg+mem.
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F32Int VR128:$src1,
- sse_load_f32:$src2))]>;
+ def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], d>;
+let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
+ def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (ld_frag addr:$src))], d>;
}
-}
-
-// Arithmetic instructions
-defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>;
-defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>;
-defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>;
-defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>;
-/// sse1_fp_binop_rm - Other SSE1 binops
-///
-/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of
-/// instructions for a full-vector intrinsic form. Operations that map
-/// onto C operators don't use this form since they just use the plain
-/// vector form instead of having a separate vector intrinsic form.
-///
-/// This provides a total of eight "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass sse1_fp_binop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- Intrinsic F32Int,
- Intrinsic V4F32Int,
- bit Commutable = 0> {
-
- // Scalar operation, reg+reg.
- def SSrr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set FR32:$dst, (OpNode FR32:$src1, FR32:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, reg+mem.
- def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
- (ins FR32:$src1, f32mem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set FR32:$dst, (OpNode FR32:$src1, (load addr:$src2)))]>;
-
- // Vector operation, reg+reg.
- def PSrr : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, reg+mem.
- def PSrm : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv4f32 addr:$src2)))]>;
-
- // Intrinsic operation, reg+reg.
- def SSrr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Intrinsic operation, reg+mem.
- def SSrm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2),
- !strconcat(OpcodeStr, "ss\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F32Int VR128:$src1,
- sse_load_f32:$src2))]>;
-
- // Vector intrinsic operation, reg+reg.
- def PSrr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Vector intrinsic operation, reg+mem.
- def PSrm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "ps\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, (memopv4f32 addr:$src2)))]>;
-}
+let isAsmParserOnly = 1 in {
+defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, VEX;
+defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, OpSize, VEX;
+
+defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
+ "movaps", SSEPackedSingle>, VEX;
+defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
+ "movapd", SSEPackedDouble>, OpSize, VEX;
+defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
+ "movups", SSEPackedSingle>, VEX;
+defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
+ "movupd", SSEPackedDouble, 0>, OpSize, VEX;
}
+defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
+ "movaps", SSEPackedSingle>, TB;
+defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
+ "movapd", SSEPackedDouble>, TB, OpSize;
+defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
+ "movups", SSEPackedSingle>, TB;
+defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
+ "movupd", SSEPackedDouble, 0>, TB, OpSize;
-defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax,
- int_x86_sse_max_ss, int_x86_sse_max_ps>;
-defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
- int_x86_sse_min_ss, int_x86_sse_min_ps>;
-
-//===----------------------------------------------------------------------===//
-// SSE packed FP Instructions
-
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movaps\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+let isAsmParserOnly = 1 in {
+def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
-
+ [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v4f32 VR128:$src), addr:$dst)]>, VEX;
+def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>, VEX;
+def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX;
+def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX;
+def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(store (v8f32 VR256:$src), addr:$dst)]>, VEX;
+def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
+}
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>;
-
-let neverHasSideEffects = 1 in
-def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (loadv4f32 addr:$src))]>;
+def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(store (v4f32 VR128:$src), addr:$dst)]>;
+def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(store (v2f64 VR128:$src), addr:$dst)]>;
-// Intrinsic forms of MOVUPS load and store
+// Intrinsic forms of MOVUPS/D load and store
+let isAsmParserOnly = 1 in {
+ let canFoldAsLoad = 1, isReMaterializable = 1 in
+ def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
+ def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
+ def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+ def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
+}
let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
+def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
+
def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
[(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>;
+def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
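+
+// The _Int records above match the int_x86_sse*_loadu/storeu intrinsics
+// directly, while the plain MOVUPS/MOVUPD definitions earlier match generic
+// unaligned load/store nodes, so both selection paths stay covered.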
-let Constraints = "$src1 = $dst" in {
- let AddedComplexity = 20 in {
- def MOVLPSrm : PSI<0x12, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movlps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (movlp VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
- def MOVHPSrm : PSI<0x16, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (movlhps VR128:$src1,
- (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
+// Move Low/High packed floating point values
+multiclass sse12_mov_hilo_packed<bits<8> opc, RegisterClass RC,
+                                 PatFrag mov_frag, string base_opc,
+                                 string asm_opr> {
+  def PSrm : PI<opc, MRMSrcMem,
+         (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+         !strconcat(!strconcat(base_opc, "s"), asm_opr),
+             [(set RC:$dst,
+               (mov_frag RC:$src1,
+                      (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
+              SSEPackedSingle>, TB;
+
+  def PDrm : PI<opc, MRMSrcMem,
+         (outs RC:$dst), (ins RC:$src1, f64mem:$src2),
+         !strconcat(!strconcat(base_opc, "d"), asm_opr),
+             [(set RC:$dst, (v2f64 (mov_frag RC:$src1,
+                            (scalar_to_vector (loadf64 addr:$src2)))))],
+              SSEPackedDouble>, TB, OpSize;
+}
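+
+// Expansion sketch: "defm MOVL" yields MOVLPSrm/MOVLPDrm and "defm MOVH"
+// yields MOVHPSrm/MOVHPDrm; base_opc gains an "s" or "d" suffix to form the
+// mnemonic, and asm_opr supplies the operand text, so the AVX and SSE
+// instantiations below differ only in operand syntax and encoding.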
-def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
- (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+ defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+ defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
+ "\t{$src2, $dst|$dst, $src2}">;
+ defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
+ "\t{$src2, $dst|$dst, $src2}">;
+}
+let isAsmParserOnly = 1 in {
+def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+}
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
(iPTR 0))), addr:$dst)]>;
+def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movlpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract (v2f64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
+let isAsmParserOnly = 1 in {
+def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhps\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (unpckh (bc_v2f64 (v4f32 VR128:$src)),
+ (undef)), (iPTR 0))), addr:$dst)]>,
+ VEX;
+def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>,
+ VEX;
+}
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
(unpckh (bc_v2f64 (v4f32 VR128:$src)),
(undef)), (iPTR 0))), addr:$dst)]>;
+def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
+ "movhpd\t{$src, $dst|$dst, $src}",
+ [(store (f64 (vector_extract
+ (v2f64 (unpckh VR128:$src, (undef))),
+ (iPTR 0))), addr:$dst)]>;
-let Constraints = "$src1 = $dst" in {
-let AddedComplexity = 20 in {
-def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- "movlhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
-
-def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- "movhlps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
-} // AddedComplexity
-} // Constraints = "$src1 = $dst"
+let isAsmParserOnly = 1, AddedComplexity = 20 in {
+ def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+ def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
+ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movlhps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
+ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ "movhlps\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>;
+}
+def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
+ (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>;
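+// This pattern folds a zero-extending vector load feeding movlhps into
+// MOVHPSrm, so the upper 64 bits are loaded straight from memory.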
let AddedComplexity = 20 in {
-def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
-def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
+ def : Pat<(v4f32 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>;
+ def : Pat<(v2i64 (movddup VR128:$src, (undef))),
+ (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>;
}
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Conversion Instructions
+//===----------------------------------------------------------------------===//
+multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))]>;
+}
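+
+// Expansion sketch: sse12_cvt_s instantiations append "rr"/"rm" to the defm
+// name, so "defm CVTTSS2SI" below yields roughly
+//   def CVTTSS2SIrr : SI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
+//                        "cvttss2si\t{$src, $dst|$dst, $src}",
+//                        [(set GR32:$dst, (fp_to_sint FR32:$src))]>, XS;
+// together with the matching CVTTSS2SIrm load form.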
-// Arithmetic
-
-/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms.
-///
-/// In addition, we also have a special variant of the scalar form here to
-/// represent the associated intrinsic operation. This form is unlike the
-/// plain scalar form, in that it takes an entire vector (instead of a
-/// scalar) and leaves the top elements undefined.
-///
-/// And, we have a special variant form for a full-vector intrinsic form.
-///
-/// These four forms can each have a reg or a mem operand, so there are a
-/// total of eight "instructions".
-///
-multiclass sse1_fp_unop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- Intrinsic F32Int,
- Intrinsic V4F32Int,
- bit Commutable = 0> {
- // Scalar operation, reg.
- def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode FR32:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, mem.
- def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
- Requires<[HasSSE1, OptForSize]>;
+multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (OpNode SrcRC:$src))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], d>;
+}
- // Vector operation, reg.
- def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]> {
- let isCommutable = Commutable;
- }
+multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src),
+ asm, []>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src), asm, []>;
+}
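+
+// Unlike sse12_cvt_s, this AVX variant takes an extra $src1 register that
+// supplies the upper elements of the destination (the usual AVX
+// scalar-convert convention), so the records carry no selection patterns
+// and are reached through the asm-parser-only instantiations below.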
- // Vector operation, mem.
- def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+let isAsmParserOnly = 1 in {
+defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS,
+ VEX_4V;
+defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD,
+ VEX_4V;
+}
- // Intrinsic operation, reg.
- def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
+defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
+ "cvttss2si\t{$src, $dst|$dst, $src}">, XS;
+defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64,
+ "cvttsd2si\t{$src, $dst|$dst, $src}">, XD;
+defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32,
+ "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS;
+defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32,
+ "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD;
+
+// Conversion instruction intrinsics - match intrinsics which expect MM
+// and/or XMM operand(s).
+multiclass sse12_cvt_pint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))], d>;
+}
- // Intrinsic operation, mem.
- def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
- !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
+ Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag,
+ string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
+ [(set DstRC:$dst, (Int SrcRC:$src))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
+ [(set DstRC:$dst, (Int (ld_frag addr:$src)))]>;
+}
- // Vector intrinsic operation, reg
- def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
+multiclass sse12_cvt_pint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm, Domain d> {
+ def rr : PI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], d>;
+ def rm : PI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], d>;
+}
- // Vector intrinsic operation, mem
- def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
+ RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop,
+ PatFrag ld_frag, string asm> {
+ def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2),
+ asm, [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))]>;
+ def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst),
+ (ins DstRC:$src1, x86memop:$src2), asm,
+ [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
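+
+// The *_3addr multiclasses model tied-operand forms: the SSE instantiations
+// below bind $src1 to $dst via Constraints, while the AVX ones encode the
+// extra source separately through VEX_4V.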
-// Square root.
-defm SQRT : sse1_fp_unop_rm<0x51, "sqrt", fsqrt,
- int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>;
+let isAsmParserOnly = 1 in {
+ defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS,
+ VEX;
+ defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD,
+ VEX;
+}
+defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS;
+defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD;
-// Reciprocal approximations. Note that these typically require refinement
-// in order to obtain suitable precision.
-defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt,
- int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>;
-defm RCP : sse1_fp_unop_rm<0x53, "rcp", X86frcp,
- int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>;
-// Logical
let Constraints = "$src1 = $dst" in {
- let isCommutable = 1 in {
- def ANDPSrr : PSI<0x54, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "andps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64
- (and VR128:$src1, VR128:$src2)))]>;
- def ORPSrr : PSI<0x56, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "orps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64
- (or VR128:$src1, VR128:$src2)))]>;
- def XORPSrr : PSI<0x57, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "xorps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64
- (xor VR128:$src1, VR128:$src2)))]>;
- }
-
- def ANDPSrm : PSI<0x54, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "andps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (and (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def ORPSrm : PSI<0x56, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "orps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (or (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def XORPSrm : PSI<0x57, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "xorps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (xor (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def ANDNPSrr : PSI<0x55, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "andnps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (and (xor VR128:$src1,
- (bc_v2i64 (v4i32 immAllOnesV))),
- VR128:$src2)))]>;
- def ANDNPSrm : PSI<0x55, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
- "andnps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
- (bc_v2i64 (v4i32 immAllOnesV))),
- (memopv2i64 addr:$src2))))]>;
+ defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32,
+ "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS;
+ defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32,
+ "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD;
}
+// Instructions below don't have an AVX form.
+defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi,
+ f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi,
+ f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi,
+ f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB;
+defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi,
+ f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
+defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd,
+ i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}",
+ SSEPackedDouble>, TB, OpSize;
let Constraints = "$src1 = $dst" in {
- def CMPPSrri : PSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def CMPPSrmi : PSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
- "cmp${cc}ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
- (memop addr:$src), imm:$cc))]>;
-
- // Accept explicit immediate argument form instead of comparison code.
-let isAsmParserOnly = 1 in {
- def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
- "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
- def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
- "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
+ defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128,
+ int_x86_sse_cvtpi2ps,
+ i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
}
-}
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
-def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
- (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
-
-// Shuffle and unpack instructions
-let Constraints = "$src1 = $dst" in {
- let isConvertibleToThreeAddress = 1 in // Convert to pshufd
- def SHUFPSrri : PSIi8<0xC6, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1,
- VR128:$src2, i8imm:$src3),
- "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
- def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,
- f128mem:$src2, i8imm:$src3),
- "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v4f32 (shufp:$src3
- VR128:$src1, (memopv4f32 addr:$src2))))]>;
-
- let AddedComplexity = 10 in {
- def UNPCKHPSrr : PSI<0x15, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpckhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>;
- def UNPCKHPSrm : PSI<0x15, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpckhps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (unpckh VR128:$src1,
- (memopv4f32 addr:$src2))))]>;
-
- def UNPCKLPSrr : PSI<0x14, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpcklps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>;
- def UNPCKLPSrm : PSI<0x14, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpcklps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-// Mask creation
-def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>;
-def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>;
-
-// Prefetch intrinsic.
-def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
-def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
-def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
-def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
-// Non-temporal stores
-def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+/// SSE 1 Only
-let AddedComplexity = 400 in { // Prefer non-temporal versions
-def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
-
-def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
-
-def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
- TB, Requires<[HasSSE2]>;
-
-def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
- TB, Requires<[HasSSE2]>;
+// Aliases for intrinsics
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
+ int_x86_sse_cvttss2si, f32mem, load,
+ "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS;
+defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32,
+ int_x86_sse2_cvttsd2si, f128mem, load,
+ "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD;
}
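+
+// Asm-parser-only records: Pattern = []<dag> keeps these out of instruction
+// selection, so they only teach the assembler the spellings written above.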
-
-// Load, store, and memory fence
-def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
- TB, Requires<[HasSSE1]>;
-
-// MXCSR register
-def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
-def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
-
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-zeros value if folding it would be beneficial.
-// FIXME: Change encoding to pseudo!
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in {
-def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4f32 immAllZerosV))]>;
-def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v2f64 immAllZerosV))]>;
-let ExeDomain = SSEPackedInt in
-def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
+ f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
+ XS;
+defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
+ f128mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">,
+ XD;
+
+let isAsmParserOnly = 1, Pattern = []<dag> in {
+defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
}
-
-def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
-
-def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
-
-//===---------------------------------------------------------------------===//
-// SSE2 Instructions
-//===---------------------------------------------------------------------===//
-
-// Move Instructions. Register-to-register movsd is not used for FR64
-// register copies because it's a partial register update; FsMOVAPDrr is
-// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
-// because INSERT_SUBREG requires that the insert be implementable in terms of
-// a copy, and just mentioned, we don't use movsd for copies.
-let Constraints = "$src1 = $dst" in
-def MOVSDrr : SDI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}",
- [(set (v2f64 VR128:$dst),
- (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
-
-// Extract the low 64-bit value from one vector and insert it into another.
-let AddedComplexity = 15 in
-def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
- (MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
-
-// Implicitly promote a 64-bit scalar to a vector.
-def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
-
-// Loading from memory automatically zeroing upper bits.
-let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
-def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (loadf64 addr:$src))]>;
-
-// MOVSDrm zeros the high parts of the register; represent this
-// with SUBREG_TO_REG.
-let AddedComplexity = 20 in {
-def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
-def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
+let Pattern = []<dag> in {
+defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
+ "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS;
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+                       SSEPackedSingle>, TB; /* PD SSE3 form is available */
}
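+
+// With an undef OpNode and empty patterns, CVTSS2SI/CVTDQ2PS here exist for
+// the assembler and disassembler only; code generation presumably reaches
+// these opcodes through the intrinsic (Int_*) forms defined separately.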
-// Store scalar value to memory.
-def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store FR64:$src, addr:$dst)]>;
-
-// Extract and store.
-def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- addr:$dst),
- (MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+/// SSE 2 Only
-// Conversion instructions
-def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint FR64:$src))]>;
-def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>;
+// Convert scalar double to scalar single
+let isAsmParserOnly = 1 in {
+def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
+ (ins FR64:$src1, FR64:$src2),
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
+}
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))]>;
@@ -1226,35 +695,28 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
Requires<[HasSSE2, OptForSize]>;
-def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
- "cvtsi2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
-def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src),
- "cvtsi2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>;
-def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
-def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtdq2ps\t{$src, $dst|$dst, $src}", []>;
-def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}", []>;
-def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}", []>;
-
-// SSE2 instructions with XS prefix
+let isAsmParserOnly = 1 in
+defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load,
+ "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
+                      XD, VEX_4V;
+let Constraints = "$src1 = $dst" in
+defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
+ int_x86_sse2_cvtsd2ss, f64mem, load,
+ "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XS;
+
+// Convert scalar single to scalar double
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
+ (ins FR32:$src1, FR32:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, Requires<[HasAVX]>, VEX_4V;
+def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
+ (ins FR32:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
+}
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (fextend FR32:$src))]>, XS,
@@ -1264,394 +726,51 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
Requires<[HasSSE2, OptForSize]>;
-def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>,
- Requires<[HasSSE2, OptForSpeed]>;
-
-// Match intrinsics which expect XMM operand(s).
-def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvtsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_cvtsd2si VR128:$src))]>;
-def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
- "cvtsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_cvtsd2si
- (load addr:$src)))]>;
-
-// Match intrinsics which expect MM and XMM operand(s).
-def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
-def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
- "cvtpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvtpd2pi
- (memop addr:$src)))]>;
-def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>;
-def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src),
- "cvttpd2pi\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst, (int_x86_sse_cvttpd2pi
- (memop addr:$src)))]>;
-def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>;
-def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "cvtpi2pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cvtpi2pd
- (load addr:$src)))]>;
-
-// Aliases for intrinsics
-def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst,
- (int_x86_sse2_cvttsd2si VR128:$src))]>;
-def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
- "cvttsd2si\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_cvttsd2si
- (load addr:$src)))]>;
-
-// Comparison instructions
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
- def CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
-let mayLoad = 1 in
- def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
-
- // Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1 in {
- def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-let mayLoad = 1 in
- def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
- "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-}
-}
-
-let Defs = [EFLAGS] in {
-def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>;
-def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>;
-} // Defs = [EFLAGS]
-
-// Aliases to match intrinsics which expect XMM operand(s).
-let Constraints = "$src1 = $dst" in {
- def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, f64mem:$src, SSECC:$cc),
- "cmp${cc}sd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
- (load addr:$src), imm:$cc))]>;
-}
-
-let Defs = [EFLAGS] in {
-def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1),
- VR128:$src2))]>;
-def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2),
- "ucomisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1),
- (load addr:$src2)))]>;
-
-def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v2f64 VR128:$src1),
- VR128:$src2))]>;
-def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
- "comisd\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86comi (v2f64 VR128:$src1),
- (load addr:$src2)))]>;
-} // Defs = [EFLAGS]
-
-// Aliases of packed SSE2 instructions for scalar use. These all have names
-// that start with 'Fs'.
-
-// Alias instructions that map fld0 to pxor for sse.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
- canFoldAsLoad = 1 in
-def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>,
- Requires<[HasSSE2]>, TB, OpSize;
-
-// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are
-// disregarded.
-let neverHasSideEffects = 1 in
-def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
-
-// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
-// disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
-
-// Alias bitwise logical operations using SSE logical ops on packed FP values.
-let Constraints = "$src1 = $dst" in {
-let isCommutable = 1 in {
- def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst),
- (ins FR64:$src1, FR64:$src2),
- "andpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>;
- def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst),
- (ins FR64:$src1, FR64:$src2),
- "orpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>;
- def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst),
- (ins FR64:$src1, FR64:$src2),
- "xorpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>;
-}
-
-def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f128mem:$src2),
- "andpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fand FR64:$src1,
- (memopfsf64 addr:$src2)))]>;
-def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f128mem:$src2),
- "orpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86for FR64:$src1,
- (memopfsf64 addr:$src2)))]>;
-def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f128mem:$src2),
- "xorpd\t{$src2, $dst|$dst, $src2}",
- [(set FR64:$dst, (X86fxor FR64:$src1,
- (memopfsf64 addr:$src2)))]>;
-
-let neverHasSideEffects = 1 in {
-def FsANDNPDrr : PDI<0x55, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- "andnpd\t{$src2, $dst|$dst, $src2}", []>;
-let mayLoad = 1 in
-def FsANDNPDrm : PDI<0x55, MRMSrcMem,
- (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
- "andnpd\t{$src2, $dst|$dst, $src2}", []>;
-}
-}
-
-/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms.
-///
-/// In addition, we also have a special variant of the scalar form here to
-/// represent the associated intrinsic operation. This form is unlike the
-/// plain scalar form, in that it takes an entire vector (instead of a scalar)
-/// and leaves the top elements unmodified (therefore these cannot be commuted).
-///
-/// These three forms can each be reg+reg or reg+mem, so there are a total of
-/// six "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass basic_sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode, Intrinsic F64Int,
- bit Commutable = 0> {
- // Scalar operation, reg+reg.
- def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, reg+mem.
- def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f64mem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
-
- // Vector operation, reg+reg.
- def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, reg+mem.
- def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
-
- // Intrinsic operation, reg+reg.
- def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]>;
-
- // Intrinsic operation, reg+mem.
- def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F64Int VR128:$src1,
- sse_load_f64:$src2))]>;
+def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS, VEX_4V,
+ Requires<[HasAVX]>;
+def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS, VEX_4V,
+ Requires<[HasAVX]>;
}
+let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
+def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ VR128:$src2))]>, XS,
+ Requires<[HasSSE2]>;
+def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
+ "cvtss2sd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
+ (load addr:$src2)))]>, XS,
+ Requires<[HasSSE2]>;
}
-// Arithmetic instructions
-defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>;
-defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>;
-defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>;
-defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>;
-
-/// sse2_fp_binop_rm - Other SSE2 binops
-///
-/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of
-/// instructions for a full-vector intrinsic form. Operations that map
-/// onto C operators don't use this form since they just use the plain
-/// vector form instead of having a separate vector intrinsic form.
-///
-/// This provides a total of eight "instructions".
-///
-let Constraints = "$src1 = $dst" in {
-multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- Intrinsic F64Int,
- Intrinsic V2F64Int,
- bit Commutable = 0> {
-
- // Scalar operation, reg+reg.
- def SDrr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set FR64:$dst, (OpNode FR64:$src1, FR64:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, reg+mem.
- def SDrm : SDI<opc, MRMSrcMem, (outs FR64:$dst),
- (ins FR64:$src1, f64mem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set FR64:$dst, (OpNode FR64:$src1, (load addr:$src2)))]>;
-
- // Vector operation, reg+reg.
- def PDrr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, reg+mem.
- def PDrm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1, (memopv2f64 addr:$src2)))]>;
-
- // Intrinsic operation, reg+reg.
- def SDrr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
-
- // Intrinsic operation, reg+mem.
- def SDrm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2),
- !strconcat(OpcodeStr, "sd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (F64Int VR128:$src1,
- sse_load_f64:$src2))]>;
-
- // Vector intrinsic operation, reg+reg.
- def PDrr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
+def : Pat<(extloadf32 addr:$src),
+ (CVTSS2SDrr (MOVSSrm addr:$src))>,
+ Requires<[HasSSE2, OptForSpeed]>;
- // Vector intrinsic operation, reg+mem.
- def PDrm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "pd\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1,
- (memopv2f64 addr:$src2)))]>;
-}
+// Convert doubleword to packed single/double fp
+let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
+ TB, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vcvtdq2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2ps
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ TB, VEX, Requires<[HasAVX]>;
}
-
-defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax,
- int_x86_sse2_max_sd, int_x86_sse2_max_pd>;
-defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
- int_x86_sse2_min_sd, int_x86_sse2_min_pd>;
-
-//===---------------------------------------------------------------------===//
-// SSE packed FP Instructions
-
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
-
-def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(alignedstore (v2f64 VR128:$src), addr:$dst)]>;
-
-let neverHasSideEffects = 1 in
-def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1 in
-def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (loadv2f64 addr:$src))]>;
-def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(store (v2f64 VR128:$src), addr:$dst)]>;
-
-// Intrinsic forms of MOVUPD load and store
-def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
-def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>;
-
-let Constraints = "$src1 = $dst" in {
- let AddedComplexity = 20 in {
- def MOVLPDrm : PDI<0x12, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movlpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (movlp VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))))]>;
- def MOVHPDrm : PDI<0x16, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "movhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (movlhps VR128:$src1,
- (scalar_to_vector (loadf64 addr:$src2)))))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
-
-def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movlpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (v2f64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
-
-// v2f64 extract element 1 is always custom lowered to unpack high to low
-// and extract element 0 so the non-store version isn't too horrible.
-def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movhpd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract
- (v2f64 (unpckh VR128:$src, (undef))),
- (iPTR 0))), addr:$dst)]>;
-
-// SSE2 instructions without OpSize prefix
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -1662,7 +781,18 @@ def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
(bitconvert (memopv2i64 addr:$src))))]>,
TB, Requires<[HasSSE2]>;
-// SSE2 instructions with XS prefix
+// FIXME: why is the non-intrinsic version described as SSE3?
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtdq2pd
+ (bitconvert (memopv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+}
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -1673,6 +803,29 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
(bitconvert (memopv2i64 addr:$src))))]>,
XS, Requires<[HasSSE2]>;
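
For orientation, the cvtdq2ps/cvtdq2pd intrinsics modeled above surface in C as _mm_cvtepi32_ps and _mm_cvtepi32_pd; a minimal sketch (illustrative names, assuming <emmintrin.h>):

    #include <emmintrin.h>

    /* CVTDQ2PS: all four i32 lanes -> f32 lanes. */
    __m128  dq_to_ps(__m128i v) { return _mm_cvtepi32_ps(v); }

    /* CVTDQ2PD: only the low two i32 lanes -> two f64 lanes. */
    __m128d dq_to_pd(__m128i v) { return _mm_cvtepi32_pd(v); }
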
+// Convert packed single/double fp to doubleword
+let isAsmParserOnly = 1 in {
+def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>;
+
+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2dq
+ (memop addr:$src)))]>, VEX;
+}
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
@@ -1680,12 +833,54 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
(memop addr:$src)))]>;
-// SSE2 packed instructions with XS prefix
+
+let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix
+def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, VEX, Requires<[HasAVX]>;
+def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, VEX, Requires<[HasAVX]>;
+}
+def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ XD, Requires<[HasSSE2]>;
+def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
+ (memop addr:$src)))]>,
+ XD, Requires<[HasSSE2]>;
+
+
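The non-truncating conversions above round according to the MXCSR rounding mode; in C they surface as _mm_cvtps_epi32 and _mm_cvtpd_epi32. A minimal sketch (illustrative names):

    #include <emmintrin.h>

    /* CVTPS2DQ: f32 -> i32 using the current MXCSR rounding mode. */
    __m128i ps_to_dq(__m128 v)  { return _mm_cvtps_epi32(v); }

    /* CVTPD2DQ: two f64 -> i32 in the low lanes; the upper 64 bits of
       the result are zeroed. */
    __m128i pd_to_dq(__m128d v) { return _mm_cvtpd_epi32(v); }
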
+// Convert packed single/double fp to doubleword with truncation
+let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix
+def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>;
def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>;
+
+let isAsmParserOnly = 1 in {
+def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (int_x86_sse2_cvttps2dq VR128:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvttps2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttps2dq
+ (memop addr:$src)))]>,
+ XS, VEX, Requires<[HasAVX]>;
+}
def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -1697,17 +892,18 @@ def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(memop addr:$src)))]>,
XS, Requires<[HasSSE2]>;
-// SSE2 packed instructions with XD prefix
-def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
- XD, Requires<[HasSSE2]>;
-def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq
- (memop addr:$src)))]>,
- XD, Requires<[HasSSE2]>;
-
+let isAsmParserOnly = 1 in {
+def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>,
+ VEX;
+def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
+ (memop addr:$src)))]>, VEX;
+}
def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -1716,12 +912,31 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>;
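
The cvtt* forms truncate toward zero (C cast semantics) rather than honoring MXCSR; a minimal sketch with the matching intrinsics:

    #include <emmintrin.h>

    /* CVTTPS2DQ / CVTTPD2DQ: truncating conversions, like (int)f in C. */
    __m128i trunc_ps(__m128 v)  { return _mm_cvttps_epi32(v); }
    __m128i trunc_pd(__m128d v) { return _mm_cvttpd_epi32(v); }
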
-// SSE2 instructions without OpSize prefix
+// Convert packed single to packed double
+let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+ Requires<[HasAVX]>;
+def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
+ Requires<[HasAVX]>;
+}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
+let isAsmParserOnly = 1 in {
+def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
+ VEX, Requires<[HasAVX]>;
+def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ "cvtps2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtps2pd
+ (load addr:$src)))]>,
+ VEX, Requires<[HasAVX]>;
+}
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1732,12 +947,29 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
(load addr:$src)))]>,
TB, Requires<[HasSSE2]>;
+// Convert packed double to packed single
+let isAsmParserOnly = 1 in {
+def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+// FIXME: the memory form of this instruction should be described using
+// extra asm syntax
+}
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
+let isAsmParserOnly = 1 in {
+def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
+def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
+ (ins f128mem:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_cvtpd2ps
+ (memop addr:$src)))]>;
+}
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1746,269 +978,1039 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
(memop addr:$src)))]>;
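
cvtps2pd/cvtpd2ps widen and narrow between the two FP element widths; the C-level view, for reference (illustrative names):

    #include <emmintrin.h>

    /* CVTPS2PD: widen the low two f32 lanes to f64. */
    __m128d widen(__m128 v)   { return _mm_cvtps_pd(v); }

    /* CVTPD2PS: narrow both f64 lanes into the low half; upper half zeroed. */
    __m128  narrow(__m128d v) { return _mm_cvtpd_ps(v); }
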
-// Match intrinsics which expect XMM operand(s).
-// Aliases for intrinsics
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Compare Instructions
+//===----------------------------------------------------------------------===//
+
+// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
+multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
+ string asm, string asm_alt> {
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
+ asm, []>;
+ let mayLoad = 1 in
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
+ asm, []>;
+ // Accept explicit immediate argument form instead of comparison code.
+ let isAsmParserOnly = 1 in {
+ def rr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, []>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
+ asm_alt, []>;
+ }
+}
+
+let neverHasSideEffects = 1, isAsmParserOnly = 1 in {
+ defm VCMPSS : sse12_cmp_scalar<FR32, f32mem,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+ XS, VEX_4V;
+ defm VCMPSD : sse12_cmp_scalar<FR64, f64mem,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpsd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
+ XD, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+ defm CMPSS : sse12_cmp_scalar<FR32, f32mem,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}",
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}">, XS;
+ defm CMPSD : sse12_cmp_scalar<FR64, f64mem,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}",
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}">, XD;
+}
+
+multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm> {
+ def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ VR128:$src, imm:$cc))]>;
+ def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm,
+ [(set VR128:$dst, (Int VR128:$src1,
+ (load addr:$src), imm:$cc))]>;
+}
+
+// Aliases to match intrinsics which expect XMM operand(s).
+let isAsmParserOnly = 1 in {
+ defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XS, VEX_4V;
+ defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XD, VEX_4V;
+}
let Constraints = "$src1 = $dst" in {
-def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, GR32:$src2),
- "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
- GR32:$src2))]>;
-def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2),
- "cvtsi2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1,
- (loadi32 addr:$src2)))]>;
-def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
- VR128:$src2))]>;
-def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
- "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1,
- (load addr:$src2)))]>;
-def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- VR128:$src2))]>, XS,
- Requires<[HasSSE2]>;
-def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2),
- "cvtss2sd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
- (load addr:$src2)))]>, XS,
- Requires<[HasSSE2]>;
+ defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $dst|$dst, $src}">, XD;
+}
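
The SSECC operand in these classes is the 3-bit predicate immediate of CMPSS/CMPSD (0=eq, 1=lt, ..., 7=ord); the *_alt defs merely accept that immediate spelled out. A C-level sketch of one predicate:

    #include <xmmintrin.h>

    /* CMPSS with cc=1 (lt): the low lane becomes an all-ones/all-zeros
       mask; the upper lanes pass through from a. */
    __m128 low_lt_mask(__m128 a, __m128 b) { return _mm_cmplt_ss(a, b); }
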
+
+
+// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS
+multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
+ ValueType vt, X86MemOperand x86memop,
+ PatFrag ld_frag, string OpcodeStr, Domain d> {
+ def rr: PI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], d>;
+ def rm: PI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
+ [(set EFLAGS, (OpNode (vt RC:$src1),
+ (ld_frag addr:$src2)))], d>;
+}
+
+let Defs = [EFLAGS] in {
+ let isAsmParserOnly = 1 in {
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, VEX;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ let Pattern = []<dag> in {
+ defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, VEX;
+ defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, OpSize, VEX;
+ }
+
+ defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+ load, "comiss", SSEPackedSingle>, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+ load, "comisd", SSEPackedDouble>, OpSize, VEX;
+ }
+ defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, TB;
+ defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ let Pattern = []<dag> in {
+ defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+ }
+
+ defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, TB;
+ defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, TB, OpSize;
+
+ defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, TB;
+ defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, TB, OpSize;
+} // Defs = [EFLAGS]
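
comis*/ucomis* compare the low lanes and set EFLAGS directly; the two families differ only in whether a QNaN operand raises the invalid exception (comi* does, ucomi* does not). A sketch with the standard intrinsics:

    #include <emmintrin.h>

    /* COMISD / UCOMISD: scalar compares that set EFLAGS; the intrinsics
       return the resulting predicate as an int. */
    int lt_ordered(__m128d a, __m128d b)   { return _mm_comilt_sd(a, b); }
    int eq_unordered(__m128d a, __m128d b) { return _mm_ucomieq_sd(a, b); }
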
+
+// sse12_cmp_packed - sse 1 & 2 compare packed instructions
+multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
+ Intrinsic Int, string asm, string asm_alt,
+ Domain d> {
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
+ // Accept explicit immediate argument form instead of comparison code.
+ let isAsmParserOnly = 1 in {
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, [], d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+ asm_alt, [], d>;
+ }
+}
+
+let isAsmParserOnly = 1 in {
+ defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+}
+let Constraints = "$src1 = $dst" in {
+ defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $dst|$dst, $src}",
+ "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}",
+ SSEPackedSingle>, TB;
+ defm CMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $dst|$dst, $src}",
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+}
+
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
+def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
+ (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
+def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
+ (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
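
Packed compares produce a per-lane all-ones/all-zeros mask, the usual ingredient of branchless selects; a minimal sketch (illustrative function name):

    #include <xmmintrin.h>

    /* CMPLTPS (cmpps, cc=1) + and/andnot/or: select x where a<b, else y. */
    __m128 select_lt(__m128 a, __m128 b, __m128 x, __m128 y) {
        __m128 m = _mm_cmplt_ps(a, b);
        return _mm_or_ps(_mm_and_ps(m, x), _mm_andnot_ps(m, y));
    }
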
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Shuffle Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_shuffle - sse 1 & 2 shuffle instructions
+multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
+ ValueType vt, string asm, PatFrag mem_frag,
+ Domain d, bit IsConvertibleToThreeAddress = 0> {
+ def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm,
+ [(set VR128:$dst, (vt (shufp:$src3
+ VR128:$src1, (mem_frag addr:$src2))))], d>;
+ let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in
+ def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm,
+ [(set VR128:$dst,
+ (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>;
+}
+
+let isAsmParserOnly = 1 in {
+ defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, VEX_4V;
+ defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src2, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv4f32, SSEPackedSingle, 1 /* cvt to pshufd */>,
+ TB;
+ defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+ "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize;
+}
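
SHUFPS selects the two low result lanes from $src1 and the two high ones from $src2, steered by the 8-bit immediate ($src3); in C, _MM_SHUFFLE packs the four 2-bit selectors. Sketch:

    #include <xmmintrin.h>

    /* Reverse the four lanes of a vector with a single shufps. */
    __m128 reverse(__m128 a) {
        return _mm_shuffle_ps(a, a, _MM_SHUFFLE(0, 1, 2, 3));
    }
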
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Unpack Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave
+multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
+ PatFrag mem_frag, RegisterClass RC,
+ X86MemOperand x86memop, string asm,
+ Domain d> {
+ def rr : PI<opc, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1, RC:$src2)))], d>;
+ def rm : PI<opc, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+ asm, [(set RC:$dst,
+ (vt (OpNode RC:$src1,
+ (mem_frag addr:$src2))))], d>;
+}
+
+let AddedComplexity = 10 in {
+ let isAsmParserOnly = 1 in {
+ defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+
+ defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ }
+
+ let Constraints = "$src1 = $dst" in {
+ defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
+ SSEPackedSingle>, TB;
+ defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
+ SSEPackedDouble>, TB, OpSize;
+ } // Constraints = "$src1 = $dst"
+} // AddedComplexity
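
The unpack forms interleave the low (unpckl*) or high (unpckh*) halves of the two sources; in C (sketch, illustrative names):

    #include <xmmintrin.h>

    /* unpacklo(a,b) = {a0,b0,a1,b1}; unpackhi(a,b) = {a2,b2,a3,b3}. */
    __m128 lo_interleave(__m128 a, __m128 b) { return _mm_unpacklo_ps(a, b); }
    __m128 hi_interleave(__m128 a, __m128 b) { return _mm_unpackhi_ps(a, b); }
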
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Extract Floating-Point Sign mask
+//===----------------------------------------------------------------------===//
+
+/// sse12_extr_sign_mask - sse 1 & 2 sign mask extraction
+multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
+ Domain d> {
+ def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set GR32:$dst, (Int RC:$src))], d>;
+}
+
+// Mask creation
+defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
+ SSEPackedSingle>, TB;
+defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
+ SSEPackedDouble>, TB, OpSize;
+
+let isAsmParserOnly = 1 in {
+ defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, VEX;
+ defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+ // FIXME: merge with multiclass above when the intrinsics come.
+ def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+ def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+}
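
movmskps/movmskpd gather the per-lane sign bits into the low bits of a GPR, typically to test a compare mask; a small sketch:

    #include <xmmintrin.h>

    /* MOVMSKPS: bit i of the result is the sign bit of lane i. */
    int any_lane_set(__m128 mask) { return _mm_movemask_ps(mask) != 0; }
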
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
+//===----------------------------------------------------------------------===//
+
+// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
+// names that start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+ canFoldAsLoad = 1 in {
+ // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+ [(set FR32:$dst, fp32imm0)]>,
+ Requires<[HasSSE1]>, TB, OpSize;
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+ [(set FR64:$dst, fpimm0)]>,
+ Requires<[HasSSE2]>, TB, OpSize;
}
-// Arithmetic
+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded.
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+ "movapd\t{$src, $dst|$dst, $src}", []>;
+}
-/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms.
+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded.
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+ "movaps\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+ "movapd\t{$src, $dst|$dst, $src}",
+ [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Logical Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
+///
+multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let isAsmParserOnly = 1 in {
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
+ }
+
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
+ f32, f128mem, memopfsf32, SSEPackedSingle>, TB;
+
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
+ f64, f128mem, memopfsf64, SSEPackedDouble>, TB, OpSize;
+ }
+}
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let mayLoad = 0 in {
+ defm FsAND : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
+ defm FsOR : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
+ defm FsXOR : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
+}
+
+let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
+ defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>;
+
+/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
+multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, int HasPat = 0,
+ list<list<dag>> Pattern = []> {
+ let isAsmParserOnly = 1, Pattern = []<dag> in {
+ defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ !if(HasPat, Pattern[0], // rr
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
+ VR128:$src2)))]),
+ !if(HasPat, Pattern[2], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]), 0>,
+ VEX_4V;
+
+ defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ !if(HasPat, Pattern[1], // rr
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64
+ VR128:$src2))))]),
+ !if(HasPat, Pattern[3], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]), 0>,
+ OpSize, VEX_4V;
+ }
+ let Constraints = "$src1 = $dst" in {
+ defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ !if(HasPat, Pattern[0], // rr
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
+ VR128:$src2)))]),
+ !if(HasPat, Pattern[2], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))])>, TB;
+
+ defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f128mem,
+ !if(HasPat, Pattern[1], // rr
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64
+ VR128:$src2))))]),
+ !if(HasPat, Pattern[3], // rm
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))])>,
+ TB, OpSize;
+ }
+}
+
+/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
+///
+let isAsmParserOnly = 1 in {
+multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
+ defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
+ !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V;
+
+ defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
+ !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V;
+}
+}
+
+// AVX 256-bit packed logical ops forms
+defm VAND : sse12_fp_packed_logical_y<0x54, "and">;
+defm VOR : sse12_fp_packed_logical_y<0x56, "or">;
+defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">;
+let isCommutable = 0 in
+ defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">;
+
+defm AND : sse12_fp_packed_logical<0x54, "and", and>;
+defm OR : sse12_fp_packed_logical<0x56, "or", or>;
+defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
+let isCommutable = 0 in
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
+ // single r+r
+ [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ VR128:$src2)))],
+ // double r+r
+ [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ // single r+m
+ [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
+ (bc_v2i64 (v4i32 immAllOnesV))),
+ (memopv2i64 addr:$src2))))],
+ // double r+m
+ [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+ (memopv2i64 addr:$src2)))]]>;
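
The (and (vnot ...)) patterns above are ANDNP*'s (~a) & b; the textbook use is masking off sign bits, e.g. a vector fabs (sketch, assuming <emmintrin.h>):

    #include <emmintrin.h>

    /* ANDNPD computes (~a) & b; with a = -0.0 broadcast (sign bit only),
       this clears the sign bit of every lane. */
    __m128d vec_fabs(__m128d v) {
        return _mm_andnot_pd(_mm_set1_pd(-0.0), v);
    }
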
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
+/// vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem.
+///
+multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"),
+ OpNode, FR32, f32mem, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"),
+ OpNode, FR64, f64mem, Is2Addr>, XD;
+}
+
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ bit Is2Addr = 1> {
+ let mayLoad = 0 in {
+ defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
+ v4f32, f128mem, memopv4f32, SSEPackedSingle, Is2Addr>, TB;
+ defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
+ v2f64, f128mem, memopv2f64, SSEPackedDouble, Is2Addr>, TB, OpSize;
+ }
+}
+
+multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
+ let mayLoad = 0 in {
+ defm PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR256,
+ v8f32, f256mem, memopv8f32, SSEPackedSingle, 0>, TB;
+ defm PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR256,
+ v4f64, f256mem, memopv4f64, SSEPackedDouble, 0>, TB, OpSize;
+ }
+}
+
+multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ bit Is2Addr = 1> {
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, Is2Addr>, XS;
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, Is2Addr>, XD;
+}
+
+multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
+ bit Is2Addr = 1> {
+ defm PS : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "ps"), "", "_ps", f128mem, memopv4f32,
+ SSEPackedSingle, Is2Addr>, TB;
+
+ defm PD : sse12_fp_packed_int<opc, OpcodeStr, VR128,
+ !strconcat(OpcodeStr, "pd"), "2", "_pd", f128mem, memopv2f64,
+ SSEPackedDouble, Is2Addr>, TB, OpSize;
+}
+
+// Binary Arithmetic instructions
+let isAsmParserOnly = 1 in {
+ defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
+ defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+
+ let isCommutable = 0 in {
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
+ }
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd>,
+ basic_sse12_fp_binop_s_int<0x58, "add">;
+ defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul>,
+ basic_sse12_fp_binop_s_int<0x59, "mul">;
+
+ let isCommutable = 0 in {
+ defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub">;
+ defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
+ basic_sse12_fp_binop_s_int<0x5E, "div">;
+ defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_s_int<0x5F, "max">,
+ basic_sse12_fp_binop_p_int<0x5F, "max">;
+ defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
+ basic_sse12_fp_binop_s_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_int<0x5D, "min">;
+ }
+}
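
As the multiclass comment below notes, the intrinsic scalar forms touch only the low lane and carry the rest of $src1 through, which is why they cannot be commuted; the packed forms operate lane-wise. In C (sketch):

    #include <xmmintrin.h>

    /* ADDSS: only the low lanes are added; upper lanes come from a.
       ADDPS: all four lanes are added. */
    __m128 add_low(__m128 a, __m128 b) { return _mm_add_ss(a, b); }
    __m128 add_all(__m128 a, __m128 b) { return _mm_add_ps(a, b); }
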
+
+/// Unop Arithmetic
/// In addition, we also have a special variant of the scalar form here to
/// represent the associated intrinsic operation. This form is unlike the
/// plain scalar form, in that it takes an entire vector (instead of a
/// scalar) and leaves the top elements undefined.
///
/// And, we have a special variant form for a full-vector intrinsic form.
-///
-/// These four forms can each have a reg or a mem operand, so there are a
-/// total of eight "instructions".
-///
-multiclass sse2_fp_unop_rm<bits<8> opc, string OpcodeStr,
- SDNode OpNode,
- Intrinsic F64Int,
- Intrinsic V2F64Int,
- bit Commutable = 0> {
- // Scalar operation, reg.
+
+/// sse1_fp_unop_s - SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode FR32:$src))]>;
+ // For scalar unary operations, fold a load into the operation
+ // only in OptForSize mode. It eliminates an instruction, but it also
+ // eliminates a whole-register clobber (the load), so it introduces a
+ // partial register update condition.
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+ Requires<[HasSSE1, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int VR128:$src))]>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst), (ins ssmem:$src),
+ !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F32Int sse_load_f32:$src))]>;
+}
+
+/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F32Int> {
+ def SSr : SSI<opc, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, XS, Requires<[HasAVX, OptForSize]>;
+ def SSr_Int : SSI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SSm_Int : SSI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, ssmem:$src2),
+ !strconcat(!strconcat("v", OpcodeStr),
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+}
+
+/// sse1_fp_unop_p - SSE1 unops in packed form.
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))]>;
+ def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
+multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))]>;
+ def PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv8f32 addr:$src)))]>;
+}
+
+/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V4F32Int> {
+ def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src))]>;
+ def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))]>;
+}
+
+
+/// sse2_fp_unop_s - SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
def SDr : SDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode FR64:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Scalar operation, mem.
- def SDm : SDI<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
+ [(set FR64:$dst, (OpNode FR64:$src))]>;
+ // See the comments in sse1_fp_unop_s for why this is OptForSize.
+ def SDm : I<opc, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
!strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set FR64:$dst, (OpNode (load addr:$src)))]>;
+ [(set FR64:$dst, (OpNode (load addr:$src)))]>, XD,
+ Requires<[HasSSE2, OptForSize]>;
+ def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int VR128:$src))]>;
+ def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
+ !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+}
+
+/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+ SDNode OpNode, Intrinsic F64Int> {
+ def SDr : VSDI<opc, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDm : VSDI<opc, MRMSrcMem, (outs FR64:$dst),
+ (ins FR64:$src1, f64mem:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>;
+ def SDr_Int : VSDI<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+ def SDm_Int : VSDI<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, sdmem:$src2),
+ !strconcat(OpcodeStr, "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>;
+}
- // Vector operation, reg.
+/// sse2_fp_unop_p - SSE2 unops in vector forms.
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]> {
- let isCommutable = Commutable;
- }
-
- // Vector operation, mem.
+ [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))]>;
def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))]>;
+}
- // Intrinsic operation, reg.
- def SDr_Int : SDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Intrinsic operation, mem.
- def SDm_Int : SDI<opc, MRMSrcMem, (outs VR128:$dst), (ins sdmem:$src),
- !strconcat(OpcodeStr, "sd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (F64Int sse_load_f64:$src))]>;
+/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
+multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+ def PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))]>;
+ def PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (OpNode (memopv4f64 addr:$src)))]>;
+}
- // Vector intrinsic operation, reg
+/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+ Intrinsic V2F64Int> {
def PDr_Int : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (V2F64Int VR128:$src))]> {
- let isCommutable = Commutable;
- }
-
- // Vector intrinsic operation, mem
+ [(set VR128:$dst, (V2F64Int VR128:$src))]>;
def PDm_Int : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
!strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst, (V2F64Int (memopv2f64 addr:$src)))]>;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ // Square root.
+ defm VSQRT : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ VEX_4V;
+
+ defm VSQRT : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
+ sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
+ sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+ VEX;
+
+ // Reciprocal approximations. Note that these typically require refinement
+ // in order to obtain suitable precision.
+ defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
+ int_x86_sse_rsqrt_ss>, VEX_4V;
+ defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
+ sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX;
+
+ defm VRCP : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ VEX_4V;
+ defm VRCP : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
+ sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX;
+}
+
// Square root.
-defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt,
- int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>;
+defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+ sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
+ sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>,
+ sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+ sse2_fp_unop_p<0x51, "sqrt", fsqrt>,
+ sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
+ sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
+ sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
+defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+ sse1_fp_unop_p<0x53, "rcp", X86frcp>,
+ sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;
// There is no f64 version of the reciprocal approximation instructions.
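
The refinement mentioned above is conventionally one Newton-Raphson step on the roughly 12-bit RSQRTPS estimate, giving about 22 bits; a common sketch (not taken from this tree):

    #include <xmmintrin.h>

    /* One Newton-Raphson step: y' = y * (1.5 - 0.5 * x * y * y),
       written here as 0.5 * y * (3 - x * y * y). */
    __m128 rsqrt_refined(__m128 x) {
        __m128 y   = _mm_rsqrt_ps(x);
        __m128 xyy = _mm_mul_ps(_mm_mul_ps(x, y), y);
        return _mm_mul_ps(_mm_mul_ps(y, _mm_set1_ps(0.5f)),
                          _mm_sub_ps(_mm_set1_ps(3.0f), xyy));
    }
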
-// Logical
-let Constraints = "$src1 = $dst" in {
- let isCommutable = 1 in {
- def ANDPDrr : PDI<0x54, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "andpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (and (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))]>;
- def ORPDrr : PDI<0x56, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "orpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (or (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))]>;
- def XORPDrr : PDI<0x57, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "xorpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (xor (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64 VR128:$src2))))]>;
- }
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Non-temporal stores
+//===----------------------------------------------------------------------===//
- def ANDPDrm : PDI<0x54, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "andpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (and (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def ORPDrm : PDI<0x56, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "orpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (or (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def XORPDrm : PDI<0x57, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "xorpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (xor (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]>;
- def ANDNPDrr : PDI<0x55, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "andnpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (bc_v2i64 (v2f64 VR128:$src2))))]>;
- def ANDNPDrm : PDI<0x55, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2),
- "andnpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
- (memopv2i64 addr:$src2)))]>;
+let isAsmParserOnly = 1 in {
+ def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
+ def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
+
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
+
+ let AddedComplexity = 400 in { // Prefer non-temporal versions
+ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+ }
}
-let Constraints = "$src1 = $dst" in {
- def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
- "cmp${cc}pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
- VR128:$src, imm:$cc))]>;
- def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
- "cmp${cc}pd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
- (memop addr:$src), imm:$cc))]>;
+def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+// There is no AVX form for instructions below this point
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
- // Accept explicit immediate argument form instead of comparison code.
-let isAsmParserOnly = 1 in {
- def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
- "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
- def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
- "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
}
+def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc Instructions (No AVX form)
+//===----------------------------------------------------------------------===//
+
+// Prefetch intrinsic.
+def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
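+// (The operands matched above are assumed to be (ptr, rw, locality): the
+// bare imm is the read/write hint, which these patterns ignore, and the
+// locality value selects the flavor: 3 maps to t0 (all cache levels),
+// 2 to t1, 1 to t2, and 0 to nta for non-temporal data.)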
+
+// Load, store, and memory fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+ TB, Requires<[HasSSE1]>;
+
+// Alias instructions that map zero vector to pxor / xorp* for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo!
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
}
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
- (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
- (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-// Shuffle and unpack instructions
-let Constraints = "$src1 = $dst" in {
- def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
- def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,
- f128mem:$src2, i8imm:$src3),
- "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (v2f64 (shufp:$src3
- VR128:$src1, (memopv2f64 addr:$src2))))]>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
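+// (The PS, PD and packed-int flavors of this zero idiom are assumed to
+// exist so the register can be materialized in whichever execution domain
+// (ExeDomain) the surrounding code runs in; the patterns above then fold
+// every integer vector type onto the packed-int form.)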
- let AddedComplexity = 10 in {
- def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpckhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
- def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpckhpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (unpckh VR128:$src1,
- (memopv2f64 addr:$src2))))]>;
-
- def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "unpcklpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
- def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpcklpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
- } // AddedComplexity
-} // Constraints = "$src1 = $dst"
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Load/Store XCSR register
+//===----------------------------------------------------------------------===//
+let isAsmParserOnly = 1 in {
+ def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+ def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
+}
+
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
//===---------------------------------------------------------------------===//
-// SSE integer instructions
-let ExeDomain = SSEPackedInt in {
+// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
+//===---------------------------------------------------------------------===//
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
+
+let isAsmParserOnly = 1 in {
+ let neverHasSideEffects = 1 in
+ def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+ def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+
+ let canFoldAsLoad = 1, mayLoad = 1 in {
+ def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>,
+ VEX;
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
+ XS, VEX, Requires<[HasAVX]>;
+ }
+
+ let mayStore = 1 in {
+ def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX;
+ def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
+ XS, VEX, Requires<[HasAVX]>;
+ }
+}
-// Move Instructions
let neverHasSideEffects = 1 in
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, mayLoad = 1 in
+
+let canFoldAsLoad = 1, mayLoad = 1 in {
def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqa\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>;
-let mayStore = 1 in
-def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}",
- [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
-let canFoldAsLoad = 1, mayLoad = 1 in
def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>,
XS, Requires<[HasSSE2]>;
-let mayStore = 1 in
+}
+
+let mayStore = 1 in {
+def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}",
+ [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>;
def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[/*(store (v2i64 VR128:$src), addr:$dst)*/]>,
XS, Requires<[HasSSE2]>;
+}
// Intrinsic forms of MOVDQU load and store
+let isAsmParserOnly = 1 in {
+let canFoldAsLoad = 1 in
+def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
+ XS, VEX, Requires<[HasAVX]>;
+def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
+ XS, VEX, Requires<[HasAVX]>;
+}
+
let canFoldAsLoad = 1 in
def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movdqu\t{$src, $dst|$dst, $src}",
@@ -2019,55 +2021,72 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
XS, Requires<[HasSSE2]>;
-let Constraints = "$src1 = $dst" in {
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Arithmetic Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- bit Commutable = 0> {
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
- let isCommutable = Commutable;
- }
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64
- addr:$src2))))]>;
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
+ (bitconvert (memopv2i64 addr:$src2))))]>;
}
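+// (For illustration, one instantiation from further down,
+//   defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb", int_x86_sse2_padds_b, 1>;
+// expands to PADDSBrr and PADDSBrm with the tied two-operand string
+// "paddsb\t{$src2, $dst|$dst, $src2}", while the VEX variants pass
+// Is2Addr = 0 and get the three-operand form
+// "vpaddsb\t{$src2, $src1, $dst|$dst, $src1, $src2}".)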
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
- string OpcodeStr,
- Intrinsic IntId, Intrinsic IntId2> {
+ string OpcodeStr, Intrinsic IntId,
+ Intrinsic IntId2, bit Is2Addr = 1> {
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId VR128:$src1,
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId VR128:$src1,
(bitconvert (memopv2i64 addr:$src2))))]>;
def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
+ (ins VR128:$src1, i32i8imm:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
}
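+// (Each defm of this shift multiclass yields three instructions: rr and rm
+// take the count in an XMM register and use opc, while ri is the
+// immediate-count form using opc2, with ImmForm selecting the opcode
+// extension; e.g. PSLLWri from the defms below encodes as 0x71 /6.)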
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Commutable = 0> {
+ ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1,
(bitconvert (memopv2i64 addr:$src2)))))]>;
}
@@ -2077,64 +2096,177 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
/// to collapse (bitconvert VT to VT) into its operand.
///
multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
- bit Commutable = 0> {
+ bit IsCommutable = 0, bit Is2Addr = 1> {
+ let isCommutable = IsCommutable in
def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]> {
- let isCommutable = Commutable;
- }
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1,
- (memopv2i64 addr:$src2)))]>;
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
}
-} // Constraints = "$src1 = $dst"
} // ExeDomain = SSEPackedInt
// 128-bit Integer Arithmetic
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
-defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
-
-defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
-defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
+defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
+defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
+defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
+defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
+defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
+defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
+defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
+defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+
+// Intrinsic forms
+defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
+ VEX_4V;
+defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
+ VEX_4V;
+defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
+ VEX_4V;
+defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
+ VEX_4V;
+defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
+ VEX_4V;
+defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
+ VEX_4V;
+defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
+ VEX_4V;
+defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
+ VEX_4V;
+defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
+ VEX_4V;
+defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
+ VEX_4V;
+defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
+ VEX_4V;
+defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
+ VEX_4V;
+defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
+ VEX_4V;
+defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
+ VEX_4V;
+defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
+ VEX_4V;
+defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
+ VEX_4V;
+defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
+ VEX_4V;
+defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
+ VEX_4V;
+defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst" in {
+defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
+// Intrinsic forms
defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
-
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
-
+defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
-defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
+defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
-
defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+
+} // Constraints = "$src1 = $dst"
-defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
-defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
+ int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
+ VEX_4V;
+defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
+ int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
+ VEX_4V;
+defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
+ int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
+ VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
+ int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
+ VEX_4V;
+defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
+ int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
+ VEX_4V;
+defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
+ int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
+ VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
+ int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
+ VEX_4V;
+defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
+ int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
+ VEX_4V;
+
+defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
+defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def VPSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ def VPSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+ VEX_4V;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def VPANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>, VEX_4V;
+ def VPANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (memopv2i64 addr:$src2))))]>,
+ VEX_4V;
+}
+}
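+// (Unlike the element shifts above, pslldq/psrldq shift the full 128-bit
+// register left or right by $src2 *bytes*, which is why no pattern is
+// attached here; the bit-count intrinsics are matched further down via
+// BYTE_imm, which is assumed to rescale the immediate from bits to bytes.)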
+let Constraints = "$src1 = $dst" in {
defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
@@ -2154,17 +2286,34 @@ defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
-// 128-bit logical shifts.
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1,
- ExeDomain = SSEPackedInt in {
- def PSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "pslldq\t{$src2, $dst|$dst, $src2}", []>;
- def PSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- "psrldq\t{$src2, $dst|$dst, $src2}", []>;
- // PSRADQri doesn't exist in SSE[1-3].
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let ExeDomain = SSEPackedInt in {
+ let neverHasSideEffects = 1 in {
+ // 128-bit logical shifts.
+ def PSLLDQri : PDIi8<0x73, MRM7r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "pslldq\t{$src2, $dst|$dst, $src2}", []>;
+ def PSRLDQri : PDIi8<0x73, MRM3r,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "psrldq\t{$src2, $dst|$dst, $src2}", []>;
+ // PSRADQri doesn't exist in SSE[1-3].
+ }
+ def PANDNrr : PDI<0xDF, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ VR128:$src2)))]>;
+
+ def PANDNrm : PDI<0xDF, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "pandn\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+ (memopv2i64 addr:$src2))))]>;
}
+} // Constraints = "$src1 = $dst"
let Predicates = [HasSSE2] in {
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
@@ -2185,32 +2334,33 @@ let Predicates = [HasSSE2] in {
(v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
}
-// Logical
-defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
-defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
-defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
-
-let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in {
- def PANDNrr : PDI<0xDF, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- VR128:$src2)))]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Comparison Instructions
+//===---------------------------------------------------------------------===//
- def PANDNrm : PDI<0xDF, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "pandn\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
- (memopv2i64 addr:$src2))))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
+ 0>, VEX_4V;
+ defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
+ 0>, VEX_4V;
+ defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
+ 0>, VEX_4V;
+ defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
+ 0>, VEX_4V;
}
-// SSE2 Integer comparison
-defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
-defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
-defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
-defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
-defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
-defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+let Constraints = "$src1 = $dst" in {
+ defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
+ defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
+ defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
+ defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
+ defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
+ defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+} // Constraints = "$src1 = $dst"
def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
(PCMPEQBrr VR128:$src1, VR128:$src2)>;
@@ -2238,94 +2388,147 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
(PCMPGTDrm VR128:$src1, addr:$src2)>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Pack Instructions
+//===---------------------------------------------------------------------===//
-// Pack instructions
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
+ 0, 0>, VEX_4V;
+defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
+ 0, 0>, VEX_4V;
+defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
+ 0, 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+} // Constraints = "$src1 = $dst"
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Shuffle Instructions
+//===---------------------------------------------------------------------===//
let ExeDomain = SSEPackedInt in {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
+ PatFrag bc_frag> {
+def ri : Ii8<0x70, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1,
+ (undef))))]>;
+def mi : Ii8<0x70, MRMSrcMem,
+ (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (vt (pshuf_frag:$src2
+ (bc_frag (memopv2i64 addr:$src1)),
+ (undef))))]>;
+}
+} // ExeDomain = SSEPackedInt
-// Shuffle and unpack instructions
-let AddedComplexity = 5 in {
-def PSHUFDri : PDIi8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v4i32 (pshufd:$src2
- VR128:$src1, (undef))))]>;
-def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v4i32 (pshufd:$src2
- (bc_v4i32 (memopv2i64 addr:$src1)),
- (undef))))]>;
-}
-
-// SSE2 with ImmT == Imm8 and XS prefix.
-def PSHUFHWri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
- (undef))))]>,
- XS, Requires<[HasSSE2]>;
-def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshufhw:$src2
- (bc_v8i16 (memopv2i64 addr:$src1)),
- (undef))))]>,
- XS, Requires<[HasSSE2]>;
-
-// SSE2 with ImmT == Imm8 and XD prefix.
-def PSHUFLWri : Ii8<0x70, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
- "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
- (undef))))]>,
- XD, Requires<[HasSSE2]>;
-def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
- (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
- "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst, (v8i16 (pshuflw:$src2
- (bc_v8i16 (memopv2i64 addr:$src1)),
- (undef))))]>,
- XD, Requires<[HasSSE2]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ let AddedComplexity = 5 in
+ defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
+ VEX;
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS,
+ VEX;
-let Constraints = "$src1 = $dst" in {
- def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpcklbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpcklbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckl VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2))))]>;
- def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
+ VEX;
+}
+
+let Predicates = [HasSSE2] in {
+ let AddedComplexity = 5 in
+ defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
+
+ // SSE2 with ImmT == Imm8 and XS prefix.
+ defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS;
+
+ // SSE2 with ImmT == Imm8 and XD prefix.
+ defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
+}
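+// (As a usage sketch, the defm just above,
+//   defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
+// produces PSHUFDri and PSHUFDmi; all three shuffles share opcode 0x70 and
+// differ only in their mandatory prefix (OpSize/XS/XD).)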
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Unpack Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
+ PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> {
+ def rr : PDI<opc, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (vt (unp_frag VR128:$src1, VR128:$src2)))]>;
+ def rm : PDI<opc, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (unp_frag VR128:$src1,
+ (bc_frag (memopv2i64
+ addr:$src2))))]>;
+}
+
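+// (Each instantiation below, e.g.
+//   defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>;
+// expands to a PUNPCKLBWrr / PUNPCKLBWrm pair, replacing the hand-written
+// register and memory defs that the old code spelled out per width.)
+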
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
+ 0>, VEX_4V;
+ defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
+ 0>, VEX_4V;
+ defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32,
+ 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpcklwd\t{$src2, $dst|$dst, $src2}",
+ (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+ "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (unpckl VR128:$src1,
- (bc_v8i16 (memopv2i64 addr:$src2))))]>;
- def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckldq\t{$src2, $dst|$dst, $src2}",
+ (v2i64 (unpckl VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+
+ defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8,
+ 0>, VEX_4V;
+ defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16,
+ 0>, VEX_4V;
+ defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32,
+ 0>, VEX_4V;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
+ def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
- def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
+ (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V;
+ def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckldq\t{$src2, $dst|$dst, $src2}",
+ "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
- (unpckl VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+ (v2i64 (unpckh VR128:$src1,
+ (memopv2i64 addr:$src2))))]>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>;
+ defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>;
+ defm PUNPCKLDQ : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpcklqdq\t{$src2, $dst|$dst, $src2}",
@@ -2338,39 +2541,12 @@ let Constraints = "$src1 = $dst" in {
(v2i64 (unpckl VR128:$src1,
(memopv2i64 addr:$src2))))]>;
- def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhbw\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckh VR128:$src1,
- (bc_v16i8 (memopv2i64 addr:$src2))))]>;
- def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhwd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhwd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckh VR128:$src1,
- (bc_v8i16 (memopv2i64 addr:$src2))))]>;
- def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "punpckhdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
- def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- "punpckhdq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (unpckh VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+ defm PUNPCKHBW : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>;
+ defm PUNPCKHWD : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>;
+ defm PUNPCKHDQ : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>;
+
+ /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+ /// knew to collapse (bitconvert VT to VT) into its operand.
def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"punpckhqdq\t{$src2, $dst|$dst, $src2}",
@@ -2384,102 +2560,117 @@ let Constraints = "$src1 = $dst" in {
(memopv2i64 addr:$src2))))]>;
}
-// Extract / Insert
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+ def rri : Ii8<0xC4, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1,
+ GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+ def rmi : Ii8<0xC4, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1,
+ i16mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [(set VR128:$dst,
+ (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+ imm:$src3))]>;
+}
+
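+// (Note that this multiclass hard-codes both asm spellings itself: Is2Addr
+// selects between the tied "pinsrw" string and the three-operand "vpinsrw"
+// string, so the two defms below differ only in predicates, encoding and
+// the constraint on $src1.)
+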
+// Extract
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+ (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+ imm:$src2))]>, OpSize, VEX;
def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
imm:$src2))]>;
-let Constraints = "$src1 = $dst" in {
- def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1,
- GR32:$src2, i32i8imm:$src3),
- "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
- def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1,
- i16mem:$src2, i32i8imm:$src3),
- "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- [(set VR128:$dst,
- (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
- imm:$src3))]>;
-}
-// Mask creation
-def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
- "pmovmskb\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
+// Insert
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+  defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
-// Conditional store
-let Uses = [EDI] in
-def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
-
-let Uses = [RDI] in
-def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
- "maskmovdqu\t{$mask, $src|$src, $mask}",
- [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+let Constraints = "$src1 = $dst" in
+  defm PINSRW : sse2_pinsrw, TB, OpSize;
} // ExeDomain = SSEPackedInt
-// Non-temporal stores
-def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movnti\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
- TB, Requires<[HasSSE2]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Mask Creation
+//===---------------------------------------------------------------------===//
-let AddedComplexity = 400 in { // Prefer non-temporal versions
-def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+let ExeDomain = SSEPackedInt in {
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
-}
+let isAsmParserOnly = 1 in
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+ "pmovmskb\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
-// Flush cache
-def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
- "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
- TB, Requires<[HasSSE2]>;
+} // ExeDomain = SSEPackedInt
-// Load, store, and memory fence
-def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
- "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
-def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
- "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Conditional Store
+//===---------------------------------------------------------------------===//
-// Pause. This "instruction" is encoded as "rep; nop", so even though it
-// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+let ExeDomain = SSEPackedInt in {
-//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 0)), (NOOP)>;
-def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
- (i8 1)), (MFENCE)>;
+let isAsmParserOnly = 1 in {
+let Uses = [EDI] in
+def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
+let Uses = [RDI] in
+def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
+ (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
+}
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-ones value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
- // FIXME: Change encoding to pseudo.
- def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+ "maskmovdqu\t{$mask, $src|$src, $mask}",
+ [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Doubleword
+//===---------------------------------------------------------------------===//
+// Move Int Doubleword to Packed Double Int
+let isAsmParserOnly = 1 in {
+def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector GR32:$src)))]>, VEX;
+def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
+ VEX;
+}
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2489,6 +2680,18 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>;
+
+// Move Int Doubleword to Single Scalar
+let isAsmParserOnly = 1 in {
+def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
+
+def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
+ VEX;
+}
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>;
@@ -2497,20 +2700,18 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
-// SSE2 instructions with XS prefix
-def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
- Requires<[HasSSE2]>;
-def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(store (i64 (vector_extract (v2i64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
-
-def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
-
+// Move Packed Doubleword Int to Packed Double Int
+let isAsmParserOnly = 1 in {
+def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
+ (iPTR 0)))]>, VEX;
+def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (vector_extract (v4i32 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+}
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2520,6 +2721,15 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>;
+// Move Scalar Single to Double Int
+let isAsmParserOnly = 1 in {
+def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
+def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
+}
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>;
@@ -2527,25 +2737,38 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
-// Store / copy lower 64-bits of a XMM register.
-def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
-
// movd / movq to XMM register zero-extends
+let AddedComplexity = 15, isAsmParserOnly = 1 in {
+def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v4i32 (X86vzmovl
+ (v4i32 (scalar_to_vector GR32:$src)))))]>,
+ VEX;
+def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (v2i64 (scalar_to_vector GR64:$src)))))]>,
+ VEX, VEX_W;
+}
let AddedComplexity = 15 in {
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
(v4i32 (scalar_to_vector GR32:$src)))))]>;
-// This is X86-64 only.
def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
- "mov{d|q}\t{$src, $dst|$dst, $src}",
+ "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
[(set VR128:$dst, (v2i64 (X86vzmovl
(v2i64 (scalar_to_vector GR64:$src)))))]>;
}
let AddedComplexity = 20 in {
+let isAsmParserOnly = 1 in
+def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86vzmovl (v4i32 (scalar_to_vector
+ (loadi32 addr:$src))))))]>,
+ VEX;
def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2558,13 +2781,63 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
+}
+//===---------------------------------------------------------------------===//
+// SSE2 - Move Quadword
+//===---------------------------------------------------------------------===//
+
+// Move Quadword Int to Packed Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+ VEX, Requires<[HasAVX]>;
+def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS,
+                    Requires<[HasSSE2]>; // SSE2 instruction with XS prefix
+
+// Move Packed Quadword Int to Quadword Int
+let isAsmParserOnly = 1 in
+def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>, VEX;
+def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(store (i64 (vector_extract (v2i64 VR128:$src),
+ (iPTR 0))), addr:$dst)]>;
+
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
+
+// Store / copy lower 64-bits of a XMM register.
+let isAsmParserOnly = 1 in
+def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
+def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
+ (loadi64 addr:$src))))))]>,
+ XS, VEX, Requires<[HasAVX]>;
+
+let AddedComplexity = 20 in {
def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2i64 (X86vzmovl (v2i64 (scalar_to_vector
- (loadi64 addr:$src))))))]>, XS,
- Requires<[HasSSE2]>;
+ (loadi64 addr:$src))))))]>,
+ XS, Requires<[HasSSE2]>;
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVZQI2PQIrm addr:$src)>;
@@ -2575,12 +2848,23 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
// Moving from XMM to XMM and clearing the upper 64 bits. Note that there is
// a bug in the IA32 documentation: movq xmm1, xmm2 does clear the high bits.
+let isAsmParserOnly = 1, AddedComplexity = 15 in
+def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, Requires<[HasSSE2]>;
+let AddedComplexity = 20, isAsmParserOnly = 1 in
+def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (v2i64 (X86vzmovl
+ (loadv2i64 addr:$src))))]>,
+ XS, VEX, Requires<[HasAVX]>;
let AddedComplexity = 20 in {
def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -2592,49 +2876,136 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
(MOVZPQILo2PQIrm addr:$src)>;
}
+// Instructions to match in the assembler
+let isAsmParserOnly = 1 in {
+// These instructions are in fact aliases of movd with 64-bit operands
+def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
+ "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
+}
+
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
+let isAsmParserOnly = 1 in
+def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}", []>, XS;
//===---------------------------------------------------------------------===//
-// SSE3 Instructions
+// SSE2 - Misc Instructions
//===---------------------------------------------------------------------===//
-// Move Instructions
-def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movshdup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (movshdup
- VR128:$src, (undef))))]>;
-def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movshdup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (movshdup
- (memopv4f32 addr:$src), (undef)))]>;
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+ "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+ TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+ "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+ "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
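+// (Concretely, "pause" assembles to the byte sequence F3 90: an F3 REP
+// prefix in front of the one-byte NOP, which older processors ignore.)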
+
+// TODO: custom lower this so as to never even generate the noop
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+ (i8 0)), (NOOP)>;
+def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+ (i8 1)), (MFENCE)>;
+
+// Alias instruction that maps an all-ones vector to pcmpeqd for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+ // FIXME: Change encoding to pseudo.
+ def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllOnesV))]>;
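// The 0x76 encoding above is pcmpeqd xmm,xmm, the classic all-ones idiom;
// in C terms (an illustrative sketch, not part of this patch):
#include <emmintrin.h>
__m128i all_ones(void) {
  __m128i z = _mm_setzero_si128();
  return _mm_cmpeq_epi32(z, z);  // x == x in every lane -> 0xFFFFFFFF
}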
-def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movsldup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (movsldup
+//===---------------------------------------------------------------------===//
+// SSE3 - Conversion Instructions
+//===---------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Instructions
+//===---------------------------------------------------------------------===//
+
+// Replicate Single FP
+multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (v4f32 (rep_frag
VR128:$src, (undef))))]>;
-def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movsldup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (movsldup
+def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (rep_frag
(memopv4f32 addr:$src), (undef)))]>;
+}
-def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movddup\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
-def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "movddup\t{$src, $dst|$dst, $src}",
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
+defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
+}
+defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">;
+defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">;
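// Lane semantics of the two replicate forms, as a C sketch (illustrative
// only; SSE3 intrinsics from <pmmintrin.h>):
#include <pmmintrin.h>
__m128 dup_odd (__m128 a) { return _mm_movehdup_ps(a); }  // {a1,a1,a3,a3}
__m128 dup_even(__m128 a) { return _mm_moveldup_ps(a); }  // {a0,a0,a2,a2}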
+
+// Replicate Double FP
+multiclass sse3_replicate_dfp<string OpcodeStr> {
+def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>;
+def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)),
(undef))))]>;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
+defm MOVDDUP : sse3_replicate_dfp<"movddup">;
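// movddup broadcasts the low double; the rm form folds the scalar load,
// matching the scalar_to_vector pattern above. Illustrative C (not part
// of this patch):
#include <pmmintrin.h>
__m128d splat_reg(__m128d a)       { return _mm_movedup_pd(a); }
__m128d splat_mem(const double *p) { return _mm_loaddup_pd(p); }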
+
+// Move Unaligned Integer
+let isAsmParserOnly = 1 in
+ def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vlddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
+def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "lddqu\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
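// lddqu is an unaligned 128-bit integer load that may over-read the
// surrounding cache lines; on the cores it was designed for it tends to
// beat movdqu when the access splits a line. C sketch (illustrative only):
#include <pmmintrin.h>
__m128i load_u(const void *p) {
  return _mm_lddqu_si128((const __m128i *)p);
}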
def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))),
(undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
+// Additional MOVDDUP load patterns
let AddedComplexity = 5 in {
def : Pat<(movddup (memopv2f64 addr:$src), (undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
@@ -2646,52 +3017,98 @@ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)),
(MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>;
}
-// Arithmetic
-let Constraints = "$src1 = $dst" in {
- def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "addsubps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
- VR128:$src2))]>;
- def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "addsubps\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1,
- (memop addr:$src2)))]>;
- def ADDSUBPDrr : S3I<0xD0, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "addsubpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
- VR128:$src2))]>;
- def ADDSUBPDrm : S3I<0xD0, MRMSrcMem,
- (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "addsubpd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1,
- (memop addr:$src2)))]>;
+// vector_shuffle v1, <undef> <1, 1, 3, 3>
+let AddedComplexity = 15 in
+def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
+ (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+// vector_shuffle v1, <undef> <0, 0, 2, 2>
+let AddedComplexity = 15 in
+ def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
+ (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
+let AddedComplexity = 20 in
+ def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
+ (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Arithmetic
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, bit Is2Addr = 1> {
+ def rr : I<0xD0, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (Int VR128:$src1,
+ VR128:$src2))]>;
+ def rm : I<0xD0, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (Int VR128:$src1,
+ (memop addr:$src2)))]>;
+
}
-def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "lddqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX],
+ ExeDomain = SSEPackedDouble in {
+ defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", 0>, XD,
+ VEX_4V;
+ defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", 0>, OpSize,
+ VEX_4V;
+}
+let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
+ ExeDomain = SSEPackedDouble in {
+ defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps">, XD;
+ defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd">, TB, OpSize;
+}
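// addsubps subtracts in the even lanes and adds in the odd ones,
// {a0-b0, a1+b1, a2-b2, a3+b3} -- the building block of complex
// multiplication. Illustrative C (not part of this patch):
#include <pmmintrin.h>
__m128 addsub(__m128 a, __m128 b) { return _mm_addsub_ps(a, b); }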
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Horizontal Ops
+//===---------------------------------------------------------------------===//
// Horizontal ops
-class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3D_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3DI<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3D_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3DI<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
+ !if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, (memop addr:$src2))))]>;
-class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3I<o, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
-class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
+class S3_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1>
: S3I<o, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, (memopv2f64 addr:$src2))))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V;
+ def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V;
+ def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V;
+ def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+ def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V;
+}
+
let Constraints = "$src1 = $dst" in {
def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>;
def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>;
@@ -2703,35 +3120,14 @@ let Constraints = "$src1 = $dst" in {
def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>;
}
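// haddps computes pairwise sums, {a0+a1, a2+a3, b0+b1, b2+b3}, so two
// rounds reduce a vector to its total. A sketch (illustrative only):
#include <pmmintrin.h>
float sum4(__m128 v) {
  v = _mm_hadd_ps(v, v);   // {a0+a1, a2+a3, a0+a1, a2+a3}
  v = _mm_hadd_ps(v, v);   // total in every lane
  return _mm_cvtss_f32(v);
}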
-// Thread synchronization
-def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
- [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
-def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
- [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
-
-// vector_shuffle v1, <undef> <1, 1, 3, 3>
-let AddedComplexity = 15 in
-def : Pat<(v4i32 (movshdup VR128:$src, (undef))),
- (MOVSHDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
-def : Pat<(v4i32 (movshdup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
- (MOVSHDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
-// vector_shuffle v1, <undef> <0, 0, 2, 2>
-let AddedComplexity = 15 in
- def : Pat<(v4i32 (movsldup VR128:$src, (undef))),
- (MOVSLDUPrr VR128:$src)>, Requires<[HasSSE3]>;
-let AddedComplexity = 20 in
- def : Pat<(v4i32 (movsldup (bc_v4i32 (memopv2i64 addr:$src)), (undef))),
- (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>;
-
//===---------------------------------------------------------------------===//
-// SSSE3 Instructions
+// SSSE3 - Packed Absolute Instructions
//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8.
-multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, PatFrag mem_frag128,
+ Intrinsic IntId64, Intrinsic IntId128> {
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst, (IntId64 VR64:$src))]>;
@@ -2739,7 +3135,7 @@ multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR64:$dst,
- (IntId64 (bitconvert (memopv8i8 addr:$src))))]>;
+ (IntId64 (bitconvert (mem_frag64 addr:$src))))]>;
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
@@ -2752,240 +3148,203 @@ multiclass SS3I_unop_rm_int_8<bits<8> opc, string OpcodeStr,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(IntId128
- (bitconvert (memopv16i8 addr:$src))))]>, OpSize;
+ (bitconvert (mem_frag128 addr:$src))))]>, OpSize;
}
-/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16.
-multiclass SS3I_unop_rm_int_16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst,
- (IntId64
- (bitconvert (memopv4i16 addr:$src))))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8,
+ int_x86_ssse3_pabs_b,
+ int_x86_ssse3_pabs_b_128>, VEX;
+ defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_pabs_w,
+ int_x86_ssse3_pabs_w_128>, VEX;
+ defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32,
+ int_x86_ssse3_pabs_d,
+ int_x86_ssse3_pabs_d_128>, VEX;
+}
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>,
- OpSize;
+defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8,
+ int_x86_ssse3_pabs_b,
+ int_x86_ssse3_pabs_b_128>;
+defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_pabs_w,
+ int_x86_ssse3_pabs_w_128>;
+defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32,
+ int_x86_ssse3_pabs_d,
+ int_x86_ssse3_pabs_d_128>;
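// pabs{b,w,d} take a lane-wise absolute value with no saturation, so the
// most negative value maps to itself. Illustrative C (not part of this
// patch; SSSE3 intrinsics from <tmmintrin.h>):
#include <tmmintrin.h>
__m128i abs16(__m128i v) {
  return _mm_abs_epi16(v);  // note: abs(-32768) == -32768
}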
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
-}
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Binary Operator Instructions
+//===---------------------------------------------------------------------===//
-/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32.
-multiclass SS3I_unop_rm_int_32<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128> {
+/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}.
+multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ PatFrag mem_frag64, PatFrag mem_frag128,
+ Intrinsic IntId64, Intrinsic IntId128,
+ bit Is2Addr = 1> {
+ let isCommutable = 1 in
def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst, (IntId64 VR64:$src))]>;
-
+ (ins VR64:$src1, VR64:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]>;
def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins i64mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR64:$dst,
- (IntId64
- (bitconvert (memopv2i32 addr:$src))))]>;
+ (ins VR64:$src1, i64mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR64:$dst,
+ (IntId64 VR64:$src1,
+ (bitconvert (memopv8i8 addr:$src2))))]>;
+ let isCommutable = 1 in
def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>,
- OpSize;
-
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId128
- (bitconvert (memopv4i32 addr:$src))))]>, OpSize;
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb",
- int_x86_ssse3_pabs_b,
- int_x86_ssse3_pabs_b_128>;
-defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw",
- int_x86_ssse3_pabs_w,
- int_x86_ssse3_pabs_w_128>;
-defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd",
- int_x86_ssse3_pabs_d,
- int_x86_ssse3_pabs_d_128>;
-
-/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8.
-let Constraints = "$src1 = $dst" in {
- multiclass SS3I_binop_rm_int_8<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
- let isCommutable = Commutable;
- }
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv8i8 addr:$src2))))]>;
-
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
- }
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let isCommutable = 0 in {
+ defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phadd_w,
+ int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
+ defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32,
+ int_x86_ssse3_phadd_d,
+ int_x86_ssse3_phadd_d_128, 0>, VEX_4V;
+ defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phadd_sw,
+ int_x86_ssse3_phadd_sw_128, 0>, VEX_4V;
+ defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phsub_w,
+ int_x86_ssse3_phsub_w_128, 0>, VEX_4V;
+ defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32,
+ int_x86_ssse3_phsub_d,
+ int_x86_ssse3_phsub_d_128, 0>, VEX_4V;
+ defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phsub_sw,
+ int_x86_ssse3_phsub_sw_128, 0>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw,
+ int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V;
+ defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8,
+ int_x86_ssse3_pshuf_b,
+ int_x86_ssse3_pshuf_b_128, 0>, VEX_4V;
+ defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8,
+ int_x86_ssse3_psign_b,
+ int_x86_ssse3_psign_b_128, 0>, VEX_4V;
+ defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16,
+ int_x86_ssse3_psign_w,
+ int_x86_ssse3_psign_w_128, 0>, VEX_4V;
+ defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32,
+ int_x86_ssse3_psign_d,
+ int_x86_ssse3_psign_d_128, 0>, VEX_4V;
}
-
-/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16.
-let Constraints = "$src1 = $dst" in {
- multiclass SS3I_binop_rm_int_16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
- let isCommutable = Commutable;
- }
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv4i16 addr:$src2))))]>;
-
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv8i16 addr:$src2))))]>, OpSize;
- }
+defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_pmul_hr_sw,
+ int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V;
}
-/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32.
-let Constraints = "$src1 = $dst" in {
- multiclass SS3I_binop_rm_int_32<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr64 : SS38I<opc, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst, (IntId64 VR64:$src1, VR64:$src2))]> {
- let isCommutable = Commutable;
- }
- def rm64 : SS38I<opc, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR64:$dst,
- (IntId64 VR64:$src1,
- (bitconvert (memopv2i32 addr:$src2))))]>;
-
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv4i32 addr:$src2))))]>, OpSize;
- }
+// None of these have i8 immediate fields.
+let ImmT = NoImm, Constraints = "$src1 = $dst" in {
+let isCommutable = 0 in {
+ defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phadd_w,
+ int_x86_ssse3_phadd_w_128>;
+ defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32,
+ int_x86_ssse3_phadd_d,
+ int_x86_ssse3_phadd_d_128>;
+ defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phadd_sw,
+ int_x86_ssse3_phadd_sw_128>;
+ defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phsub_w,
+ int_x86_ssse3_phsub_w_128>;
+ defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32,
+ int_x86_ssse3_phsub_d,
+ int_x86_ssse3_phsub_d_128>;
+ defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_phsub_sw,
+ int_x86_ssse3_phsub_sw_128>;
+ defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8,
+ int_x86_ssse3_pmadd_ub_sw,
+ int_x86_ssse3_pmadd_ub_sw_128>;
+ defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8,
+ int_x86_ssse3_pshuf_b,
+ int_x86_ssse3_pshuf_b_128>;
+ defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8,
+ int_x86_ssse3_psign_b,
+ int_x86_ssse3_psign_b_128>;
+ defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16,
+ int_x86_ssse3_psign_w,
+ int_x86_ssse3_psign_w_128>;
+ defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32,
+ int_x86_ssse3_psign_d,
+ int_x86_ssse3_psign_d_128>;
+}
+defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16,
+ int_x86_ssse3_pmul_hr_sw,
+ int_x86_ssse3_pmul_hr_sw_128>;
}
-let ImmT = NoImm in { // None of these have i8 immediate fields.
-defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw",
- int_x86_ssse3_phadd_w,
- int_x86_ssse3_phadd_w_128>;
-defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd",
- int_x86_ssse3_phadd_d,
- int_x86_ssse3_phadd_d_128>;
-defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw",
- int_x86_ssse3_phadd_sw,
- int_x86_ssse3_phadd_sw_128>;
-defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw",
- int_x86_ssse3_phsub_w,
- int_x86_ssse3_phsub_w_128>;
-defm PHSUBD : SS3I_binop_rm_int_32<0x06, "phsubd",
- int_x86_ssse3_phsub_d,
- int_x86_ssse3_phsub_d_128>;
-defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw",
- int_x86_ssse3_phsub_sw,
- int_x86_ssse3_phsub_sw_128>;
-defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
- int_x86_ssse3_pmadd_ub_sw,
- int_x86_ssse3_pmadd_ub_sw_128>;
-defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
- int_x86_ssse3_pmul_hr_sw,
- int_x86_ssse3_pmul_hr_sw_128, 1>;
-
-defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb",
- int_x86_ssse3_pshuf_b,
- int_x86_ssse3_pshuf_b_128>;
-defm PSIGNB : SS3I_binop_rm_int_8 <0x08, "psignb",
- int_x86_ssse3_psign_b,
- int_x86_ssse3_psign_b_128>;
-defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw",
- int_x86_ssse3_psign_w,
- int_x86_ssse3_psign_w_128>;
-defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd",
- int_x86_ssse3_psign_d,
- int_x86_ssse3_psign_d_128>;
-}
-
-// palignr patterns.
-let Constraints = "$src1 = $dst" in {
- def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
- (ins VR64:$src1, VR64:$src2, i8imm:$src3),
- "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>;
- def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
- (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
- "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>;
-
- def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>, OpSize;
- def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- []>, OpSize;
+def : Pat<(X86pshufb VR128:$src, VR128:$mask),
+ (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
+def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
+ (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
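// pshufb: each mask byte selects a source byte, and a set high bit zeroes
// that lane. A sketch reversing the 16 bytes of a register (illustrative,
// not part of this patch):
#include <tmmintrin.h>
__m128i reverse_bytes(__m128i v) {
  const __m128i rev = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
  return _mm_shuffle_epi8(v, rev);
}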
+
+//===---------------------------------------------------------------------===//
+// SSSE3 - Packed Align Instruction Patterns
+//===---------------------------------------------------------------------===//
+
+multiclass sse3_palign<string asm, bit Is2Addr = 1> {
+ def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst),
+ (ins VR64:$src1, VR64:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>;
+ def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst),
+ (ins VR64:$src1, i64mem:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>;
+
+ def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
+ def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ []>, OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PALIGN : sse3_palign<"palignr">;
+
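// palignr extracts a byte-shifted window from the concatenation of its two
// sources; the first operand supplies the high half, which is why the
// patterns below pass $src2 before $src1. Illustrative C:
#include <tmmintrin.h>
__m128i window(__m128i hi, __m128i lo) {
  return _mm_alignr_epi8(hi, lo, 4);  // bytes 4..19 of the 32-byte lo:hi pair
}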
let AddedComplexity = 5 in {
def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)),
@@ -2996,10 +3355,6 @@ def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)),
(PALIGNR64rr VR64:$src2, VR64:$src1,
(SHUFFLE_get_palign_imm VR64:$src3))>,
Requires<[HasSSSE3]>;
-def : Pat<(v2f32 (palign:$src3 VR64:$src1, VR64:$src2)),
- (PALIGNR64rr VR64:$src2, VR64:$src1,
- (SHUFFLE_get_palign_imm VR64:$src3))>,
- Requires<[HasSSSE3]>;
def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)),
(PALIGNR64rr VR64:$src2, VR64:$src1,
(SHUFFLE_get_palign_imm VR64:$src3))>,
@@ -3027,10 +3382,15 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
Requires<[HasSSSE3]>;
}
-def : Pat<(X86pshufb VR128:$src, VR128:$mask),
- (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
-def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))),
- (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>;
+//===---------------------------------------------------------------------===//
+// SSSE3 Misc Instructions
+//===---------------------------------------------------------------------===//
+
+// Thread synchronization
+def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor",
+ [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>;
+def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait",
+ [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>;
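// monitor arms a watch on the cache line holding its address operand and
// mwait sleeps until that line is written; the fixed EAX/ECX/EDX operands
// above are implicit in the intrinsics. Sketch only -- these instructions
// are privileged on most OSes:
#include <pmmintrin.h>
void wait_for_store(volatile int *addr) {
  _mm_monitor((const void *)addr, 0, 0);
  if (*addr == 0)      // re-check before sleeping
    _mm_mwait(0, 0);
}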
//===---------------------------------------------------------------------===//
// Non-Instruction Patterns
@@ -3311,287 +3671,9 @@ def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(MOVUPSmr addr:$dst, VR128:$src)>;
//===----------------------------------------------------------------------===//
-// SSE4.1 Instructions
+// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//
-multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
- string OpcodeStr,
- Intrinsic V4F32Int,
- Intrinsic V2F64Int> {
- // Intrinsic operation, reg.
- // Vector intrinsic operation, reg
- def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
- OpSize;
-
- // Vector intrinsic operation, mem
- def PSm_Int : Ii8<opcps, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
- TA, OpSize,
- Requires<[HasSSE41]>;
-
- // Vector intrinsic operation, reg
- def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
- OpSize;
-
- // Vector intrinsic operation, mem
- def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
- (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
- !strconcat(OpcodeStr,
- "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set VR128:$dst,
- (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
- OpSize;
-}
-
-let Constraints = "$src1 = $dst" in {
-multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr,
- Intrinsic F32Int,
- Intrinsic F64Int> {
- // Intrinsic operation, reg.
- def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize;
-
- // Intrinsic operation, mem.
- def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
- OpSize;
-
- // Intrinsic operation, reg.
- def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
- (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize;
-
- // Intrinsic operation, mem.
- def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
- (outs VR128:$dst),
- (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
- OpSize;
-}
-}
-
-// FP round - roundss, roundps, roundsd, roundpd
-defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
- int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
-defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
- int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
-
-// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
-multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128> {
- def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
- def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (IntId128
- (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
-}
-
-defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
- int_x86_sse41_phminposuw>;
-
-/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
- }
-}
-
-defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq",
- int_x86_sse41_pcmpeqq, 1>;
-defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw",
- int_x86_sse41_packusdw, 0>;
-defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb",
- int_x86_sse41_pminsb, 1>;
-defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd",
- int_x86_sse41_pminsd, 1>;
-defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud",
- int_x86_sse41_pminud, 1>;
-defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw",
- int_x86_sse41_pminuw, 1>;
-defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb",
- int_x86_sse41_pmaxsb, 1>;
-defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd",
- int_x86_sse41_pmaxsd, 1>;
-defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
- int_x86_sse41_pmaxud, 1>;
-defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
- int_x86_sse41_pmaxuw, 1>;
-
-defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
-
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
- (PCMPEQQrr VR128:$src1, VR128:$src2)>;
-def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
- (PCMPEQQrm VR128:$src1, addr:$src2)>;
-
-/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
- SDNode OpNode, Intrinsic IntId128,
- bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode (OpVT VR128:$src1),
- VR128:$src2))]>, OpSize {
- let isCommutable = Commutable;
- }
- def rr_int : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize;
- def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, (memop addr:$src2)))]>,
- OpSize;
- }
-}
-
-/// SS48I_binop_rm - Simple SSE41 binary operator.
-let Constraints = "$src1 = $dst" in {
-multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (OpNode VR128:$src1,
- (bc_v4i32 (memopv2i64 addr:$src2))))]>,
- OpSize;
-}
-}
-
-defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>;
-
-/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Commutable = 0> {
- def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
- OpSize;
- }
-}
-
-defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps",
- int_x86_sse41_blendps, 0>;
-defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd",
- int_x86_sse41_blendpd, 0>;
-defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw",
- int_x86_sse41_pblendw, 0>;
-defm DPPS : SS41I_binop_rmi_int<0x40, "dpps",
- int_x86_sse41_dpps, 1>;
-defm DPPD : SS41I_binop_rmi_int<0x41, "dppd",
- int_x86_sse41_dppd, 1>;
-defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw",
- int_x86_sse41_mpsadbw, 0>;
-
-
-/// SS41I_ternary_int - SSE 4.1 ternary operator
-let Uses = [XMM0], Constraints = "$src1 = $dst" in {
- multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
- def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
- [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
- OpSize;
-
- def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr,
- "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
- [(set VR128:$dst,
- (IntId VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
- }
-}
-
-defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
-defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
-defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
-
-
multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -3604,6 +3686,21 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
+ VEX;
+defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
+ VEX;
+defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", int_x86_sse41_pmovsxdq>,
+ VEX;
+defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>,
+ VEX;
+defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>,
+ VEX;
+defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>,
+ VEX;
+}
+
defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>;
defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>;
defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>;
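// The sign/zero-extending moves widen the low lanes in one instruction;
// for example (illustrative C, SSE4.1 intrinsics from <smmintrin.h>):
#include <smmintrin.h>
__m128i widen_s8(__m128i v) { return _mm_cvtepi8_epi16(v); }  // pmovsxbw
__m128i widen_u8(__m128i v) { return _mm_cvtepu8_epi16(v); }  // pmovzxbw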
@@ -3655,6 +3752,17 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
+ VEX;
+defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
+ VEX;
+defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>,
+ VEX;
+defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>,
+ VEX;
+}
+
defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>;
defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>;
defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>;
@@ -3685,6 +3793,12 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
+ VEX;
+defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
+ VEX;
+}
defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
@@ -3699,6 +3813,9 @@ def : Pat<(int_x86_sse41_pmovzxbq
(v4i32 (scalar_to_vector (loadi32 addr:$src))))))),
(PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>;
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Extract Instructions
+//===----------------------------------------------------------------------===//
/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
@@ -3718,6 +3835,9 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
+
defm PEXTRB : SS41I_extract8<0x14, "pextrb">;
@@ -3733,6 +3853,9 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
+
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
@@ -3752,8 +3875,31 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
+
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
+/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
+multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
+ def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set GR64:$dst,
+ (extractelt (v2i64 VR128:$src1), imm:$src2))]>, OpSize, REX_W;
+ def mr : SS4AIi8<opc, MRMDestMem, (outs),
+ (ins i64mem:$dst, VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
+ addr:$dst)]>, OpSize, REX_W;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
+
+defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
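// pextrq moves either 64-bit lane straight to a GPR; the REX.W form above
// exists only in 64-bit mode. Illustrative C (not part of this patch):
#include <smmintrin.h>
long long low_qword(__m128i v) { return _mm_extract_epi64(v, 0); }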
/// SS41I_extractf32 - SSE 4.1 extract a 32-bit fp value to an int reg or
/// memory destination
@@ -3773,6 +3919,8 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">;
// Also match an EXTRACTPS store when the store is done as f32 instead of i32.
@@ -3782,78 +3930,530 @@ def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)),
(EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>,
Requires<[HasSSE41]>;
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_insert8<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
- imm:$src3))]>, OpSize;
- }
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Insert Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, GR32:$src2, imm:$src3))]>, OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i8mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86pinsrb VR128:$src1, (extloadi8 addr:$src2),
+ imm:$src3))]>, OpSize;
}
-defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
+
+multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
+ imm:$src3)))]>, OpSize;
+}
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_insert32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, GR32:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>,
- OpSize;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i32mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2),
- imm:$src3)))]>, OpSize;
- }
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+
+multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
+ OpSize;
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i64mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
+ imm:$src3)))]>, OpSize;
}
-defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
+let Constraints = "$src1 = $dst" in
+ defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
// insertps has a few different modes. The first two below are optimized
// inserts that won't zero arbitrary elements in the destination vector. The
// next one matches the intrinsic and could zero arbitrary elements in the
// target vector.
-let Constraints = "$src1 = $dst" in {
- multiclass SS41I_insertf32<bits<8> opc, string OpcodeStr> {
- def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
+multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
+ def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1, VR128:$src2, imm:$src3))]>,
OpSize;
- def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
- !strconcat(OpcodeStr,
- "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- [(set VR128:$dst,
- (X86insrtps VR128:$src1,
- (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
- imm:$src3))]>, OpSize;
- }
+ def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, f32mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(asm,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (X86insrtps VR128:$src1,
+ (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
+ imm:$src3))]>, OpSize;
}
-defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+let Constraints = "$src1 = $dst" in
+ defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
(INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>;
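// insertps's imm8 packs three fields: bits 7:6 select the source lane,
// bits 5:4 the destination lane, and bits 3:0 a zero mask. Copying b[0]
// into a[2] without zeroing anything (illustrative C):
#include <smmintrin.h>
__m128 put_lane2(__m128 a, __m128 b) {
  return _mm_insert_ps(a, b, 0x20);  // src=0, dst=2, zmask=0
}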
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Round Instructions
+//===----------------------------------------------------------------------===//
+
+multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd,
+ string OpcodeStr,
+ Intrinsic V4F32Int,
+ Intrinsic V2F64Int> {
+  // Vector intrinsic operation, reg
+ def PSr_Int : SS4AIi8<opcps, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V4F32Int VR128:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm_Int : Ii8<opcps, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
+ TA, OpSize,
+ Requires<[HasSSE41]>;
+
+ // Vector intrinsic operation, reg
+ def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst, (V2F64Int VR128:$src1, imm:$src2))]>,
+ OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm_Int : SS4AIi8<opcpd, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set VR128:$dst,
+ (V2F64Int (memopv2f64 addr:$src1),imm:$src2))]>,
+ OpSize;
+}
+
+multiclass sse41_fp_unop_rm_avx<bits<8> opcps, bits<8> opcpd,
+ string OpcodeStr> {
+  // Vector intrinsic operation, reg
+ def PSr : SS4AIi8<opcps, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
+ // Vector intrinsic operation, mem
+ def PSm : Ii8<opcps, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, TA, OpSize, Requires<[HasSSE41]>;
+
+ // Vector intrinsic operation, reg
+ def PDr : SS4AIi8<opcpd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+
+ // Vector intrinsic operation, mem
+ def PDm : SS4AIi8<opcpd, MRMSrcMem,
+ (outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
+ !strconcat(OpcodeStr,
+ "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, OpSize;
+}
+
+multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int, bit Is2Addr = 1> {
+ // Intrinsic operation, reg.
+ def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
+ OpSize;
+}
+
+multiclass sse41_fp_binop_rm_avx<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
+ // Intrinsic operation, reg.
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, mem.
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, reg.
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+
+ // Intrinsic operation, mem.
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32i8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, OpSize;
+}
+
+// FP round - roundss, roundps, roundsd, roundpd
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ // Intrinsic form
+ defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround",
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>,
+ VEX;
+ defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd,
+ 0>, VEX_4V;
+ // Instructions for the assembler
+ defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX;
+ defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V;
+}
+
+defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round",
+ int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+let Constraints = "$src1 = $dst" in
+defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+ int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.1 - Misc Instructions
+//===----------------------------------------------------------------------===//
+
+// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose operand type is v8i16.
+multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128> {
+ def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (IntId128 VR128:$src))]>, OpSize;
+ def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (IntId128
+ (bitconvert (memopv8i16 addr:$src))))]>, OpSize;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
+ int_x86_sse41_phminposuw>, VEX;
+defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
+ int_x86_sse41_phminposuw>;
+
+/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
+multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
+}
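+
+// Illustration (ours, not part of the original patch): with the default
+// Is2Addr = 1, "defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", ...>" below
+// expands to PMINSBrr and PMINSBrm with the two-address asm string
+// "pminsb {$src2, $dst|$dst, $src2}", while the AVX instantiations pass
+// Is2Addr = 0 and get the three-address form
+// "vpminsb {$src2, $src1, $dst|$dst, $src1, $src2}".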
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ let isCommutable = 0 in
+ defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
+ 0>, VEX_4V;
+ defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq,
+ 0>, VEX_4V;
+ defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb,
+ 0>, VEX_4V;
+ defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd,
+ 0>, VEX_4V;
+ defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud,
+ 0>, VEX_4V;
+ defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw,
+ 0>, VEX_4V;
+ defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb,
+ 0>, VEX_4V;
+ defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd,
+ 0>, VEX_4V;
+ defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud,
+ 0>, VEX_4V;
+ defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw,
+ 0>, VEX_4V;
+ defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq,
+ 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in
+ defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>;
+ defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>;
+ defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>;
+ defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>;
+ defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>;
+ defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>;
+ defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>;
+ defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>;
+ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>;
+ defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>;
+ defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>;
+}
+
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
+ (PCMPEQQrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
+ (PCMPEQQrm VR128:$src1, addr:$src2)>;
+
+/// SS48I_binop_rm - Simple SSE 4.1 binary operator.
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+ OpSize;
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>,
+ OpSize;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
+
+/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
+multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ let isCommutable = 1 in
+ def rri : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1, VR128:$src2, imm:$src3))]>,
+ OpSize;
+ def rmi : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i32i8imm:$src3),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), imm:$src3))]>,
+ OpSize;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ let isCommutable = 0 in {
+ defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
+ 0>, VEX_4V;
+ defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd,
+ 0>, VEX_4V;
+ defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw,
+ 0>, VEX_4V;
+ defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw,
+ 0>, VEX_4V;
+ }
+ defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps,
+ 0>, VEX_4V;
+ defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
+ 0>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+ let isCommutable = 0 in {
+ defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>;
+ defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>;
+ defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>;
+ defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>;
+ }
+ defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>;
+ defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>;
+}
+
+/// SS41I_quaternary_int_avx - AVX SSE 4.1 instructions with 4 operands
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr> {
+ def rr : I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+
+ def rm : I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, VR128:$src3),
+ !strconcat(OpcodeStr,
+ "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ [], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM;
+ }
+}
+
+defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">;
+defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">;
+defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">;
+
+/// SS41I_ternary_int - SSE 4.1 ternary operator
+let Uses = [XMM0], Constraints = "$src1 = $dst" in {
+ multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
+ def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr,
+ "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))]>,
+ OpSize;
+
+ def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr,
+ "\t{%xmm0, $src2, $dst|$dst, $src2, %xmm0}"),
+ [(set VR128:$dst,
+ (IntId VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2)), XMM0))]>, OpSize;
+ }
+}
+
+defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>;
+defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>;
+defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
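+
+// Illustrative note (ours, not from the patch): because of Uses = [XMM0]
+// above, an instruction such as "blendvpd %xmm0, %xmm2, %xmm1" reads its
+// blend mask from the fixed register %xmm0, which is why the asm string
+// hardcodes %xmm0 instead of modeling it as a normal input operand.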
+
// The ptest instruction; we'll lower to this in X86ISelLowering, primarily
// from the Intel intrinsic that corresponds to it.
+let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
+ OpSize, VEX;
+def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
+ "vptest\t{$src2, $src1|$src1, $src2}",
+ [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
+ OpSize, VEX;
+}
+
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"ptest \t{$src2, $src1|$src1, $src2}",
@@ -3865,43 +4465,207 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
OpSize;
}
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "vmovntdqa\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
+ OpSize, VEX;
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"movntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
OpSize;
-
//===----------------------------------------------------------------------===//
-// SSE4.2 Instructions
+// SSE4.2 - Compare Instructions
//===----------------------------------------------------------------------===//
/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator
-let Constraints = "$src1 = $dst" in {
- multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Commutable = 0> {
- def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
- }
+multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : SS428I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : SS428I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+ defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
+ 0>, VEX_4V;
+let Constraints = "$src1 = $dst" in
+ defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
(PCMPGTQrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
(PCMPGTQrm VR128:$src1, addr:$src2)>;
+//===----------------------------------------------------------------------===//
+// SSE4.2 - String/Text Processing Instructions
+//===----------------------------------------------------------------------===//
+
+// Packed Compare Implicit Length Strings, Return Mask
+let Defs = [EFLAGS], usesCustomInserter = 1 in {
+ def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "#PCMPISTRM128rr PSEUDO!",
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
+ imm:$src3))]>, OpSize;
+ def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "#PCMPISTRM128rm PSEUDO!",
+ [(set VR128:$dst, (int_x86_sse42_pcmpistrm128
+ VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize;
+}
+
+let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1,
+ Predicates = [HasAVX] in {
+ def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+ def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
+}
+
+let Defs = [XMM0, EFLAGS] in {
+ def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+ def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
+}
+
+// Packed Compare Explicit Length Strings, Return Mask
+let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
+ def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "#PCMPESTRM128rr PSEUDO!",
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
+
+ def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "#PCMPESTRM128rm PSEUDO!",
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
+ OpSize;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX],
+ Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+ def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX;
+}
+
+let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
+ def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+}
+
+// Packed Compare Implicit Length Strings, Return Index
+let Defs = [ECX, EFLAGS] in {
+ multiclass SS42AI_pcmpistri<Intrinsic IntId128, string asm = "pcmpistri"> {
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
+ (implicit EFLAGS)]>, OpSize;
+ def rm : SS42AI<0x63, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
+ [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
+ (implicit EFLAGS)]>, OpSize;
+ }
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128, "vpcmpistri">,
+ VEX;
+defm VPCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128, "vpcmpistri">,
+ VEX;
+}
+
+defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
+defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
+defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
+defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
+defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
+defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
+
+// Packed Compare Explicit Length Strings, Return Index
+let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
+ multiclass SS42AI_pcmpestri<Intrinsic IntId128, string asm = "pcmpestri"> {
+ def rr : SS42AI<0x61, MRMSrcReg, (outs),
+ (ins VR128:$src1, VR128:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
+ (implicit EFLAGS)]>, OpSize;
+ def rm : SS42AI<0x61, MRMSrcMem, (outs),
+ (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
+ !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
+ [(set ECX,
+ (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
+ (implicit EFLAGS)]>, OpSize;
+ }
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128, "vpcmpestri">,
+ VEX;
+defm VPCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128, "vpcmpestri">,
+ VEX;
+}
+
+defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
+defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
+defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
+defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
+defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
+defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
+
+//===----------------------------------------------------------------------===//
+// SSE4.2 - CRC Instructions
+//===----------------------------------------------------------------------===//
+
+// No CRC instructions have AVX equivalents
+
// CRC intrinsic instructions
// This set of instructions is rm only; the only difference is the size
// of r and m.
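// For example (illustrative): the r/m8 and r/m32 forms follow the same
// pattern and differ only in the width of the source operand (and hence the
// opcode byte selected).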
@@ -3969,133 +4733,52 @@ let Constraints = "$src1 = $dst" in {
REX_W;
}
-// String/text processing instructions.
-let Defs = [EFLAGS], usesCustomInserter = 1 in {
-def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "#PCMPISTRM128rr PSEUDO!",
- [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2,
- imm:$src3))]>, OpSize;
-def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "#PCMPISTRM128rm PSEUDO!",
- [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, (load addr:$src2),
- imm:$src3))]>, OpSize;
-}
-
-let Defs = [XMM0, EFLAGS] in {
-def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
-def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize;
-}
+//===----------------------------------------------------------------------===//
+// AES-NI Instructions
+//===----------------------------------------------------------------------===//
-let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
-def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "#PCMPESTRM128rr PSEUDO!",
- [(set VR128:$dst,
- (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
-
-def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "#PCMPESTRM128rm PSEUDO!",
- [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
- OpSize;
+multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
+ Intrinsic IntId128, bit Is2Addr = 1> {
+ def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
+ OpSize;
+ def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set VR128:$dst,
+ (IntId128 VR128:$src1,
+ (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
-def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
-def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize;
+// Perform One Round of an AES Encryption/Decryption Flow
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+ defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
+ int_x86_aesni_aesenc, 0>, VEX_4V;
+ defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
+ int_x86_aesni_aesenclast, 0>, VEX_4V;
+ defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec",
+ int_x86_aesni_aesdec, 0>, VEX_4V;
+ defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast",
+ int_x86_aesni_aesdeclast, 0>, VEX_4V;
}
-let Defs = [ECX, EFLAGS] in {
- multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
- def rr : SS42AI<0x63, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src2, i8imm:$src3),
- "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
- [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
- (implicit EFLAGS)]>, OpSize;
- def rm : SS42AI<0x63, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
- "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
- [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)),
- (implicit EFLAGS)]>, OpSize;
- }
-}
-
-defm PCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128>;
-defm PCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128>;
-defm PCMPISTRIC : SS42AI_pcmpistri<int_x86_sse42_pcmpistric128>;
-defm PCMPISTRIO : SS42AI_pcmpistri<int_x86_sse42_pcmpistrio128>;
-defm PCMPISTRIS : SS42AI_pcmpistri<int_x86_sse42_pcmpistris128>;
-defm PCMPISTRIZ : SS42AI_pcmpistri<int_x86_sse42_pcmpistriz128>;
-
-let Defs = [ECX, EFLAGS] in {
-let Uses = [EAX, EDX] in {
- multiclass SS42AI_pcmpestri<Intrinsic IntId128> {
- def rr : SS42AI<0x61, MRMSrcReg, (outs),
- (ins VR128:$src1, VR128:$src3, i8imm:$src5),
- "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
- [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)),
- (implicit EFLAGS)]>, OpSize;
- def rm : SS42AI<0x61, MRMSrcMem, (outs),
- (ins VR128:$src1, i128mem:$src3, i8imm:$src5),
- "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
- [(set ECX,
- (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
- (implicit EFLAGS)]>, OpSize;
- }
-}
-}
-
-defm PCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128>;
-defm PCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128>;
-defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
-defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
-defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
-defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
-
-//===----------------------------------------------------------------------===//
-// AES-NI Instructions
-//===----------------------------------------------------------------------===//
-
let Constraints = "$src1 = $dst" in {
- multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
- Intrinsic IntId128, bit Commutable = 0> {
- def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, VR128:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
- OpSize {
- let isCommutable = Commutable;
- }
- def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, i128mem:$src2),
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- [(set VR128:$dst,
- (IntId128 VR128:$src1,
- (bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
- }
+ defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
+ int_x86_aesni_aesenc>;
+ defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
+ int_x86_aesni_aesenclast>;
+ defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
+ int_x86_aesni_aesdec>;
+ defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
+ int_x86_aesni_aesdeclast>;
}
-defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
- int_x86_aesni_aesenc>;
-defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
- int_x86_aesni_aesenclast>;
-defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
- int_x86_aesni_aesdec>;
-defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
- int_x86_aesni_aesdeclast>;
-
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
(AESENCrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
@@ -4113,13 +4796,27 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
(AESDECLASTrm VR128:$src1, addr:$src2)>;
+// Perform the AES InvMixColumn Transformation
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+ def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc VR128:$src1))]>,
+ OpSize, VEX;
+ def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1),
+ "vaesimc\t{$src1, $dst|$dst, $src1}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
+ OpSize, VEX;
+}
def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
[(set VR128:$dst,
(int_x86_aesni_aesimc VR128:$src1))]>,
OpSize;
-
def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1),
"aesimc\t{$src1, $dst|$dst, $src1}",
@@ -4127,6 +4824,22 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
(int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>,
OpSize;
+// AES Round Key Generation Assist
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+ def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize, VEX;
+ def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i8imm:$src2),
+ "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+ imm:$src2))]>,
+ OpSize, VEX;
+}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index a9681e6..633ddd4 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -30,7 +30,7 @@ class X86MCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
bool Is64BitMode;
public:
- X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit)
+ X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit)
: TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
Is64BitMode = is64Bit;
}
@@ -38,17 +38,18 @@ public:
~X86MCCodeEmitter() {}
unsigned getNumFixupKinds() const {
- return 4;
+ return 5;
}
const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[] = {
{ "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
{ "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel },
+ { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel },
{ "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
{ "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }
};
-
+
if (Kind < FirstTargetFixupKind)
return MCCodeEmitter::getFixupKindInfo(Kind);
@@ -56,16 +57,38 @@ public:
"Invalid kind!");
return Infos[Kind - FirstTargetFixupKind];
}
-
+
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86RegisterInfo::getX86RegNum(MO.getReg());
}
-
+
+ // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+ // 0-7, and the difference between the two groups is given by the REX prefix.
+ // In the VEX prefix, registers are numbered sequentially from 0-15 and
+ // encoded in 1's complement form, for example:
+ //
+ // ModRM field => XMM9 => 1
+ // VEX.VVVV => XMM9 => ~9
+ //
+ // See table 4-35 of the Intel AVX Programming Reference for details.
+ static unsigned char getVEXRegisterEncoding(const MCInst &MI,
+ unsigned OpNum) {
+ unsigned SrcReg = MI.getOperand(OpNum).getReg();
+ unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+ if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) ||
+ (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15))
+ SrcRegNum += 8;
+
+ // The registers represented through VEX_VVVV should
+ // be encoded in 1's complement form.
+ return (~SrcRegNum) & 0xf;
+ }
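+
+ // Worked example (illustrative, not part of the patch): XMM9 has ModRM
+ // number 1 and is bumped to 9 as a high-group register, so (~9) & 0xf == 0x6
+ // lands in VEX.VVVV; an unused VVVV field stays all ones (0xf).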
+
void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
OS << (char)C;
++CurByte;
}
-
+
void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
raw_ostream &OS) const {
// Output the constant in little endian byte order.
@@ -75,38 +98,49 @@ public:
}
}
- void EmitImmediate(const MCOperand &Disp,
+ void EmitImmediate(const MCOperand &Disp,
unsigned ImmSize, MCFixupKind FixupKind,
unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
int ImmOffset = 0) const;
-
+
inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
unsigned RM) {
assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
return RM | (RegOpcode << 3) | (Mod << 6);
}
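
// Worked example (illustrative, not from the patch): a register-direct
// operation uses Mod == 3, so ModRMByte(3, /*RegOpcode=*/0, /*RM=*/2) yields
// 0xC2, e.g. the ModRM byte of "vaddpd %xmm2, %xmm1, %xmm0".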
-
+
void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
unsigned &CurByte, raw_ostream &OS) const {
EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS);
}
-
+
void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
unsigned &CurByte, raw_ostream &OS) const {
// SIB byte is in the same format as the ModRMByte.
EmitByte(ModRMByte(SS, Index, Base), CurByte, OS);
}
-
-
+
+
void EmitMemModRMByte(const MCInst &MI, unsigned Op,
- unsigned RegOpcodeField,
- unsigned TSFlags, unsigned &CurByte, raw_ostream &OS,
+ unsigned RegOpcodeField,
+ uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
-
+
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const;
-
+
+ void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const TargetInstrDesc &Desc,
+ raw_ostream &OS) const;
+
+ void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ raw_ostream &OS) const;
+
+ void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+ const MCInst &MI, const TargetInstrDesc &Desc,
+ raw_ostream &OS) const;
};
} // end anonymous namespace
@@ -124,24 +158,23 @@ MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &,
return new X86MCCodeEmitter(TM, Ctx, true);
}
-
-/// isDisp8 - Return true if this signed displacement fits in a 8-bit
-/// sign-extended field.
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
static bool isDisp8(int Value) {
return Value == (signed char)Value;
}
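
// Example (illustrative): isDisp8(127) and isDisp8(-128) are true, while
// isDisp8(128) and isDisp8(-129) are false and force a disp32 encoding.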
/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
/// in an instruction with the specified TSFlags.
-static MCFixupKind getImmFixupKind(unsigned TSFlags) {
+static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
unsigned Size = X86II::getSizeOfImm(TSFlags);
bool isPCRel = X86II::isImmPCRel(TSFlags);
-
+
switch (Size) {
default: assert(0 && "Unknown immediate size");
case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1;
+ case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2;
case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4;
- case 2: assert(!isPCRel); return FK_Data_2;
case 8: assert(!isPCRel); return FK_Data_8;
}
}
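
// Example (illustrative): a 4-byte PC-relative immediate maps to
// X86::reloc_pcrel_4byte above, while a plain 4-byte immediate falls back to
// the generic FK_Data_4 fixup.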
@@ -162,29 +195,30 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
// If we have an immoffset, add it to the expression.
const MCExpr *Expr = DispOp.getExpr();
-
+
// If the fixup is pc-relative, we need to bias the value to be relative to
// the start of the field, not the end of the field.
if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) ||
FixupKind == MCFixupKind(X86::reloc_riprel_4byte) ||
FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load))
ImmOffset -= 4;
+ if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte))
+ ImmOffset -= 2;
if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte))
ImmOffset -= 1;
-
+
if (ImmOffset)
Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx),
Ctx);
-
+
// Emit a symbolic constant as a fixup and 4 zeros.
Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind));
EmitConstant(0, Size, CurByte, OS);
}
-
void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
unsigned RegOpcodeField,
- unsigned TSFlags, unsigned &CurByte,
+ uint64_t TSFlags, unsigned &CurByte,
raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const{
const MCOperand &Disp = MI.getOperand(Op+3);
@@ -192,43 +226,43 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
const MCOperand &Scale = MI.getOperand(Op+1);
const MCOperand &IndexReg = MI.getOperand(Op+2);
unsigned BaseReg = Base.getReg();
-
+
// Handle %rip relative addressing.
if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
- assert(IndexReg.getReg() == 0 && Is64BitMode &&
- "Invalid rip-relative address");
+ assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode");
+ assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
-
+
unsigned FixupKind = X86::reloc_riprel_4byte;
-
+
// movq loads are handled with a special relocation form which allows the
// linker to eliminate some loads for GOT references which end up in the
// same linkage unit.
if (MI.getOpcode() == X86::MOV64rm ||
MI.getOpcode() == X86::MOV64rm_TC)
FixupKind = X86::reloc_riprel_4byte_movq_load;
-
+
// rip-relative addressing is actually relative to the *next* instruction.
// Since an immediate can follow the mod/rm byte for an instruction, this
// means that we need to bias the immediate field of the instruction with
// the size of the immediate field. If we have this case, add it into the
// expression to emit.
int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0;
-
+
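// Example (illustrative, not from the patch): "cmpl $5, foo(%rip)" carries a
// 4-byte trailing immediate, so ImmSize == 4 biases the rip-relative fixup by
// an extra -4 on top of the usual next-field adjustment in EmitImmediate.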
EmitImmediate(Disp, 4, MCFixupKind(FixupKind),
CurByte, OS, Fixups, -ImmSize);
return;
}
-
+
unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U;
-
+
// Determine whether a SIB byte is needed.
- // If no BaseReg, issue a RIP relative instruction only if the MCE can
+ // If no BaseReg, issue a RIP relative instruction only if the MCE can
// resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table
// 2-7) and absolute references.
if (// The SIB byte must be used if there is an index register.
- IndexReg.getReg() == 0 &&
+ IndexReg.getReg() == 0 &&
// The SIB byte must be used if the base is ESP/RSP/R12, all of which
// encode to an R/M value of 4, which indicates that a SIB byte is
// present.
@@ -242,7 +276,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
return;
}
-
+
// If the base is not EBP/ESP and there is no displacement, use simple
// indirect register encoding, this handles addresses like [EAX]. The
// encoding for [EBP] with no displacement means [disp32] so we handle it
@@ -251,24 +285,24 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS);
return;
}
-
+
// Otherwise, if the displacement fits in a byte, encode as [REG+disp8].
if (Disp.isImm() && isDisp8(Disp.getImm())) {
EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
return;
}
-
+
// Otherwise, emit the most general non-SIB encoding: [REG+disp32]
EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
return;
}
-
+
// We need a SIB byte, so start by outputting the ModR/M byte first
assert(IndexReg.getReg() != X86::ESP &&
IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
-
+
bool ForceDisp32 = false;
bool ForceDisp8 = false;
if (BaseReg == 0) {
@@ -294,13 +328,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Emit the normal disp32 encoding.
EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
}
-
+
// Calculate what the SS field value should be...
static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
unsigned SS = SSTable[Scale.getImm()];
-
+
if (BaseReg == 0) {
- // Handle the SIB byte for the case where there is no base, see Intel
+ // Handle the SIB byte for the case where there is no base, see Intel
// Manual 2A, table 2-7. The displacement has already been output.
unsigned IndexRegNo;
if (IndexReg.getReg())
@@ -316,7 +350,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
IndexRegNo = 4; // For example [ESP+1*<noreg>+4]
EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS);
}
-
+
// Do we need to output a displacement?
if (ForceDisp8)
EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
@@ -324,26 +358,216 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
}
+/// EmitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix
+/// called VEX.
+void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const TargetInstrDesc &Desc,
+ raw_ostream &OS) const {
+ bool HasVEX_4V = false;
+ if (TSFlags & X86II::VEX_4V)
+ HasVEX_4V = true;
+
+ // VEX_R: opcode extension equivalent to REX.R in
+ // 1's complement (inverted) form
+ //
+ // 1: Same as REX.R=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.R=1 (64-bit mode only)
+ //
+ unsigned char VEX_R = 0x1;
+
+ // VEX_X: equivalent to REX.X, only used when a
+ // register is used as the index in a SIB byte.
+ //
+ // 1: Same as REX.X=0 (must be 1 in 32-bit mode)
+ // 0: Same as REX.X=1 (64-bit mode only)
+ unsigned char VEX_X = 0x1;
+
+ // VEX_B:
+ //
+ // 1: Same as REX.B=0 (ignored in 32-bit mode)
+ // 0: Same as REX.B=1 (64-bit mode only)
+ //
+ unsigned char VEX_B = 0x1;
+
+ // VEX_W: opcode specific (use like REX.W, or used for
+ // opcode extension, or ignored, depending on the opcode byte)
+ unsigned char VEX_W = 0;
+
+ // VEX_5M (VEX m-mmmmm field):
+ //
+ // 0b00000: Reserved for future use
+ // 0b00001: implied 0F leading opcode
+ // 0b00010: implied 0F 38 leading opcode bytes
+ // 0b00011: implied 0F 3A leading opcode bytes
+ // 0b00100-0b11111: Reserved for future use
+ //
+ unsigned char VEX_5M = 0x1;
+
+ // VEX_4V (VEX vvvv field): a register specifier
+ // (in 1's complement form) or 1111 if unused.
+ unsigned char VEX_4V = 0xf;
+
+ // VEX_L (Vector Length):
+ //
+ // 0: scalar or 128-bit vector
+ // 1: 256-bit vector
+ //
+ unsigned char VEX_L = 0;
+
+ // VEX_PP: opcode extension providing equivalent
+ // functionality of a SIMD prefix
+ //
+ // 0b00: None
+ // 0b01: 66
+ // 0b10: F3
+ // 0b11: F2
+ //
+ unsigned char VEX_PP = 0;
+
+ // Encode the operand size opcode prefix as needed.
+ if (TSFlags & X86II::OpSize)
+ VEX_PP = 0x01;
+
+ if (TSFlags & X86II::VEX_W)
+ VEX_W = 1;
+
+ switch (TSFlags & X86II::Op0Mask) {
+ default: assert(0 && "Invalid prefix!");
+ case X86II::T8: // 0F 38
+ VEX_5M = 0x2;
+ break;
+ case X86II::TA: // 0F 3A
+ VEX_5M = 0x3;
+ break;
+ case X86II::TF: // F2 0F 38
+ VEX_PP = 0x3;
+ VEX_5M = 0x2;
+ break;
+ case X86II::XS: // F3 0F
+ VEX_PP = 0x2;
+ break;
+ case X86II::XD: // F2 0F
+ VEX_PP = 0x3;
+ break;
+ case X86II::TB: // Bypass: Not used by VEX
+ case 0:
+ break; // No prefix!
+ }
+
+ // Set the vector length to 256 bits if any of YMM0-YMM15 is used.
+ for (unsigned i = 0; i != MI.getNumOperands(); ++i) {
+ if (!MI.getOperand(i).isReg())
+ continue;
+ unsigned SrcReg = MI.getOperand(i).getReg();
+ if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15)
+ VEX_L = 1;
+ }
+
+ unsigned NumOps = MI.getNumOperands();
+ unsigned CurOp = 0;
+
+ switch (TSFlags & X86II::FormMask) {
+ case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+ case X86II::MRM0m: case X86II::MRM1m:
+ case X86II::MRM2m: case X86II::MRM3m:
+ case X86II::MRM4m: case X86II::MRM5m:
+ case X86II::MRM6m: case X86II::MRM7m:
+ case X86II::MRMDestMem:
+ NumOps = CurOp = X86::AddrNumOperands;
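+ // FALL THROUGH: the memory-destination forms reuse the extended-register
+ // scan of the source-operand cases below.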
+ case X86II::MRMSrcMem:
+ case X86II::MRMSrcReg:
+ if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_R = 0x0;
+
+ // CurOp and NumOps are equal when VEX_R represents a register used
+ // to index a memory destination (which is the last operand)
+ CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
+
+ if (HasVEX_4V) {
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+ CurOp++;
+ }
+
+ // If the last register should be encoded in the immediate field,
+ // do not spend any VEX prefix bits on it; just ignore it here.
+ if (TSFlags & X86II::VEX_I8IMM)
+ NumOps--;
+
+ for (; CurOp != NumOps; ++CurOp) {
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_B = 0x0;
+ if (!VEX_B && MO.isReg() &&
+ ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) &&
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_X = 0x0;
+ }
+ break;
+ default: // MRMDestReg, MRM0r-MRM7r
+ if (MI.getOperand(CurOp).isReg() &&
+ X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
+ VEX_B = 0;
+
+ if (HasVEX_4V)
+ VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+
+ CurOp++;
+ for (; CurOp != NumOps; ++CurOp) {
+ const MCOperand &MO = MI.getOperand(CurOp);
+ if (MO.isReg() && !HasVEX_4V &&
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+ VEX_R = 0x0;
+ }
+ break;
+ }
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
+ // The VEX opcode prefix can be 2 or 3 bytes
+ //
+ // 3 bytes:
+ // +-----+ +--------------+ +-------------------+
+ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+ // +-----+ +--------------+ +-------------------+
+ // 2 bytes:
+ // +-----+ +-------------------+
+ // | C5h | | R | vvvv | L | pp |
+ // +-----+ +-------------------+
+ //
+ unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+ if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix
+ EmitByte(0xC5, CurByte, OS);
+ EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
+ return;
+ }
+
+ // 3 byte VEX prefix
+ EmitByte(0xC4, CurByte, OS);
+ EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS);
+ EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+}
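+
+// Worked example (illustrative, not part of the patch): for
+// "vaddpd %xmm2, %xmm1, %xmm0" no extended registers are used, so VEX_B,
+// VEX_X and VEX_5M are all 1 and VEX_W is 0, which selects the 2-byte form;
+// with VEX_R == 1, VEX_4V == (~1) & 0xf == 0xE, VEX_L == 0 and VEX_PP == 1
+// (the 66 prefix), the emitted bytes are C5 F1.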
+
/// DetermineREXPrefix - Determine if the MCInst has to be encoded with an X86-64
/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
/// size, and 3) use of X86-64 extended registers.
-static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
+static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
const TargetInstrDesc &Desc) {
- // Pseudo instructions never have a rex byte.
- if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
- return 0;
-
unsigned REX = 0;
if (TSFlags & X86II::REX_W)
- REX |= 1 << 3;
-
+ REX |= 1 << 3; // set REX.W
+
if (MI.getNumOperands() == 0) return REX;
-
+
unsigned NumOps = MI.getNumOperands();
// FIXME: MCInst should make the two-address form explicit.
bool isTwoAddr = NumOps > 1 &&
Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
-
+
// If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
unsigned i = isTwoAddr ? 1 : 0;
for (; i != NumOps; ++i) {
@@ -353,34 +577,34 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue;
// FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
// that returns non-zero.
- REX |= 0x40;
+ REX |= 0x40; // REX fixed encoding prefix
break;
}
-
+
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
case X86II::MRMSrcReg:
if (MI.getOperand(0).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
- REX |= 1 << 2;
+ REX |= 1 << 2; // set REX.R
i = isTwoAddr ? 2 : 1;
for (; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- REX |= 1 << 0;
+ REX |= 1 << 0; // set REX.B
}
break;
case X86II::MRMSrcMem: {
if (MI.getOperand(0).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
- REX |= 1 << 2;
+ REX |= 1 << 2; // set REX.R
unsigned Bit = 0;
i = isTwoAddr ? 2 : 1;
for (; i != NumOps; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- REX |= 1 << Bit;
+ REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1)
Bit++;
}
}
@@ -391,17 +615,17 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
case X86II::MRMDestMem: {
- unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
+ unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
i = isTwoAddr ? 1 : 0;
if (NumOps > e && MI.getOperand(e).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg()))
- REX |= 1 << 2;
+ REX |= 1 << 2; // set REX.R
unsigned Bit = 0;
for (; i != e; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- REX |= 1 << Bit;
+ REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1)
Bit++;
}
}
@@ -410,39 +634,40 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
default:
if (MI.getOperand(0).isReg() &&
X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
- REX |= 1 << 0;
+ REX |= 1 << 0; // set REX.B
i = isTwoAddr ? 2 : 1;
for (unsigned e = NumOps; i != e; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
- REX |= 1 << 2;
+ REX |= 1 << 2; // set REX.R
}
break;
}
return REX;
}
-void X86MCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
- SmallVectorImpl<MCFixup> &Fixups) const {
- unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = TII.get(Opcode);
- unsigned TSFlags = Desc.TSFlags;
-
- // Keep track of the current byte being emitted.
- unsigned CurByte = 0;
-
- // FIXME: We should emit the prefixes in exactly the same order as GAS does,
- // in order to provide diffability.
-
- // Emit the lock opcode prefix as needed.
- if (TSFlags & X86II::LOCK)
- EmitByte(0xF0, CurByte, OS);
-
- // Emit segment override opcode prefix as needed.
+/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed
+void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
+ unsigned &CurByte, int MemOperand,
+ const MCInst &MI,
+ raw_ostream &OS) const {
switch (TSFlags & X86II::SegOvrMask) {
default: assert(0 && "Invalid segment!");
- case 0: break; // No segment override!
+ case 0:
+ // No segment override; check for an explicit one on the memory operand.
+ if (MemOperand != -1) { // If the instruction has a memory operand.
+ switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) {
+ default: assert(0 && "Unknown segment register!");
+ case 0: break;
+ case X86::CS: EmitByte(0x2E, CurByte, OS); break;
+ case X86::SS: EmitByte(0x36, CurByte, OS); break;
+ case X86::DS: EmitByte(0x3E, CurByte, OS); break;
+ case X86::ES: EmitByte(0x26, CurByte, OS); break;
+ case X86::FS: EmitByte(0x64, CurByte, OS); break;
+ case X86::GS: EmitByte(0x65, CurByte, OS); break;
+ }
+ }
+ break;
case X86II::FS:
EmitByte(0x64, CurByte, OS);
break;
@@ -450,19 +675,36 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(0x65, CurByte, OS);
break;
}
-
+}
+
+/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode.
+///
+/// MemOperand is the operand # of the start of a memory operand if present;
+/// if not present, it is -1.
+void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+ int MemOperand, const MCInst &MI,
+ const TargetInstrDesc &Desc,
+ raw_ostream &OS) const {
+
+ // Emit the lock opcode prefix as needed.
+ if (TSFlags & X86II::LOCK)
+ EmitByte(0xF0, CurByte, OS);
+
+ // Emit segment override opcode prefix as needed.
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS);
+
// Emit the repeat opcode prefix as needed.
if ((TSFlags & X86II::Op0Mask) == X86II::REP)
EmitByte(0xF3, CurByte, OS);
-
+
// Emit the operand size opcode prefix as needed.
if (TSFlags & X86II::OpSize)
EmitByte(0x66, CurByte, OS);
-
+
// Emit the address size opcode prefix as needed.
if (TSFlags & X86II::AdSize)
EmitByte(0x67, CurByte, OS);
-
+
bool Need0FPrefix = false;
switch (TSFlags & X86II::Op0Mask) {
default: assert(0 && "Invalid prefix!");
@@ -494,18 +736,18 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::DE: EmitByte(0xDE, CurByte, OS); break;
case X86II::DF: EmitByte(0xDF, CurByte, OS); break;
}
-
+
// Handle REX prefix.
// FIXME: Can this come before F2 etc to simplify emission?
if (Is64BitMode) {
if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc))
EmitByte(0x40 | REX, CurByte, OS);
}
-
+
// 0x0F escape code must be emitted just before the opcode.
if (Need0FPrefix)
EmitByte(0x0F, CurByte, OS);
-
+
// FIXME: Pull this up into previous switch if REX can be moved earlier.
switch (TSFlags & X86II::Op0Mask) {
case X86II::TF: // F2 0F 38
@@ -516,8 +758,21 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(0x3A, CurByte, OS);
break;
}
-
+}
+
+void X86MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ unsigned Opcode = MI.getOpcode();
+ const TargetInstrDesc &Desc = TII.get(Opcode);
+ uint64_t TSFlags = Desc.TSFlags;
+
+ // Pseudo instructions don't get encoded.
+ if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+ return;
+
// If this is a two-address instruction, skip one of the register operands.
+ // FIXME: This should be handled during MCInst lowering.
unsigned NumOps = Desc.getNumOperands();
unsigned CurOp = 0;
if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1)
@@ -525,56 +780,85 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
// Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
--NumOps;
-
+
+ // Keep track of the current byte being emitted.
+ unsigned CurByte = 0;
+
+ // Is this instruction encoded using the AVX VEX prefix?
+ bool HasVEXPrefix = false;
+
+ // Does it use the VEX.VVVV field?
+ bool HasVEX_4V = false;
+
+ if (TSFlags & X86II::VEX)
+ HasVEXPrefix = true;
+ if (TSFlags & X86II::VEX_4V)
+ HasVEX_4V = true;
+
+ // Determine where the memory operand starts, if present.
+ int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
+ if (MemoryOperand != -1) MemoryOperand += CurOp;
+
+ if (!HasVEXPrefix)
+ EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+ else
+ EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS);
+
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+ unsigned SrcRegNum = 0;
switch (TSFlags & X86II::FormMask) {
case X86II::MRMInitReg:
assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n";
assert(0 && "Unknown FormMask value in X86MCCodeEmitter!");
- case X86II::Pseudo: return; // Pseudo instructions encode to nothing.
+ case X86II::Pseudo:
+ assert(0 && "Pseudo instruction shouldn't be emitted");
case X86II::RawFrm:
EmitByte(BaseOpcode, CurByte, OS);
break;
-
+
case X86II::AddRegFrm:
EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS);
break;
-
+
case X86II::MRMDestReg:
EmitByte(BaseOpcode, CurByte, OS);
EmitRegModRMByte(MI.getOperand(CurOp),
GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS);
CurOp += 2;
break;
-
+
case X86II::MRMDestMem:
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp,
- GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)),
+ GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)),
TSFlags, CurByte, OS, Fixups);
- CurOp += X86AddrNumOperands + 1;
+ CurOp += X86::AddrNumOperands + 1;
break;
-
+
case X86II::MRMSrcReg:
EmitByte(BaseOpcode, CurByte, OS);
- EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)),
- CurByte, OS);
- CurOp += 2;
+ SrcRegNum = CurOp + 1;
+
+ if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
+ SrcRegNum++;
+
+ EmitRegModRMByte(MI.getOperand(SrcRegNum),
+ GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
break;
-
+
case X86II::MRMSrcMem: {
+ int AddrOperands = X86::AddrNumOperands;
+ unsigned FirstMemOp = CurOp+1;
+ if (HasVEX_4V) {
+ ++AddrOperands;
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+ }
+
EmitByte(BaseOpcode, CurByte, OS);
- // FIXME: Maybe lea should have its own form? This is a horrible hack.
- int AddrOperands;
- if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
- Opcode == X86::LEA16r || Opcode == X86::LEA32r)
- AddrOperands = X86AddrNumOperands - 1; // No segment register
- else
- AddrOperands = X86AddrNumOperands;
-
- EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)),
+ EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
TSFlags, CurByte, OS, Fixups);
CurOp += AddrOperands + 1;
break;
@@ -584,6 +868,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r:
+ if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+ CurOp++;
EmitByte(BaseOpcode, CurByte, OS);
EmitRegModRMByte(MI.getOperand(CurOp++),
(TSFlags & X86II::FormMask)-X86II::MRM0r,
@@ -596,7 +882,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(BaseOpcode, CurByte, OS);
EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m,
TSFlags, CurByte, OS, Fixups);
- CurOp += X86AddrNumOperands;
+ CurOp += X86::AddrNumOperands;
break;
case X86II::MRM_C1:
EmitByte(BaseOpcode, CurByte, OS);
@@ -639,14 +925,27 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
EmitByte(0xF9, CurByte, OS);
break;
}
-
+
// If there is a remaining operand, it must be a trailing immediate. Emit it
// according to the right size for the instruction.
- if (CurOp != NumOps)
- EmitImmediate(MI.getOperand(CurOp++),
- X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
- CurByte, OS, Fixups);
-
+ if (CurOp != NumOps) {
+ // The last source register of a 4-operand AVX instruction is encoded
+ // in bits[7:4] of an immediate byte, and bits[3:0] are ignored.
+ if (TSFlags & X86II::VEX_I8IMM) {
+ const MCOperand &MO = MI.getOperand(CurOp++);
+ bool IsExtReg =
+ X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
+ unsigned RegNum = (IsExtReg ? (1 << 7) : 0);
+ RegNum |= GetX86RegNum(MO) << 4;
+ EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS,
+ Fixups);
+ } else
+ EmitImmediate(MI.getOperand(CurOp++),
+ X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags),
+ CurByte, OS, Fixups);
+ }
+
+
#ifndef NDEBUG
// FIXME: Verify.
if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) {
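
The VEX_I8IMM path above packs a register operand into the trailing immediate byte. A minimal sketch of that packing, outside the patch and with an illustrative helper name, assuming the 4-bit register number already combines the 3-bit ModRM field with the extended-register bit:

#include <cassert>
#include <cstdint>

// Illustrative only: fold the fourth source register of a 4-operand AVX
// instruction into imm8. Bits [7:4] carry the register number; bits [3:0]
// are ignored by the hardware, so they are left as zero here.
static uint8_t encodeVEXSrcRegInImm8(unsigned RegEncoding4Bit) {
  assert(RegEncoding4Bit < 16 && "XMM/YMM register numbers are 4 bits");
  return static_cast<uint8_t>(RegEncoding4Bit << 4);
}
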
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 98975ea..5f31e00 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -127,21 +127,29 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
return RegNo-X86::ST0;
- case X86::XMM0: case X86::XMM8: case X86::MM0:
+ case X86::XMM0: case X86::XMM8:
+ case X86::YMM0: case X86::YMM8: case X86::MM0:
return 0;
- case X86::XMM1: case X86::XMM9: case X86::MM1:
+ case X86::XMM1: case X86::XMM9:
+ case X86::YMM1: case X86::YMM9: case X86::MM1:
return 1;
- case X86::XMM2: case X86::XMM10: case X86::MM2:
+ case X86::XMM2: case X86::XMM10:
+ case X86::YMM2: case X86::YMM10: case X86::MM2:
return 2;
- case X86::XMM3: case X86::XMM11: case X86::MM3:
+ case X86::XMM3: case X86::XMM11:
+ case X86::YMM3: case X86::YMM11: case X86::MM3:
return 3;
- case X86::XMM4: case X86::XMM12: case X86::MM4:
+ case X86::XMM4: case X86::XMM12:
+ case X86::YMM4: case X86::YMM12: case X86::MM4:
return 4;
- case X86::XMM5: case X86::XMM13: case X86::MM5:
+ case X86::XMM5: case X86::XMM13:
+ case X86::YMM5: case X86::YMM13: case X86::MM5:
return 5;
- case X86::XMM6: case X86::XMM14: case X86::MM6:
+ case X86::XMM6: case X86::XMM14:
+ case X86::YMM6: case X86::YMM14: case X86::MM6:
return 6;
- case X86::XMM7: case X86::XMM15: case X86::MM7:
+ case X86::XMM7: case X86::XMM15:
+ case X86::YMM7: case X86::YMM15: case X86::MM7:
return 7;
case X86::ES:
@@ -157,6 +165,34 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::GS:
return 5;
+ case X86::CR0:
+ return 0;
+ case X86::CR1:
+ return 1;
+ case X86::CR2:
+ return 2;
+ case X86::CR3:
+ return 3;
+ case X86::CR4:
+ return 4;
+
+ case X86::DR0:
+ return 0;
+ case X86::DR1:
+ return 1;
+ case X86::DR2:
+ return 2;
+ case X86::DR3:
+ return 3;
+ case X86::DR4:
+ return 4;
+ case X86::DR5:
+ return 5;
+ case X86::DR6:
+ return 6;
+ case X86::DR7:
+ return 7;
+
default:
assert(isVirtualRegister(RegNo) && "Unknown physical register!");
llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
@@ -357,56 +393,6 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
}
-const TargetRegisterClass* const*
-X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- bool callsEHReturn = false;
- if (MF)
- callsEHReturn = MF->getMMI().callsEHReturn();
-
- static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = {
- &X86::GR32RegClass, &X86::GR32RegClass,
- &X86::GR32RegClass, &X86::GR32RegClass, 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = {
- &X86::GR32RegClass, &X86::GR32RegClass,
- &X86::GR32RegClass, &X86::GR32RegClass,
- &X86::GR32RegClass, &X86::GR32RegClass, 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = {
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass, 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = {
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass, 0
- };
- static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = {
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::GR64RegClass, &X86::GR64RegClass,
- &X86::VR128RegClass, &X86::VR128RegClass,
- &X86::VR128RegClass, &X86::VR128RegClass,
- &X86::VR128RegClass, &X86::VR128RegClass,
- &X86::VR128RegClass, &X86::VR128RegClass,
- &X86::VR128RegClass, &X86::VR128RegClass, 0
- };
-
- if (Is64Bit) {
- if (IsWin64)
- return CalleeSavedRegClassesWin64;
- else
- return (callsEHReturn ?
- CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit);
- } else {
- return (callsEHReturn ?
- CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit);
- }
-}
-
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
// Set the stack-pointer register and its aliases as reserved.
@@ -696,8 +682,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// }
// [EBP]
MFI->CreateFixedObject(-TailCallReturnAddrDelta,
- (-1U*SlotSize)+TailCallReturnAddrDelta,
- true, false);
+ (-1U*SlotSize)+TailCallReturnAddrDelta, true);
}
if (hasFP(MF)) {
@@ -710,7 +695,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
-(int)SlotSize +
TFI.getOffsetOfLocalArea() +
TailCallReturnAddrDelta,
- true, false);
+ true);
assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
FrameIdx = 0;
@@ -1240,8 +1225,8 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
if (CSSize) {
unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
MachineInstr *MI =
- addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
- FramePtr, false, -CSSize);
+ addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
+ FramePtr, false, -CSSize);
MBB.insert(MBBI, MI);
} else {
BuildMI(MBB, MBBI, DL,
@@ -1301,9 +1286,11 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
for (unsigned i = 0; i != 5; ++i)
MIB.addOperand(MBBI->getOperand(i));
} else if (RetOpcode == X86::TCRETURNri64) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
} else {
- BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());
+ BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
+ addReg(JumpTarget.getReg(), RegState::Kill);
}
MachineInstr *NewMI = prior(MBBI);
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index d0b82e2..d852bcd 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -105,12 +105,6 @@ public:
/// callee-save registers on this target.
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
- /// getCalleeSavedRegClasses - Return a null-terminated list of the preferred
- /// register classes to spill each callee-saved register with. The order and
- /// length of this list match the getCalleeSavedRegs() list.
- const TargetRegisterClass* const*
- getCalleeSavedRegClasses(const MachineFunction *MF = 0) const;
-
/// getReservedRegs - Returns a bitset indexed by physical register number
/// indicating if a register is a special register that has particular uses and
/// should be considered unavailable at all times, e.g. SP, RA. This is used by
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 91cfaa9..9f0382e 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -147,7 +147,7 @@ let Namespace = "X86" in {
def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>;
def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>;
def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>;
-
+
// Pseudo Floating Point registers
def FP0 : Register<"fp0">;
def FP1 : Register<"fp1">;
@@ -155,7 +155,7 @@ let Namespace = "X86" in {
def FP3 : Register<"fp3">;
def FP4 : Register<"fp4">;
def FP5 : Register<"fp5">;
- def FP6 : Register<"fp6">;
+ def FP6 : Register<"fp6">;
// XMM Registers, used by the various SSE instruction set extensions.
// The sub_ss and sub_sd subregs are the same registers with another regclass.
@@ -357,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
}];
}
-def GR32 : RegisterClass<"X86", [i32], 32,
+def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
@@ -412,7 +412,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
-def GR64 : RegisterClass<"X86", [i64], 64,
+def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
@@ -446,7 +446,7 @@ def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> {
}
// Debug registers.
-def DEBUG_REG : RegisterClass<"X86", [i32], 32,
+def DEBUG_REG : RegisterClass<"X86", [i32], 32,
[DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> {
}
@@ -780,14 +780,14 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32,
}
// Generic vector registers: VR64 and VR128.
-def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64, v2f32], 64,
+def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64,
[MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
-
+
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -803,11 +803,27 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
}
}];
}
-def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
+
+def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
+
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ VR256Class::iterator
+ VR256Class::allocation_order_end(const MachineFunction &MF) const {
+ const TargetMachine &TM = MF.getTarget();
+ const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
+ if (!Subtarget.is64Bit())
+ return end()-8; // Only YMM0 to YMM7 are available in 32-bit mode.
+ else
+ return end();
+ }
+ }];
}
// Status flags registers.
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 09a2685..4a10be5 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -53,9 +53,12 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
if (GV->hasDLLImportLinkage())
return X86II::MO_DLLIMPORT;
- // Materializable GVs (in JIT lazy compilation mode) do not require an
- // extra load from stub.
- bool isDecl = GV->isDeclaration() && !GV->isMaterializable();
+ // Determine whether this is a reference to a definition or a declaration.
+ // Materializable GVs (in JIT lazy compilation mode) do not require an extra
+ // load from stub.
+ bool isDecl = GV->hasAvailableExternallyLinkage();
+ if (GV->isDeclaration() && !GV->isMaterializable())
+ isDecl = true;
// X86-64 in PIC mode.
if (isPICStyleRIPRel()) {
@@ -293,12 +296,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, IsBTMemSlow(false)
, IsUAMemFast(false)
, HasVectorUAMem(false)
- , DarwinVers(0)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
- , Is64Bit(is64Bit)
- , TargetType(isELF) { // Default to ELF unless otherwise specified.
+ , TargetTriple(TT)
+ , Is64Bit(is64Bit) {
// default to hard float ABI
if (FloatABIType == FloatABI::Default)
@@ -328,47 +330,40 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
HasCMov = true;
}
-
DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel
<< ", 64bit " << HasX86_64 << "\n");
assert((!Is64Bit || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
- // Set the boolean corresponding to the current target triple, or the default
- // if one cannot be determined, to true.
- if (TT.length() > 5) {
- size_t Pos;
- if ((Pos = TT.find("-darwin")) != std::string::npos) {
- TargetType = isDarwin;
-
- // Compute the darwin version number.
- if (isdigit(TT[Pos+7]))
- DarwinVers = atoi(&TT[Pos+7]);
- else
- DarwinVers = 8; // Minimum supported darwin is Tiger.
- } else if (TT.find("linux") != std::string::npos) {
- // Linux doesn't imply ELF, but we don't currently support anything else.
- TargetType = isELF;
- } else if (TT.find("cygwin") != std::string::npos) {
- TargetType = isCygwin;
- } else if (TT.find("mingw") != std::string::npos) {
- TargetType = isMingw;
- } else if (TT.find("win32") != std::string::npos) {
- TargetType = isWindows;
- } else if (TT.find("windows") != std::string::npos) {
- TargetType = isWindows;
- } else if (TT.find("-cl") != std::string::npos) {
- TargetType = isDarwin;
- DarwinVers = 9;
- }
- }
-
// Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64
// bit targets.
- if (TargetType == isDarwin || Is64Bit)
+ if (isTargetDarwin() || Is64Bit)
stackAlignment = 16;
if (StackAlignment)
stackAlignment = StackAlignment;
}
+
+/// IsCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86Subtarget::IsCalleePop(bool IsVarArg,
+ CallingConv::ID CallingConv) const {
+ if (IsVarArg)
+ return false;
+
+ switch (CallingConv) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ return !is64Bit();
+ case CallingConv::X86_FastCall:
+ return !is64Bit();
+ case CallingConv::X86_ThisCall:
+ return !is64Bit();
+ case CallingConv::Fast:
+ return GuaranteedTailCallOpt;
+ case CallingConv::GHC:
+ return GuaranteedTailCallOpt;
+ }
+}
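
A hedged usage sketch for the new helper (the caller below is illustrative, not part of the patch): lowering code can now ask the subtarget whether the return must pop the argument area, instead of re-deriving the per-convention rules inline. Note the header change further down adds the llvm/CallingConv.h include this relies on.

#include "X86Subtarget.h"   // in-tree header; declares IsCalleePop
using namespace llvm;

// Hypothetical helper: how many bytes a callee-pop return must remove.
static unsigned bytesPoppedByCallee(const X86Subtarget &ST, bool IsVarArg,
                                    CallingConv::ID CC,
                                    unsigned StackArgBytes) {
  return ST.IsCalleePop(IsVarArg, CC) ? StackArgBytes : 0;
}
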
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 646af91..486dbc4 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -14,7 +14,9 @@
#ifndef X86SUBTARGET_H
#define X86SUBTARGET_H
+#include "llvm/ADT/Triple.h"
#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/CallingConv.h"
#include <string>
namespace llvm {
@@ -88,10 +90,6 @@ protected:
/// operands. This may require setting a feature bit in the processor.
bool HasVectorUAMem;
- /// DarwinVers - Nonzero if this is a darwin platform: the numeric
- /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
- unsigned char DarwinVers; // Is any darwin-x86 platform.
-
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -99,6 +97,9 @@ protected:
/// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops.
///
unsigned MaxInlineSizeThreshold;
+
+ /// TargetTriple - What processor and OS we're targeting.
+ Triple TargetTriple;
private:
/// Is64Bit - True if the processor supports 64-bit instructions and
@@ -106,9 +107,6 @@ private:
bool Is64Bit;
public:
- enum {
- isELF, isCygwin, isDarwin, isWindows, isMingw
- } TargetType;
/// This constructor initializes the data members to match that
/// of the specified triple.
@@ -157,24 +155,31 @@ public:
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
- bool isTargetDarwin() const { return TargetType == isDarwin; }
- bool isTargetELF() const { return TargetType == isELF; }
+ bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
+
+ // ELF is a reasonably sane default and the only other X86 targets we
+ // support are Darwin and Windows. Just use "not those".
+ bool isTargetELF() const {
+ return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing();
+ }
+ bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; }
- bool isTargetWindows() const { return TargetType == isWindows; }
- bool isTargetMingw() const { return TargetType == isMingw; }
- bool isTargetCygwin() const { return TargetType == isCygwin; }
+ bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; }
+ bool isTargetMingw() const {
+ return TargetTriple.getOS() == Triple::MinGW32 ||
+ TargetTriple.getOS() == Triple::MinGW64; }
+ bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; }
bool isTargetCygMing() const {
- return TargetType == isMingw || TargetType == isCygwin;
+ return isTargetMingw() || isTargetCygwin();
}
-
+
/// isTargetCOFF - Return true if this is any COFF/Windows target variant.
bool isTargetCOFF() const {
- return TargetType == isMingw || TargetType == isCygwin ||
- TargetType == isWindows;
+ return isTargetMingw() || isTargetCygwin() || isTargetWindows();
}
bool isTargetWin64() const {
- return Is64Bit && (TargetType == isMingw || TargetType == isWindows);
+ return Is64Bit && (isTargetMingw() || isTargetWindows());
}
std::string getDataLayout() const {
@@ -208,7 +213,10 @@ public:
/// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
/// 10 = Snow Leopard, etc.
- unsigned getDarwinVers() const { return DarwinVers; }
+ unsigned getDarwinVers() const {
+ if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber();
+ return 0;
+ }
/// ClassifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it in a non-pcrel
@@ -237,6 +245,9 @@ public:
/// indicating the number of scheduling cycles of backscheduling that
/// should be attempted.
unsigned getSpecialAddressLatency() const;
+
+ /// IsCalleePop - Test whether a function should pop its own arguments.
+ bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
};
} // End llvm namespace
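
The DarwinVers and TargetType members are gone; every target predicate is now derived from the stored Triple. A small sketch of the behavior the new predicates rely on, with a sample triple string (the input is illustrative, but both API calls appear in the patch itself):

#include "llvm/ADT/Triple.h"
#include <cassert>
using namespace llvm;

static void tripleQueries() {
  Triple T("x86_64-apple-darwin10");
  assert(T.getOS() == Triple::Darwin);      // drives isTargetDarwin()
  assert(T.getDarwinMajorNumber() == 10);   // drives getDarwinVers()
}
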
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f2c5058..df00d3f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -173,14 +173,18 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM,
// Install an instruction selector.
PM.add(createX86ISelDag(*this, OptLevel));
- // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
- PM.add(createX87FPRegKillInserterPass());
+ // For 32-bit, prepend instructions to set the "global base reg" for PIC.
+ if (!Subtarget.is64Bit())
+ PM.add(createGlobalBaseRegPass());
return false;
}
bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
+ // Install a pass to insert x87 FP_REG_KILL instructions, as needed.
+ PM.add(createX87FPRegKillInserterPass());
+
PM.add(createX86MaxStackAlignmentHeuristicPass());
return false; // -print-machineinstr shouldn't print after this.
}
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index c100c59..6656bdc 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -138,7 +138,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// FALL THROUGH
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
- case GlobalValue::LinkerPrivateLinkage:
break;
case GlobalValue::DLLImportLinkage:
llvm_unreachable("DLLImport linkage is not supported by this target!");
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index b230572..abe7b2f 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -245,7 +245,7 @@ SDValue XCoreTargetLowering::
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32);
// If it's a debug information descriptor, don't mess with it.
if (DAG.isVerifiedDebugInfoDesc(Op))
return GA;
@@ -269,7 +269,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
DebugLoc dl = Op.getDebugLoc();
// transform to label + getid() * size
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
- SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+ SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar) {
// If GV is an alias then use the aliasee to determine size
@@ -454,12 +454,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const
if (LD->getAlignment() == 2) {
int SVOffset = LD->getSrcValueOffset();
- SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+ SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, dl, Chain,
BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
LD->isVolatile(), LD->isNonTemporal(), 2);
SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
DAG.getConstant(2, MVT::i32));
- SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain,
+ SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, dl, Chain,
HighAddr, LD->getSrcValue(), SVOffset + 2,
MVT::i16, LD->isVolatile(),
LD->isNonTemporal(), 2);
@@ -812,6 +812,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -826,7 +827,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CallingConv::Fast:
case CallingConv::C:
return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
- Outs, Ins, dl, DAG, InVals);
+ Outs, OutVals, Ins, dl, DAG, InVals);
}
}
@@ -839,6 +840,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
@@ -866,7 +868,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// Walk the register/memloc assignments, inserting copies/loads.
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- SDValue Arg = Outs[i].Val;
+ SDValue Arg = OutVals[i];
// Promote the value if needed.
switch (VA.getLocInfo()) {
@@ -919,7 +921,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
// turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
// Likewise ExternalSymbol -> TargetExternalSymbol.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);
@@ -1072,7 +1074,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// Create the frame index object for this incoming parameter...
int FI = MFI->CreateFixedObject(ObjSize,
LRSaveSize + VA.getLocMemOffset(),
- true, false);
+ true);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
@@ -1097,7 +1099,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// address
for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) {
// Create a stack slot
- int FI = MFI->CreateFixedObject(4, offset, true, false);
+ int FI = MFI->CreateFixedObject(4, offset, true);
if (i == FirstVAReg) {
XFI->setVarArgsFrameIndex(FI);
}
@@ -1120,7 +1122,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// This will point to the next argument passed via stack.
XFI->setVarArgsFrameIndex(
MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
- true, false));
+ true));
}
}
@@ -1133,19 +1135,19 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
bool XCoreTargetLowering::
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) const {
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
- RVLocs, *DAG.getContext());
- return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_XCore);
+ RVLocs, Context);
+ return CCInfo.CheckReturn(Outs, RetCC_XCore);
}
SDValue
XCoreTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const {
// CCValAssign - represent the assignment of
@@ -1175,7 +1177,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain,
assert(VA.isRegLoc() && "Can only return in registers!");
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
- Outs[i].Val, Flag);
+ OutVals[i], Flag);
// guarantee that all emitted copies are
// stuck together, avoiding something bad
@@ -1221,23 +1223,22 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineFunction *F = BB->getParent();
MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
- BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
- .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
F->insert(It, copy0MBB);
F->insert(It, sinkMBB);
- // Update machine-CFG edges by first adding all successors of the current
- // block to the new block which will contain the Phi node for the select.
- for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
- E = BB->succ_end(); I != E; ++I)
- sinkMBB->addSuccessor(*I);
- // Next, remove all successors of the current block, and add the true
- // and fallthrough blocks as its successors.
- while (!BB->succ_empty())
- BB->removeSuccessor(BB->succ_begin());
+
+ // Transfer the remainder of BB and its successor edges to sinkMBB.
+ sinkMBB->splice(sinkMBB->begin(), BB,
+ llvm::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
+
// Next, add the true and fallthrough blocks as its successors.
BB->addSuccessor(copy0MBB);
BB->addSuccessor(sinkMBB);
+ BuildMI(BB, dl, TII.get(XCore::BRFT_lru6))
+ .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
+
// copy0MBB:
// %FalseValue = ...
// # fallthrough to sinkMBB
@@ -1250,11 +1251,12 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
// ...
BB = sinkMBB;
- BuildMI(BB, dl, TII.get(XCore::PHI), MI->getOperand(0).getReg())
+ BuildMI(*BB, BB->begin(), dl,
+ TII.get(XCore::PHI), MI->getOperand(0).getReg())
.addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
.addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
- F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
}
@@ -1379,7 +1381,6 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N,
SDValue Mul0, Mul1, Addend0, Addend1;
if (N->getValueType(0) == MVT::i32 &&
isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) {
- SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl,
DAG.getVTList(MVT::i32, MVT::i32), Mul0,
Mul1, Addend0, Addend1);
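
The custom-inserter rewrite above replaces the manual successor bookkeeping with a splice plus transferSuccessorsAndUpdatePHIs. A minimal sketch of that idiom in isolation (the function name is illustrative):

#include "llvm/ADT/STLExtras.h"             // for llvm::next
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

// Move everything after the pseudo MI into sinkMBB and let the helper
// rewrite successor lists and PHI operands in one step.
static void splitAfterPseudo(MachineBasicBlock *BB, MachineBasicBlock *sinkMBB,
                             MachineBasicBlock::iterator MI) {
  sinkMBB->splice(sinkMBB->begin(), BB,
                  llvm::next(MI), BB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
}
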
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index d8d2a3a..febc198 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -120,6 +120,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -178,6 +179,7 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
@@ -186,13 +188,13 @@ namespace llvm {
LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl, SelectionDAG &DAG) const;
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
- const SmallVectorImpl<EVT> &OutTys,
- const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
- SelectionDAG &DAG) const;
+ const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
+ LLVMContext &Context) const;
};
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 5260258..dd90ea9 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -299,9 +299,8 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
unsigned
XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond)const{
- // FIXME there should probably be a DebugLoc argument here
- DebugLoc dl;
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL)const{
// Shouldn't be a fall through.
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
@@ -310,11 +309,11 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
if (FBB == 0) { // One way branch.
if (Cond.empty()) {
// Unconditional branch
- BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB);
} else {
// Conditional branch.
unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
- BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
.addMBB(TBB);
}
return 1;
@@ -323,9 +322,9 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
// Two-way Conditional branch.
assert(Cond.size() == 2 && "Unexpected number of components!");
unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm());
- BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg())
+ BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg())
.addMBB(TBB);
- BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(FBB);
+ BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(FBB);
return 2;
}
@@ -357,37 +356,31 @@ XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const {
-
- if (DestRC == SrcRC) {
- if (DestRC == XCore::GRRegsRegisterClass) {
- BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
- .addReg(SrcReg)
- .addImm(0);
- return true;
- } else {
- return false;
- }
+void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const {
+ bool GRDest = XCore::GRRegsRegClass.contains(DestReg);
+ bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg);
+
+ if (GRDest && GRSrc) {
+ BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
+ return;
}
- if (SrcRC == XCore::RRegsRegisterClass && SrcReg == XCore::SP &&
- DestRC == XCore::GRRegsRegisterClass) {
- BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg)
- .addImm(0);
- return true;
+ if (GRDest && SrcReg == XCore::SP) {
+ BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0);
+ return;
}
- if (DestRC == XCore::RRegsRegisterClass && DestReg == XCore::SP &&
- SrcRC == XCore::GRRegsRegisterClass) {
+
+ if (DestReg == XCore::SP && GRSrc) {
BuildMI(MBB, I, DL, get(XCore::SETSP_1r))
- .addReg(SrcReg);
- return true;
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
}
- return false;
+ llvm_unreachable("Impossible reg-to-reg copy");
}
void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -438,8 +431,10 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(it->getReg());
- storeRegToStackSlot(MBB, MI, it->getReg(), true,
- it->getFrameIdx(), it->getRegClass(), &RI);
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ storeRegToStackSlot(MBB, MI, Reg, true,
+ it->getFrameIdx(), RC, &RI);
if (emitFrameMoves) {
MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
@@ -460,10 +455,11 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
--BeforeI;
for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin();
it != CSI.end(); ++it) {
-
+ unsigned Reg = it->getReg();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
loadRegFromStackSlot(MBB, MI, it->getReg(),
it->getFrameIdx(),
- it->getRegClass(), &RI);
+ RC, &RI);
assert(MI != MBB.begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert multiple
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 9035ea9..e5b0171 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -58,17 +58,16 @@ public:
bool AllowModify) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
- MachineBasicBlock *FBB,
- const SmallVectorImpl<MachineOperand> &Cond) const;
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond,
+ DebugLoc DL) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
- virtual bool copyRegToReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SrcReg,
- const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC,
- DebugLoc DL) const;
+ virtual void copyPhysReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ unsigned DestReg, unsigned SrcReg,
+ bool KillSrc) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index dd3cbc1..19b9b1f 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -733,7 +733,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b),
// TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out,
// in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp,
// tsetmr, sext (reg), zext (reg)
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
let neverHasSideEffects = 1 in
def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2),
"sext $dst, $src2",
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 0cfb358..2a88342 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -82,18 +82,6 @@ const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
return CalleeSavedRegs;
}
-const TargetRegisterClass* const*
-XCoreRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
- static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
- XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
- XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
- XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass,
- XCore::GRRegsRegisterClass, XCore::RRegsRegisterClass,
- 0
- };
- return CalleeSavedRegClasses;
-}
-
BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
Reserved.set(XCore::CP);
@@ -320,7 +308,7 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx;
if (! isVarArg) {
// A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true, false);
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true);
} else {
FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
false);
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 5bdd059..66132ba 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -44,9 +44,6 @@ public:
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const;
- const TargetRegisterClass* const* getCalleeSavedRegClasses(
- const MachineFunction *MF = 0) const;
-
BitVector getReservedRegs(const MachineFunction &MF) const;
bool requiresRegisterScavenging(const MachineFunction &MF) const;
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index 37d7a00..abfa514 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -28,7 +28,7 @@ namespace {
Hello() : FunctionPass(&ID) {}
virtual bool runOnFunction(Function &F) {
- HelloCounter++;
+ ++HelloCounter;
errs() << "Hello: ";
errs().write_escaped(F.getName()) << '\n';
return false;
@@ -46,7 +46,7 @@ namespace {
Hello2() : FunctionPass(&ID) {}
virtual bool runOnFunction(Function &F) {
- HelloCounter++;
+ ++HelloCounter;
errs() << "Hello: ";
errs().write_escaped(F.getName()) << '\n';
return false;
diff --git a/lib/Transforms/Hello/Hello.exports b/lib/Transforms/Hello/Hello.exports
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/Transforms/Hello/Hello.exports
diff --git a/lib/Transforms/Hello/Makefile b/lib/Transforms/Hello/Makefile
index c5e75d4..f1e3148 100644
--- a/lib/Transforms/Hello/Makefile
+++ b/lib/Transforms/Hello/Makefile
@@ -12,5 +12,13 @@ LIBRARYNAME = LLVMHello
LOADABLE_MODULE = 1
USEDLIBS =
+# If we don't need RTTI or EH, there's no reason to export anything
+# from the hello plugin.
+ifneq ($(REQUIRES_RTTI), 1)
+ifneq ($(REQUIRES_EH), 1)
+EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/Hello.exports
+endif
+endif
+
include $(LEVEL)/Makefile.common
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 89f213e..28ea079 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -360,19 +360,20 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
IndicesVector Operands;
for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
UI != E; ++UI) {
+ User *U = *UI;
Operands.clear();
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (LI->isVolatile()) return false; // Don't hack volatile loads
Loads.push_back(LI);
// Direct loads are equivalent to a GEP with a zero index and then a load.
Operands.push_back(0);
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
+ } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
if (GEP->use_empty()) {
// Dead GEP's cause trouble later. Just remove them if we run into
// them.
getAnalysis<AliasAnalysis>().deleteValue(GEP);
GEP->eraseFromParent();
- // TODO: This runs the above loop over and over again for dead GEPS
+ // TODO: This runs the above loop over and over again for dead GEPs
// Couldn't we just increment the UI iterator earlier and erase the
// use?
return isSafeToPromoteArgument(Arg, isByVal);
@@ -452,12 +453,14 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
// Now check every path from the entry block to the load for transparency.
// To do this, we perform a depth first search on the inverse CFG from the
// loading block.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
for (idf_ext_iterator<BasicBlock*, SmallPtrSet<BasicBlock*, 16> >
- I = idf_ext_begin(*PI, TranspBlocks),
- E = idf_ext_end(*PI, TranspBlocks); I != E; ++I)
+ I = idf_ext_begin(P, TranspBlocks),
+ E = idf_ext_end(P, TranspBlocks); I != E; ++I)
if (AA.canBasicBlockModify(**I, Arg, LoadSize))
return false;
+ }
}
// If the path from the entry of the function to each load is free of
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 692e47d..475eee8 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -120,9 +120,14 @@ namespace {
typedef SmallVector<RetOrArg, 5> UseVector;
+ protected:
+ // DAH uses this to specify a different ID.
+ explicit DAE(void *ID) : ModulePass(ID) {}
+
public:
static char ID; // Pass identification, replacement for typeid
DAE() : ModulePass(&ID) {}
+
bool runOnModule(Module &M);
virtual bool ShouldHackArguments() const { return false; }
@@ -155,6 +160,8 @@ namespace {
/// by bugpoint.
struct DAH : public DAE {
static char ID;
+ DAH() : DAE(&ID) {}
+
virtual bool ShouldHackArguments() const { return true; }
};
}
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index b429213..735a1c4 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -160,13 +160,12 @@ static bool SafeToDestroyConstant(const Constant *C) {
static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
SmallPtrSet<const PHINode*, 16> &PHIUsers) {
for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;
- ++UI)
- if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
+ ++UI) {
+ const User *U = *UI;
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(U)) {
GS.HasNonInstructionUser = true;
-
if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
-
- } else if (const Instruction *I = dyn_cast<Instruction>(*UI)) {
+ } else if (const Instruction *I = dyn_cast<Instruction>(U)) {
if (!GS.HasMultipleAccessingFunctions) {
const Function *F = I->getParent()->getParent();
if (GS.AccessingFunction == 0)
@@ -221,18 +220,21 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
GS.HasPHIUser = true;
} else if (isa<CmpInst>(I)) {
+ // Nothing to analyse.
} else if (isa<MemTransferInst>(I)) {
- if (I->getOperand(1) == V)
+ const MemTransferInst *MTI = cast<MemTransferInst>(I);
+ if (MTI->getArgOperand(0) == V)
GS.StoredType = GlobalStatus::isStored;
- if (I->getOperand(2) == V)
+ if (MTI->getArgOperand(1) == V)
GS.isLoaded = true;
} else if (isa<MemSetInst>(I)) {
- assert(I->getOperand(1) == V && "Memset only takes one pointer!");
+ assert(cast<MemSetInst>(I)->getArgOperand(0) == V &&
+ "Memset only takes one pointer!");
GS.StoredType = GlobalStatus::isStored;
} else {
return true; // Any other non-load instruction might take address!
}
- } else if (const Constant *C = dyn_cast<Constant>(*UI)) {
+ } else if (const Constant *C = dyn_cast<Constant>(U)) {
GS.HasNonInstructionUser = true;
// We might have a dead and dangling constant hanging off of here.
if (!SafeToDestroyConstant(C))
@@ -242,6 +244,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
// Otherwise must be some other user.
return true;
}
+ }
return false;
}
@@ -1304,7 +1307,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
- NElems,
+ NElems, 0,
CI->getName() + ".f" + Twine(FieldNo));
FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, CI);
@@ -1323,8 +1326,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// if (F2) { free(F2); F2 = 0; }
// }
// The malloc can also fail if its argument is too large.
- Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0);
- Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1),
+ Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0);
+ Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0),
ConstantZero, "isneg");
for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
@@ -1511,10 +1514,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// If this is an allocation of a fixed size array of structs, analyze as a
// variable size array. malloc [100 x struct],1 -> malloc struct, 100
- if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1))
+ if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1))
if (const ArrayType *AT = dyn_cast<ArrayType>(AllocTy))
AllocTy = AT->getElementType();
-
+
const StructType *AllocSTy = dyn_cast<StructType>(AllocTy);
if (!AllocSTy)
return false;
@@ -1533,7 +1536,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
AllocSize, NumElements,
- CI->getName());
+ 0, CI->getName());
Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
@@ -1597,13 +1600,15 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
GVElType->isFloatingPointTy() ||
GVElType->isPointerTy() || GVElType->isVectorTy())
return false;
-
+
// Walk the use list of the global seeing if all the uses are load or store.
// If there is anything else, bail out.
- for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I)
- if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+ for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){
+ User *U = *I;
+ if (!isa<LoadInst>(U) && !isa<StoreInst>(U))
return false;
-
+ }
+
DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV);
// Create the new global, initializing it to false.
@@ -1641,7 +1646,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// bool.
Instruction *StoredVal = cast<Instruction>(SI->getOperand(0));
- // If we're already replaced the input, StoredVal will be a cast or
+ // If we've already replaced the input, StoredVal will be a cast or
// select instruction. If not, it will be a load of the original
// global.
if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
@@ -2260,8 +2265,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
getVal(Values, CI->getOperand(0)),
CI->getType());
} else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {
- InstResult =
- ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
+ InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)),
getVal(Values, SI->getOperand(1)),
getVal(Values, SI->getOperand(2)));
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {
@@ -2302,7 +2306,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (!Callee) return false; // Cannot resolve.
SmallVector<Constant*, 8> Formals;
- for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end();
+ CallSite CS(CI);
+ for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i)
Formals.push_back(getVal(Values, *i));
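
Several hunks in this patch apply the same mechanical cleanup: dereference the use_iterator once into a named User* and test that, rather than repeating *UI. The shape of the pattern, reduced to a sketch:

#include "llvm/User.h"
#include "llvm/Value.h"
using namespace llvm;

static void visitUsers(Value *V) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ++UI) {
    User *U = *UI;   // single dereference; all isa<>/dyn_cast<> go through U
    (void)U;         // placeholder for the per-user logic
  }
}
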
diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp
index df2456f..e4db235 100644
--- a/lib/Transforms/IPO/IPConstantPropagation.cpp
+++ b/lib/Transforms/IPO/IPConstantPropagation.cpp
@@ -85,15 +85,16 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) {
unsigned NumNonconstant = 0;
for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) {
+ User *U = *UI;
// Ignore blockaddress uses.
- if (isa<BlockAddress>(*UI)) continue;
+ if (isa<BlockAddress>(U)) continue;
// Used by a non-instruction, or not the callee of a function, do not
// transform.
- if (!isa<CallInst>(*UI) && !isa<InvokeInst>(*UI))
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
return false;
- CallSite CS = CallSite::get(cast<Instruction>(*UI));
+ CallSite CS = CallSite::get(cast<Instruction>(U));
if (!CS.isCallee(UI))
return false;
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index b785bb0..027a220 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -468,7 +468,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// move a call site to a function in this SCC before the
// 'FirstCallInSCC' barrier.
if (SCC.isSingular()) {
- std::swap(CallSites[CSi], CallSites.back());
+ CallSites[CSi] = CallSites.back();
CallSites.pop_back();
} else {
CallSites.erase(CallSites.begin()+CSi);
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index 4d61e83..76cfef8 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -42,6 +42,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/InstVisitor.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -262,8 +263,8 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
// char*. It returns "void", so it doesn't need to replace any of
// Inst's uses and doesn't get a name.
CastInst* CI =
- new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst);
- Value *Args[] = { CI, Inst->getOperand(2) };
+ new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
+ Value *Args[] = { CI, Inst->getArgOperand(1) };
CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst);
SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
@@ -378,7 +379,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
const Type* SBPTy =
Type::getInt8PtrTy(Inst->getContext());
CastInst* BufPtr =
- new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst);
+ new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst);
Value *Args[] = {
GetSetJmpMap(Func), BufPtr,
ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
@@ -405,12 +406,14 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
// Loop over all of the uses of instruction. If any of them are after the
// call, "spill" the value to the stack.
for (Value::use_iterator UI = II->use_begin(), E = II->use_end();
- UI != E; ++UI)
- if (cast<Instruction>(*UI)->getParent() != ABlock ||
- InstrsAfterCall.count(cast<Instruction>(*UI))) {
+ UI != E; ++UI) {
+ User *U = *UI;
+ if (cast<Instruction>(U)->getParent() != ABlock ||
+ InstrsAfterCall.count(cast<Instruction>(U))) {
DemoteRegToStack(*II);
break;
}
+ }
InstrsAfterCall.clear();
// Change the setjmp call into a branch statement. We'll remove the
@@ -473,7 +476,8 @@ void LowerSetJmp::visitCallInst(CallInst& CI)
// Construct the new "invoke" instruction.
TerminatorInst* Term = OldBB->getTerminator();
- std::vector<Value*> Params(CI.op_begin() + 1, CI.op_end());
+ CallSite CS(&CI);
+ std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
InvokeInst* II =
InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
Params.begin(), Params.end(), CI.getName(), Term);
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 622a9b5..55d5e2a 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -146,7 +146,7 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
switch(Ty1->getTypeID()) {
default:
llvm_unreachable("Unknown type!");
- // Fall through in Release-Asserts mode.
+ // Fall through in Release mode.
case Type::IntegerTyID:
case Type::OpaqueTyID:
// Ty1 == Ty2 would have returned true earlier.
@@ -535,6 +535,7 @@ static LinkageCategory categorize(const Function *F) {
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
case GlobalValue::ExternalWeakLinkage:
+ case GlobalValue::LinkerPrivateWeakLinkage:
return ExternalWeak;
case GlobalValue::ExternalLinkage:
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index 07525ea..6b9814c 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -66,13 +66,13 @@ Function* PartialInliner::unswitchFunction(Function* F) {
return 0;
// Clone the function, so that we can hack away on it.
- DenseMap<const Value*, Value*> ValueMap;
- Function* duplicateFunction = CloneFunction(F, ValueMap);
+ ValueMap<const Value*, Value*> VMap;
+ Function* duplicateFunction = CloneFunction(F, VMap);
duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
F->getParent()->getFunctionList().push_back(duplicateFunction);
- BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]);
- BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]);
- BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]);
+ BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
+ BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
+ BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
// Go ahead and update all uses to the duplicate, so that we can just
// use the inliner functionality when we're done hacking.
diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp
index 084b94e..58e1448 100644
--- a/lib/Transforms/IPO/PartialSpecialization.cpp
+++ b/lib/Transforms/IPO/PartialSpecialization.cpp
@@ -32,6 +32,10 @@
using namespace llvm;
STATISTIC(numSpecialized, "Number of specialized functions created");
+STATISTIC(numReplaced, "Number of callers replaced by specialization");
+
+// Maximum number of arguments that can be marked as interesting
+static const int MaxInterests = 6;
// Call must be used at least occasionally
static const int CallsMin = 5;
@@ -40,8 +44,9 @@ static const int CallsMin = 5;
static const double ConstValPercent = .1;
namespace {
+ typedef SmallVector<int, MaxInterests> InterestingArgVector;
class PartSpec : public ModulePass {
- void scanForInterest(Function&, SmallVector<int, 6>&);
+ void scanForInterest(Function&, InterestingArgVector&);
int scanDistribution(Function&, int, std::map<Constant*, int>&);
public :
static char ID; // Pass identification, replacement for typeid
@@ -59,13 +64,15 @@ X("partialspecialization", "Partial Specialization");
// a call to the specialized function. Returns the specialized function
static Function*
SpecializeFunction(Function* F,
- DenseMap<const Value*, Value*>& replacements) {
+ ValueMap<const Value*, Value*>& replacements) {
// arg numbers of deleted arguments
- DenseSet<unsigned> deleted;
- for (DenseMap<const Value*, Value*>::iterator
+ DenseMap<unsigned, const Argument*> deleted;
+ for (ValueMap<const Value*, Value*>::iterator
repb = replacements.begin(), repe = replacements.end();
- repb != repe; ++repb)
- deleted.insert(cast<Argument>(repb->first)->getArgNo());
+ repb != repe; ++repb) {
+ Argument const *arg = cast<const Argument>(repb->first);
+ deleted[arg->getArgNo()] = arg;
+ }
Function* NF = CloneFunction(F, replacements);
NF->setLinkage(GlobalValue::InternalLinkage);
@@ -80,9 +87,23 @@ SpecializeFunction(Function* F,
if (CS.getCalledFunction() == F) {
SmallVector<Value*, 6> args;
- for (unsigned x = 0; x < CS.arg_size(); ++x)
- if (!deleted.count(x))
- args.push_back(CS.getArgument(x));
+ // Assemble the non-specialized arguments for the updated callsite.
+ // In the process, make sure that the specialized arguments are
+ // constant and match the specialization. If that's not the case,
+ // this callsite needs to call the original or some other
+ // specialization; don't change it here.
+ CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end();
+ for (CallSite::arg_iterator ai = as; ai != ae; ++ai) {
+ DenseMap<unsigned, const Argument*>::iterator delit = deleted.find(
+ std::distance(as, ai));
+ if (delit == deleted.end())
+ args.push_back(cast<Value>(ai));
+ else {
+ Constant *ci = dyn_cast<Constant>(ai);
+ if (!(ci && ci == replacements[delit->second]))
+ goto next_use;
+ }
+ }
Value* NCall;
if (CallInst *CI = dyn_cast<CallInst>(i)) {
NCall = CallInst::Create(NF, args.begin(), args.end(),
@@ -99,8 +120,11 @@ SpecializeFunction(Function* F,
}
CS.getInstruction()->replaceAllUsesWith(NCall);
CS.getInstruction()->eraseFromParent();
+ ++numReplaced;
}
}
+ next_use:
+ ;
}
return NF;
}
@@ -111,7 +135,7 @@ bool PartSpec::runOnModule(Module &M) {
for (Module::iterator I = M.begin(); I != M.end(); ++I) {
Function &F = *I;
if (F.isDeclaration() || F.mayBeOverridden()) continue;
- SmallVector<int, 6> interestingArgs;
+ InterestingArgVector interestingArgs;
scanForInterest(F, interestingArgs);
// Find the first interesting Argument that we can specialize on
@@ -126,7 +150,7 @@ bool PartSpec::runOnModule(Module &M) {
ee = distribution.end(); ii != ee; ++ii)
if (total > ii->second && ii->first &&
ii->second > total * ConstValPercent) {
- DenseMap<const Value*, Value*> m;
+ ValueMap<const Value*, Value*> m;
Function::arg_iterator arg = F.arg_begin();
for (int y = 0; y < interestingArgs[x]; ++y)
++arg;
@@ -143,7 +167,7 @@ bool PartSpec::runOnModule(Module &M) {
/// scanForInterest - This function decides which arguments would be worth
/// specializing on.
-void PartSpec::scanForInterest(Function& F, SmallVector<int, 6>& args) {
+void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) {
for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end();
ii != ee; ++ii) {
for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end();
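The PartialSpecialization hunks also fix a correctness bug: previously every caller was retargeted at the specialization even when its argument was not the specialized constant. Widening `deleted` from a set of argument indices to a map from index to Argument lets the callsite loop check the incoming value against `replacements`. A condensed sketch of that guard, assuming the same era's CallSite API (collectArgs is an illustrative helper; the real code uses a goto to step to the next use rather than returning):

    #include <iterator>
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/ValueMap.h"
    #include "llvm/Function.h"
    #include "llvm/Support/CallSite.h"
    using namespace llvm;

    // Illustrative helper: gather pass-through arguments for a retargeted call,
    // refusing when a specialized slot does not hold the expected constant.
    static bool collectArgs(CallSite CS,
                            DenseMap<unsigned, const Argument*> &deleted,
                            ValueMap<const Value*, Value*> &replacements,
                            SmallVectorImpl<Value*> &args) {
      for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
           AI != AE; ++AI) {
        Value *V = *AI;
        unsigned ArgNo = std::distance(CS.arg_begin(), AI);
        DenseMap<unsigned, const Argument*>::iterator It = deleted.find(ArgNo);
        if (It == deleted.end())
          args.push_back(V);                     // not specialized: forward it
        else if (V != replacements[It->second])  // must be the specialized constant
          return false;                          // mismatch: keep the original callee
      }
      return true;
    }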
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 6bc8e66..12e8db8 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -73,6 +73,19 @@ namespace {
AU.setPreservesAll();
}
};
+
+ class StripDeadDebugInfo : public ModulePass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit StripDeadDebugInfo()
+ : ModulePass(&ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ };
}
char StripSymbols::ID = 0;
@@ -99,6 +112,14 @@ ModulePass *llvm::createStripDebugDeclarePass() {
return new StripDebugDeclare();
}
+char StripDeadDebugInfo::ID = 0;
+static RegisterPass<StripDeadDebugInfo>
+A("strip-dead-debug-info", "Strip debug info for unused symbols");
+
+ModulePass *llvm::createStripDeadDebugInfoPass() {
+ return new StripDeadDebugInfo();
+}
+
/// OnlyUsedBy - Return true if V is only used by Usr.
static bool OnlyUsedBy(Value *V, Value *Usr) {
for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) {
@@ -223,27 +244,27 @@ static bool StripDebugInfo(Module &M) {
Changed = true;
}
- NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
- if (NMD) {
- Changed = true;
- NMD->eraseFromParent();
- }
-
- NMD = M.getNamedMetadata("llvm.dbg.lv");
- if (NMD) {
- Changed = true;
- NMD->eraseFromParent();
+ for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
+ NME = M.named_metadata_end(); NMI != NME;) {
+ NamedMDNode *NMD = NMI;
+ ++NMI;
+ if (NMD->getName().startswith("llvm.dbg.")) {
+ NMD->eraseFromParent();
+ Changed = true;
+ }
}
-
+
unsigned MDDbgKind = M.getMDKindID("dbg");
- for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+ for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
++FI)
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
- ++BI)
+ ++BI) {
+ Changed = true; // FIXME: Only set if there was debug metadata.
BI->setMetadata(MDDbgKind, 0);
+ }
- return true;
+ return Changed;
}
bool StripSymbols::runOnModule(Module &M) {
@@ -266,8 +287,8 @@ bool StripDebugDeclare::runOnModule(Module &M) {
if (Declare) {
while (!Declare->use_empty()) {
CallInst *CI = cast<CallInst>(Declare->use_back());
- Value *Arg1 = CI->getOperand(1);
- Value *Arg2 = CI->getOperand(2);
+ Value *Arg1 = CI->getArgOperand(0);
+ Value *Arg2 = CI->getArgOperand(1);
assert(CI->use_empty() && "llvm.dbg intrinsic should have void result");
CI->eraseFromParent();
if (Arg1->use_empty()) {
@@ -295,3 +316,83 @@ bool StripDebugDeclare::runOnModule(Module &M) {
return true;
}
+
+/// getRealLinkageName - If the linkage name begins with the special LLVM
+/// prefix that tells the asm printer not to emit the usual symbol prefix,
+/// return the linkage name with that prefix skipped.
+static StringRef getRealLinkageName(StringRef LinkageName) {
+ char One = '\1';
+ if (LinkageName.startswith(StringRef(&One, 1)))
+ return LinkageName.substr(1);
+ return LinkageName;
+}
+
+bool StripDeadDebugInfo::runOnModule(Module &M) {
+ bool Changed = false;
+
+ // Debugging information is encoded in LLVM IR using metadata. This is
+ // designed in such a way that debug info for symbols is preserved even if
+ // the symbols are optimized away by the optimizer. This special pass
+ // removes debug info for such symbols.
+
+ // llvm.dbg.gv keeps track of debug info for global variables.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DIGlobalVariable(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(),
+ true)) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv");
+ NMD->addOperand(*I);
+ }
+ else
+ Changed = true;
+ }
+ }
+
+ // llvm.dbg.sp keeps track of debug info for subprograms.
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) {
+ SmallVector<MDNode *, 8> MDs;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ if (DISubprogram(NMD->getOperand(i)).Verify())
+ MDs.push_back(NMD->getOperand(i));
+ else
+ Changed = true;
+ NMD->eraseFromParent();
+ NMD = NULL;
+
+ for (SmallVector<MDNode *, 8>::iterator I = MDs.begin(),
+ E = MDs.end(); I != E; ++I) {
+ bool FnIsLive = false;
+ if (Function *F = DISubprogram(*I).getFunction())
+ if (M.getFunction(F->getName()))
+ FnIsLive = true;
+ if (FnIsLive) {
+ if (!NMD)
+ NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp");
+ NMD->addOperand(*I);
+ } else {
+ // Remove the llvm.dbg.lv.<fnname> named mdnode, which may have been used
+ // to hold debug info for the dead function's local variables.
+ StringRef FName = DISubprogram(*I).getLinkageName();
+ if (FName.empty())
+ FName = DISubprogram(*I).getName();
+ if (NamedMDNode *LVNMD =
+ M.getNamedMetadata(Twine("llvm.dbg.lv.",
+ getRealLinkageName(FName))))
+ LVNMD->eraseFromParent();
+ }
+ }
+ }
+
+ return Changed;
+}
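The new StripDeadDebugInfo pass prunes the llvm.dbg.gv and llvm.dbg.sp named metadata down to entries whose global or function still exists, and drops the per-function llvm.dbg.lv.<name> nodes of dead functions. A minimal sketch of scheduling it programmatically, assuming the matching createStripDeadDebugInfoPass declaration lands in llvm/Transforms/IPO.h alongside this patch (it is also registered for opt as -strip-dead-debug-info above):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"
    using namespace llvm;

    // Illustrative driver: run the new pass over a module in-process.
    static bool runStripDeadDebugInfo(Module &M) {
      PassManager PM;
      PM.add(createStripDeadDebugInfoPass());
      return PM.run(M);   // true if the module was modified
    }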
diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp
index 473e83c..a74686f 100644
--- a/lib/Transforms/IPO/StructRetPromotion.cpp
+++ b/lib/Transforms/IPO/StructRetPromotion.cpp
@@ -107,12 +107,12 @@ CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) {
// Check if it is ok to perform this promotion.
if (isSafeToUpdateAllCallers(F) == false) {
DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n");
- NumRejectedSRETUses++;
+ ++NumRejectedSRETUses;
return 0;
}
DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n");
- NumSRET++;
+ ++NumSRET;
// [1] Replace use of sret parameter
AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv",
F->getEntryBlock().begin());
@@ -171,16 +171,16 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) {
// Check FirstArg's users.
for (Value::use_iterator ArgI = FirstArg->use_begin(),
ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) {
-
+ User *U = *ArgI;
// If FirstArg user is a CallInst that does not correspond to current
// call site then this function F is not suitable for sret promotion.
- if (CallInst *CI = dyn_cast<CallInst>(ArgI)) {
+ if (CallInst *CI = dyn_cast<CallInst>(U)) {
if (CI != Call)
return false;
}
// If FirstArg user is a GEP whose all users are not LoadInst then
// this function F is not suitable for sret promotion.
- else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(ArgI)) {
+ else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
// TODO : Use dom info and insert PHINodes to collect get results
// from multiple call sites for this GEP.
if (GEP->getParent() != Call->getParent())
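The `User *U = *ArgI;` change above reflects another recurring theme in this update: dereference Value::use_iterator to a User* explicitly instead of relying on the iterator's implicit conversions, which are being phased out. A small sketch of the new idiom (hasCallUser is an illustrative helper):

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Illustrative helper showing the new use-walking idiom: materialize the
    // User* with an explicit dereference instead of passing the iterator to
    // isa/dyn_cast.
    static bool hasCallUser(Value *V) {
      for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
           UI != E; ++UI) {
        User *U = *UI;            // explicit dereference
        if (isa<CallInst>(U))
          return true;
      }
      return false;
    }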
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index c7b04a4..24e0528 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -178,7 +178,8 @@ public:
Instruction *visitPHINode(PHINode &PN);
Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
Instruction *visitAllocaInst(AllocaInst &AI);
- Instruction *visitFree(Instruction &FI);
+ Instruction *visitMalloc(Instruction &FI);
+ Instruction *visitFree(CallInst &FI);
Instruction *visitLoadInst(LoadInst &LI);
Instruction *visitStoreInst(StoreInst &SI);
Instruction *visitBranchInst(BranchInst &BI);
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 8586054..3f4a857 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1584,6 +1584,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if ((match(A, m_Not(m_Specific(B))) &&
match(D, m_Not(m_Specific(C)))))
return BinaryOperator::CreateXor(C, B);
+
+ // ((A|B)&1)|(B&-2) -> (A&1) | B
+ if (match(A, m_Or(m_Value(V1), m_Specific(B))) ||
+ match(A, m_Or(m_Specific(B), m_Value(V1)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C);
+ if (Ret) return Ret;
+ }
+ // (B&-2)|((A|B)&1) -> (A&1) | B
+ if (match(B, m_Or(m_Specific(A), m_Value(V1))) ||
+ match(B, m_Or(m_Value(V1), m_Specific(A)))) {
+ Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D);
+ if (Ret) return Ret;
+ }
}
// (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
@@ -1599,19 +1612,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
}
}
- // ((A|B)&1)|(B&-2) -> (A&1) | B
- if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
- match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
- Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C);
- if (Ret) return Ret;
- }
- // (B&-2)|((A|B)&1) -> (A&1) | B
- if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) ||
- match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) {
- Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C);
- if (Ret) return Ret;
- }
-
// (~A | ~B) == (~(A & B)) - De Morgan's Law
if (Value *Op0NotVal = dyn_castNotVal(Op0))
if (Value *Op1NotVal = dyn_castNotVal(Op1))
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 38e7b6e..85251a8 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -112,8 +112,8 @@ unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V,
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1));
- unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2));
+ unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0));
+ unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1));
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -125,7 +125,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
// load/store.
- ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getOperand(3));
+ ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getArgOperand(2));
if (MemOpLength == 0) return 0;
// Source and destination pointer types are always "i8*" for intrinsic. See
@@ -140,9 +140,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// Use an integer load+store unless we can find something better.
unsigned SrcAddrSp =
- cast<PointerType>(MI->getOperand(2)->getType())->getAddressSpace();
+ cast<PointerType>(MI->getArgOperand(1)->getType())->getAddressSpace();
unsigned DstAddrSp =
- cast<PointerType>(MI->getOperand(1)->getType())->getAddressSpace();
+ cast<PointerType>(MI->getArgOperand(0)->getType())->getAddressSpace();
const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp);
@@ -154,8 +154,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
// an i64 load+store, here because this improves the odds that the source or
// dest address will be promotable. See if we can find a better type than the
// integer datatype.
- Value *StrippedDest = MI->getOperand(1)->stripPointerCasts();
- if (StrippedDest != MI->getOperand(1)) {
+ Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts();
+ if (StrippedDest != MI->getArgOperand(0)) {
const Type *SrcETy = cast<PointerType>(StrippedDest->getType())
->getElementType();
if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) {
@@ -189,15 +189,15 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
SrcAlign = std::max(SrcAlign, CopyAlign);
DstAlign = std::max(DstAlign, CopyAlign);
- Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy);
- Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy);
+ Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy);
+ Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy);
Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign);
InsertNewInstBefore(L, *MI);
InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign),
*MI);
// Set the size of the copy to 0, it will be deleted on the next iteration.
- MI->setOperand(3, Constant::getNullValue(MemOpLength->getType()));
+ MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
return MI;
}
@@ -250,6 +250,8 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (isFreeCall(&CI))
return visitFree(CI);
+ if (isMalloc(&CI))
+ return visitMalloc(CI);
// If the caller function is nounwind, mark the call as nounwind, even if the
// callee isn't.
@@ -261,7 +263,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
IntrinsicInst *II = dyn_cast<IntrinsicInst>(&CI);
if (!II) return visitCallSite(&CI);
-
+
// Intrinsics cannot occur in an invoke, so handle them here instead of in
// visitCallSite.
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(II)) {
@@ -287,11 +289,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (GVSrc->isConstant()) {
Module *M = CI.getParent()->getParent()->getParent();
Intrinsic::ID MemCpyID = Intrinsic::memcpy;
- const Type *Tys[3] = { CI.getOperand(1)->getType(),
- CI.getOperand(2)->getType(),
- CI.getOperand(3)->getType() };
- CI.setCalledFunction(
- Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
+ const Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+ CI.getArgOperand(1)->getType(),
+ CI.getArgOperand(2)->getType() };
+ CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
Changed = true;
}
}
@@ -311,7 +312,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (Instruction *I = SimplifyMemSet(MSI))
return I;
}
-
+
if (Changed) return II;
}
@@ -322,10 +323,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (!TD) break;
const Type *ReturnTy = CI.getType();
- bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
// Get to the real allocated thing and offset as fast as possible.
- Value *Op1 = II->getOperand(1)->stripPointerCasts();
+ Value *Op1 = II->getArgOperand(0)->stripPointerCasts();
// If we've stripped down to a single global variable that we
// can know the size of then just return that.
@@ -393,7 +394,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset);
return ReplaceInstUsesWith(CI, RetVal);
-
}
// Do not return "I don't know" here. Later optimization passes could
@@ -402,45 +402,45 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::bswap:
// bswap(bswap(x)) -> x
- if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getOperand(1)))
+ if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(II->getArgOperand(0)))
if (Operand->getIntrinsicID() == Intrinsic::bswap)
- return ReplaceInstUsesWith(CI, Operand->getOperand(1));
+ return ReplaceInstUsesWith(CI, Operand->getArgOperand(0));
// bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
- if (TruncInst *TI = dyn_cast<TruncInst>(II->getOperand(1))) {
+ if (TruncInst *TI = dyn_cast<TruncInst>(II->getArgOperand(0))) {
if (IntrinsicInst *Operand = dyn_cast<IntrinsicInst>(TI->getOperand(0)))
if (Operand->getIntrinsicID() == Intrinsic::bswap) {
unsigned C = Operand->getType()->getPrimitiveSizeInBits() -
TI->getType()->getPrimitiveSizeInBits();
Value *CV = ConstantInt::get(Operand->getType(), C);
- Value *V = Builder->CreateLShr(Operand->getOperand(1), CV);
+ Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV);
return new TruncInst(V, TI->getType());
}
}
break;
case Intrinsic::powi:
- if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// powi(x, 0) -> 1.0
if (Power->isZero())
return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0));
// powi(x, 1) -> x
if (Power->isOne())
- return ReplaceInstUsesWith(CI, II->getOperand(1));
+ return ReplaceInstUsesWith(CI, II->getArgOperand(0));
// powi(x, -1) -> 1/x
if (Power->isAllOnesValue())
return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0),
- II->getOperand(1));
+ II->getArgOperand(0));
}
break;
case Intrinsic::cttz: {
// If all bits below the first known one are known zero,
// this value is constant.
- const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
+ ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
KnownZero, KnownOne);
unsigned TrailingZeros = KnownOne.countTrailingZeros();
APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
@@ -453,11 +453,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ctlz: {
// If all bits above the first known one are known zero,
// this value is constant.
- const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
- ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
+ ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
KnownZero, KnownOne);
unsigned LeadingZeros = KnownOne.countLeadingZeros();
APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
@@ -468,8 +468,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
case Intrinsic::uadd_with_overflow: {
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
- const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+ const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
uint32_t BitWidth = IT->getBitWidth();
APInt Mask = APInt::getSignBit(BitWidth);
APInt LHSKnownZero(BitWidth, 0);
@@ -513,19 +513,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// FALL THROUGH uadd into sadd
case Intrinsic::sadd_with_overflow:
// Canonicalize constants into the RHS.
- if (isa<Constant>(II->getOperand(1)) &&
- !isa<Constant>(II->getOperand(2))) {
- Value *LHS = II->getOperand(1);
- II->setOperand(1, II->getOperand(2));
- II->setOperand(2, LHS);
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
return II;
}
// X + undef -> undef
- if (isa<UndefValue>(II->getOperand(2)))
+ if (isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X + 0 -> {X, false}
if (RHS->isZero()) {
Constant *V[] = {
@@ -533,7 +533,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
- return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
break;
@@ -541,38 +541,38 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ssub_with_overflow:
// undef - X -> undef
// X - undef -> undef
- if (isa<UndefValue>(II->getOperand(1)) ||
- isa<UndefValue>(II->getOperand(2)))
+ if (isa<UndefValue>(II->getArgOperand(0)) ||
+ isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
- if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X - 0 -> {X, false}
if (RHS->isZero()) {
Constant *V[] = {
- UndefValue::get(II->getOperand(1)->getType()),
+ UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
- return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
break;
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
// Canonicalize constants into the RHS.
- if (isa<Constant>(II->getOperand(1)) &&
- !isa<Constant>(II->getOperand(2))) {
- Value *LHS = II->getOperand(1);
- II->setOperand(1, II->getOperand(2));
- II->setOperand(2, LHS);
+ if (isa<Constant>(II->getArgOperand(0)) &&
+ !isa<Constant>(II->getArgOperand(1))) {
+ Value *LHS = II->getArgOperand(0);
+ II->setArgOperand(0, II->getArgOperand(1));
+ II->setArgOperand(1, LHS);
return II;
}
// X * undef -> undef
- if (isa<UndefValue>(II->getOperand(2)))
+ if (isa<UndefValue>(II->getArgOperand(1)))
return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
- if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
+ if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
// X*0 -> {0, false}
if (RHSI->isZero())
return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
@@ -580,11 +580,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// X * 1 -> {X, false}
if (RHSI->equalsInt(1)) {
Constant *V[] = {
- UndefValue::get(II->getOperand(1)->getType()),
+ UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
- return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+ return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
break;
@@ -595,8 +595,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_loadu_dq:
// Turn PPC lvx -> load if the pointer is known aligned.
// Turn X86 loadups -> load if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
- Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
+ if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
}
@@ -604,22 +604,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+ if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) {
const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(1)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
- return new StoreInst(II->getOperand(1), Ptr);
+ PointerType::getUnqual(II->getArgOperand(0)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+ return new StoreInst(II->getArgOperand(0), Ptr);
}
break;
case Intrinsic::x86_sse_storeu_ps:
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
// Turn X86 storeu -> store if the pointer is known aligned.
- if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+ if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
const Type *OpPtrTy =
- PointerType::getUnqual(II->getOperand(2)->getType());
- Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
- return new StoreInst(II->getOperand(2), Ptr);
+ PointerType::getUnqual(II->getArgOperand(1)->getType());
+ Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
+ return new StoreInst(II->getArgOperand(1), Ptr);
}
break;
@@ -627,12 +627,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// These intrinsics demand only the 0th element of their input vector. If
// we can simplify the input based on that, do so now.
unsigned VWidth =
- cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
+ cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
APInt DemandedElts(VWidth, 1);
APInt UndefElts(VWidth, 0);
- if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+ if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts)) {
- II->setOperand(1, V);
+ II->setArgOperand(0, V);
return II;
}
break;
@@ -640,7 +640,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_vperm:
// Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
- if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+ if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
// Check that all of the elements are integer constants or undefs.
@@ -655,8 +655,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (AllEltsOk) {
// Cast the input vectors to byte vectors.
- Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
- Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
+ Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), Mask->getType());
+ Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType());
Value *Result = UndefValue::get(Op0->getType());
// Only extract each element once.
@@ -689,7 +689,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
- if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+ if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
if (SS->getIntrinsicID() == Intrinsic::stacksave) {
BasicBlock::iterator BI = SS;
if (&*++BI == II)
@@ -772,13 +772,13 @@ protected:
NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
}
bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp))) {
+ if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - CallInst::ArgOffset))) {
if (SizeCI->isAllOnesValue())
return true;
if (isString)
return SizeCI->getZExtValue() >=
- GetStringLength(CI->getOperand(SizeArgOp));
- if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getOperand(SizeArgOp)))
+ GetStringLength(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset));
+ if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset)))
return SizeCI->getZExtValue() >= Arg->getZExtValue();
}
return false;
@@ -846,7 +846,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
CS.getInstruction());
- // If CS dues not return void then replaceAllUsesWith undef.
+ // If CS does not return void then replaceAllUsesWith undef.
// This allows ValueHandlers and custom metadata to adjust itself.
if (!CS.getInstruction()->getType()->isVoidTy())
CS.getInstruction()->
@@ -1140,7 +1140,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
IntrinsicInst *Tramp =
cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
- Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
+ Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
@@ -1181,7 +1181,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
do {
if (Idx == NestIdx) {
// Add the chain argument and attributes.
- Value *NestVal = Tramp->getOperand(3);
+ Value *NestVal = Tramp->getArgOperand(2);
if (NestVal->getType() != NestTy)
NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
NewArgs.push_back(NestVal);
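Most of the churn in InstCombineCalls.cpp is mechanical: call arguments are now read through the 0-based CallInst::getArgOperand(i) instead of the 1-based getOperand(i), with CallInst::ArgOffset abstracting which operand slot the callee value occupies. A sketch of the mapping (firstArg is an illustrative helper, assuming ArgOffset's r108243-era meaning):

    #include <cassert>
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Illustrative helper: the relation behind the mechanical rewrite.
    // ArgOffset hides where the callee sits in the operand list.
    static Value *firstArg(CallInst *CI) {
      Value *A = CI->getArgOperand(0);   // previously CI->getOperand(1)
      assert(A == CI->getOperand(0 + CallInst::ArgOffset) && "same slot");
      return A;
    }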
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index b0137c4..505a0bf 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -22,19 +22,18 @@ using namespace PatternMatch;
/// X*Scale+Offset.
///
static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
- int &Offset) {
- assert(Val->getType()->isIntegerTy(32) && "Unexpected allocation size type!");
+ uint64_t &Offset) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
Offset = CI->getZExtValue();
Scale = 0;
- return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0);
+ return ConstantInt::get(Val->getType(), 0);
}
if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
if (I->getOpcode() == Instruction::Shl) {
// This is a value scaled by '1 << the shift amt'.
- Scale = 1U << RHS->getZExtValue();
+ Scale = UINT64_C(1) << RHS->getZExtValue();
Offset = 0;
return I->getOperand(0);
}
@@ -100,7 +99,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// See if we can satisfy the modulus by pulling a scale out of the array
// size argument.
unsigned ArraySizeScale;
- int ArrayOffset;
+ uint64_t ArrayOffset;
Value *NumElements = // See if the array size is a decomposable linear expr.
DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
@@ -114,13 +113,13 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
if (Scale == 1) {
Amt = NumElements;
} else {
- Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale);
+ Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
// Insert before the alloca, not before the cast.
Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
}
- if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
- Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()),
+ if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+ Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
Offset, true);
Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
}
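The DecomposeSimpleLinearExpr hunk widens the Scale and Offset math to 64 bits: with the old 32-bit `1U << amt`, a shift amount of 32 or more is undefined behavior and in practice produces a garbage scale. A standalone illustration of the hazard the UINT64_C(1) form avoids:

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned Amt = 40;
      // 1U << Amt is undefined once Amt >= 32; the 64-bit form is well
      // defined for Amt up to 63.
      uint64_t Scale = UINT64_C(1) << Amt;
      printf("%llu\n", (unsigned long long)Scale); // 1099511627776 (2^40)
      return 0;
    }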
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 861cf92..6c00586 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1423,7 +1423,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
switch (II->getIntrinsicID()) {
case Intrinsic::bswap:
Worklist.Add(II);
- ICI.setOperand(0, II->getOperand(1));
+ ICI.setOperand(0, II->getArgOperand(0));
ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
return &ICI;
case Intrinsic::ctlz:
@@ -1431,7 +1431,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
if (RHSV == RHS->getType()->getBitWidth()) {
Worklist.Add(II);
- ICI.setOperand(0, II->getOperand(1));
+ ICI.setOperand(0, II->getArgOperand(0));
ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
return &ICI;
}
@@ -1440,13 +1440,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// popcount(A) == 0 -> A == 0 and likewise for !=
if (RHS->isZero()) {
Worklist.Add(II);
- ICI.setOperand(0, II->getOperand(1));
+ ICI.setOperand(0, II->getArgOperand(0));
ICI.setOperand(1, RHS);
return &ICI;
}
break;
default:
- break;
+ break;
}
}
}
@@ -1924,35 +1924,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
break;
}
- case Instruction::Call:
- // If we have (malloc != null), and if the malloc has a single use, we
- // can assume it is successful and remove the malloc.
- if (isMalloc(LHSI) && LHSI->hasOneUse() &&
- isa<ConstantPointerNull>(RHSC)) {
- // Need to explicitly erase malloc call here, instead of adding it to
- // Worklist, because it won't get DCE'd from the Worklist since
- // isInstructionTriviallyDead() returns false for function calls.
- // It is OK to replace LHSI/MallocCall with Undef because the
- // instruction that uses it will be erased via Worklist.
- if (extractMallocCall(LHSI)) {
- LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType()));
- EraseInstFromFunction(*LHSI);
- return ReplaceInstUsesWith(I,
- ConstantInt::get(Type::getInt1Ty(I.getContext()),
- !I.isTrueWhenEqual()));
- }
- if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI))
- if (MallocCall->hasOneUse()) {
- MallocCall->replaceAllUsesWith(
- UndefValue::get(MallocCall->getType()));
- EraseInstFromFunction(*MallocCall);
- Worklist.Add(LHSI); // The malloc's bitcast use.
- return ReplaceInstUsesWith(I,
- ConstantInt::get(Type::getInt1Ty(I.getContext()),
- !I.isTrueWhenEqual()));
- }
- }
- break;
case Instruction::IntToPtr:
// icmp pred inttoptr(X), null -> icmp pred X, 0
if (RHSC->isNullValue() && TD &&
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 0f2a24f..8933a0b 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -13,6 +13,7 @@
#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -22,6 +23,18 @@ using namespace llvm;
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+ // Ensure that the alloca array size argument has type intptr_t, so that
+ // any casting is exposed early.
+ if (TD) {
+ const Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+ if (AI.getArraySize()->getType() != IntPtrTy) {
+ Value *V = Builder->CreateIntCast(AI.getArraySize(),
+ IntPtrTy, false);
+ AI.setOperand(0, V);
+ return &AI;
+ }
+ }
+
// Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
if (AI.isArrayAllocation()) { // Check C != 1
if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
@@ -352,10 +365,11 @@ DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
return 0;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
UI != E; ++UI) {
- if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
+ User *U = *UI;
+ if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U))
return DI;
- if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
- if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
+ if (isa<BitCastInst>(U) && U->hasOneUse()) {
+ if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U->use_begin()))
return DI;
}
}
@@ -511,17 +525,20 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
// Determine whether Dest has exactly two predecessors and, if so, compute
// the other predecessor.
pred_iterator PI = pred_begin(DestBB);
+ BasicBlock *P = *PI;
BasicBlock *OtherBB = 0;
- if (*PI != StoreBB)
- OtherBB = *PI;
- ++PI;
- if (PI == pred_end(DestBB))
+
+ if (P != StoreBB)
+ OtherBB = P;
+
+ if (++PI == pred_end(DestBB))
return false;
- if (*PI != StoreBB) {
+ P = *PI;
+ if (P != StoreBB) {
if (OtherBB)
return false;
- OtherBB = *PI;
+ OtherBB = P;
}
if (++PI != pred_end(DestBB))
return false;
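The new prologue of visitAllocaInst canonicalizes the alloca's array-size operand to the target's intptr_t, so any implicit widening of the element count becomes an explicit cast that later combines can see and fold. A consolidated sketch of the transform and its IR-level effect (canonicalizeAllocaSize is an illustrative wrapper around the same calls as the hunk):

    #include "llvm/Instructions.h"
    #include "llvm/Support/IRBuilder.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    // IR-level effect on a 64-bit target:
    //   before:  %buf = alloca i8, i32 %n
    //   after:   %n.c = zext i32 %n to i64
    //            %buf = alloca i8, i64 %n.c
    static void canonicalizeAllocaSize(AllocaInst &AI, const TargetData &TD,
                                       IRBuilder<> &Builder) {
      const Type *IntPtrTy = TD.getIntPtrType(AI.getContext());
      if (AI.getArraySize()->getType() != IntPtrTy) {
        Value *V = Builder.CreateIntCast(AI.getArraySize(), IntPtrTy,
                                         false /*zero-extend*/);
        AI.setOperand(0, V);   // operand 0 of an alloca is the array size
      }
    }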
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 65f0393..f7fc62f 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -230,8 +230,9 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
bool isAddressTaken = false;
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI) {
- if (isa<LoadInst>(UI)) continue;
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ User *U = *UI;
+ if (isa<LoadInst>(U)) continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
// If storing TO the alloca, then the address isn't taken.
if (SI->getOperand(1) == AI) continue;
}
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index c958cde..c44fe9d 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -329,6 +329,37 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
}
}
+ // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
+ // and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
+ // FIXME: Type and constness constraints could be lifted, but we have to
+ // watch code size carefully. We should consider xor instead of
+ // sub/add when we decide to do that.
+ if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
+ if (TrueVal->getType() == Ty) {
+ if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
+ ConstantInt *C1 = NULL, *C2 = NULL;
+ if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
+ C1 = dyn_cast<ConstantInt>(TrueVal);
+ C2 = dyn_cast<ConstantInt>(FalseVal);
+ } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) {
+ C1 = dyn_cast<ConstantInt>(FalseVal);
+ C2 = dyn_cast<ConstantInt>(TrueVal);
+ }
+ if (C1 && C2) {
+ // This shift results in either -1 or 0.
+ Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1);
+
+ // Check if we can express the operation with a single or.
+ if (C2->isAllOnesValue())
+ return ReplaceInstUsesWith(SI, Builder->CreateOr(AShr, C1));
+
+ Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue());
+ return ReplaceInstUsesWith(SI, Builder->CreateAdd(And, C1));
+ }
+ }
+ }
+ }
+
if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
// Transform (X == Y) ? X : Y -> Y
if (Pred == ICmpInst::ICMP_EQ)
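The new select combine above turns a sign test choosing between two constants into branch-free arithmetic: an arithmetic shift by bitwidth-1 yields 0 for non-negative X and -1 for negative X, so masking (C2 - C1) and adding C1 selects between the constants. A standalone C++ demonstration (selViaAshr is illustrative; it assumes >> on a negative int32_t is an arithmetic shift, which mainstream compilers provide and the IR ashr guarantees):

    #include <cstdint>
    #include <cstdio>

    // Branch-free form of (x > -1) ? c1 : c2 produced by the new combine.
    static int32_t selViaAshr(int32_t x, int32_t c1, int32_t c2) {
      int32_t sign = x >> 31;           // 0 if x >= 0, -1 if x < 0
      return (sign & (c2 - c1)) + c1;   // x >= 0: c1;  x < 0: c1 + (c2 - c1) = c2
    }

    int main() {
      printf("%d %d\n", selViaAshr(5, 10, 20), selViaAshr(-5, 10, 20)); // 10 20
      return 0;
    }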
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 836bda3..e5ce8a6 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -404,7 +404,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){
bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
- Value *Cmp = Builder->CreateICmpEQ(II->getOperand(1), RHS);
+ Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
return new ZExtInst(Cmp, II->getType());
}
}
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index cd41844..adf7a76 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -732,10 +732,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
// the right place.
Instruction *NewVal;
if (InputBit > ResultBit)
- NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+ NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
ConstantInt::get(I->getType(), InputBit-ResultBit));
else
- NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+ NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
ConstantInt::get(I->getType(), ResultBit-InputBit));
NewVal->takeName(I);
return InsertNewInstBefore(NewVal, *I);
@@ -1052,12 +1052,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_sse2_mul_sd:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
- TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth+1);
- if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
UndefElts2, Depth+1);
- if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
// If only the low elt is demanded and this is a scalarizable intrinsic,
// scalarize it now.
@@ -1069,8 +1069,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
case Intrinsic::x86_sse2_sub_sd:
case Intrinsic::x86_sse2_mul_sd:
// TODO: Lower MIN/MAX/ABS/etc
- Value *LHS = II->getOperand(1);
- Value *RHS = II->getOperand(2);
+ Value *LHS = II->getArgOperand(0);
+ Value *RHS = II->getArgOperand(1);
// Extract the element as scalars.
LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index af9ec5c..af2958f 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -710,8 +710,55 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return 0;
}
-Instruction *InstCombiner::visitFree(Instruction &FI) {
- Value *Op = FI.getOperand(1);
+
+
+static bool IsOnlyNullComparedAndFreed(const Value &V) {
+ for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (isFreeCall(U))
+ continue;
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U))
+ if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1)))
+ continue;
+ return false;
+ }
+ return true;
+}
+
+Instruction *InstCombiner::visitMalloc(Instruction &MI) {
+ // If we have a malloc call that is used only in comparisons to null and in
+ // free calls, delete the calls and replace the comparisons with true or
+ // false as appropriate.
+ if (IsOnlyNullComparedAndFreed(MI)) {
+ for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end();
+ UI != UE;) {
+ // We can assume that every remaining use is a free call or an icmp eq/ne
+ // to null, so the cast is safe.
+ Instruction *I = cast<Instruction>(*UI);
+
+ // Early increment here, as we're about to get rid of the user.
+ ++UI;
+
+ if (isFreeCall(I)) {
+ EraseInstFromFunction(*cast<CallInst>(I));
+ continue;
+ }
+ // Again, the cast is safe.
+ ICmpInst *C = cast<ICmpInst>(I);
+ ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()),
+ C->isFalseWhenEqual()));
+ EraseInstFromFunction(*C);
+ }
+ return EraseInstFromFunction(MI);
+ }
+ return 0;
+}
+
+
+
+Instruction *InstCombiner::visitFree(CallInst &FI) {
+ Value *Op = FI.getArgOperand(0);
// free undef -> unreachable.
if (isa<UndefValue>(Op)) {
@@ -726,23 +773,6 @@ Instruction *InstCombiner::visitFree(Instruction &FI) {
if (isa<ConstantPointerNull>(Op))
return EraseInstFromFunction(FI);
- // If we have a malloc call whose only use is a free call, delete both.
- if (isMalloc(Op)) {
- if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
- if (Op->hasOneUse() && CI->hasOneUse()) {
- EraseInstFromFunction(FI);
- EraseInstFromFunction(*CI);
- return EraseInstFromFunction(*cast<Instruction>(Op));
- }
- } else {
- // Op is a call to malloc
- if (Op->hasOneUse()) {
- EraseInstFromFunction(FI);
- return EraseInstFromFunction(*cast<Instruction>(Op));
- }
- }
- }
-
return 0;
}
@@ -896,7 +926,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
// We're extracting from an intrinsic, see if we're the only user, which
// allows us to simplify multiple result intrinsics to simpler things that
- // just get one value..
+ // just get one value.
if (II->hasOneUse()) {
// Check if we're grabbing the overflow bit or the result of a 'with
// overflow' intrinsic. If it's the latter we can remove the intrinsic
@@ -905,7 +935,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
II->replaceAllUsesWith(UndefValue::get(II->getType()));
EraseInstFromFunction(*II);
return BinaryOperator::CreateAdd(LHS, RHS);
@@ -914,7 +944,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow:
if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
II->replaceAllUsesWith(UndefValue::get(II->getType()));
EraseInstFromFunction(*II);
return BinaryOperator::CreateSub(LHS, RHS);
@@ -923,7 +953,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
case Intrinsic::umul_with_overflow:
case Intrinsic::smul_with_overflow:
if (*EV.idx_begin() == 0) { // Normal result.
- Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
II->replaceAllUsesWith(UndefValue::get(II->getType()));
EraseInstFromFunction(*II);
return BinaryOperator::CreateMul(LHS, RHS);
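Taken together with the InstCombineCompares hunk that deleted the old malloc != null special case, the new visitMalloc generalizes it: a malloc whose only uses are frees and equality comparisons against null can be deleted outright, with each comparison folded as if the allocation succeeded. A C-level view of what may now fold away (allocProbe is an illustrative function):

    #include <cstdlib>

    // Both the null check and the allocation may fold away: the malloc result
    // is only compared against null and freed, so InstCombine may assume the
    // allocation succeeds and delete all three instructions.
    int allocProbe(std::size_t n) {
      void *p = std::malloc(n);
      if (p == 0)       // folds to false ("allocation succeeded")
        return 0;
      std::free(p);     // deleted along with the malloc
      return 1;         // the whole function folds to "return 1"
    }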
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 5650150..41e3a39 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -143,7 +143,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
if (!std::binary_search(MST.begin(), MST.end(), edge)) {
printEdgeCounter(edge,entry,i);
- IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++;
+ IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
Initializer[i++] = (Zero);
} else{
Initializer[i++] = (Uncounted);
@@ -166,7 +166,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
if (!std::binary_search(MST.begin(), MST.end(), edge)) {
printEdgeCounter(edge,BB,i);
- IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
Initializer[i++] = (Zero);
} else{
Initializer[i++] = (Uncounted);
@@ -189,11 +189,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
if (TI->getNumSuccessors() == 1) {
// Insert counter at the start of the block
printEdgeCounter(edge,BB,i);
- IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+ IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
} else {
// Insert counter at the start of the block
printEdgeCounter(edge,Succ,i);
- IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++;
+ IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
}
Initializer[i++] = (Zero);
} else {
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 8662a82..1a30e9b 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -61,8 +61,8 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
}
Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
- Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
- "newargc", InsertPos);
+ CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
+ "newargc", InsertPos);
// If argc or argv are not available in main, just pass null values in.
Function::arg_iterator AI;
@@ -73,10 +73,10 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
if (AI->getType() != ArgVTy) {
Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
false);
- InitCall->setOperand(2,
+ InitCall->setArgOperand(1,
CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
} else {
- InitCall->setOperand(2, AI);
+ InitCall->setArgOperand(1, AI);
}
/* FALL THROUGH */
@@ -93,12 +93,12 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
}
opcode = CastInst::getCastOpcode(AI, true,
Type::getInt32Ty(Context), true);
- InitCall->setOperand(1,
+ InitCall->setArgOperand(0,
CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
"argc.cast", InitCall));
} else {
AI->replaceAllUsesWith(InitCall);
- InitCall->setOperand(1, AI);
+ InitCall->setArgOperand(0, AI);
}
case 0: break;
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index 6135992..dcf14a6 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -230,7 +230,7 @@ class ABCD : public FunctionPass {
DenseMapIterator<Value*, MemoizedResultChart> begin = map.begin();
DenseMapIterator<Value*, MemoizedResultChart> end = map.end();
for (; begin != end; ++begin) {
- begin->second.clear();
+ begin->second.clear();
}
map.clear();
}
@@ -396,8 +396,8 @@ class ABCD : public FunctionPass {
/// this case the method returns true, otherwise false. It also obtains the
/// Instruction and ConstantInt from the BinaryOperator and returns it.
bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
- Instruction **I2, ConstantInt **C1,
- ConstantInt **C2);
+ Instruction **I2, ConstantInt **C1,
+ ConstantInt **C2);
/// This method creates a constraint between a Sigma and an Instruction.
/// These constraints are created as soon as we find a comparator that uses a
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 5a49841..2d19467 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -83,7 +83,7 @@ bool ADCE::runOnFunction(Function& F) {
for (SmallVector<Instruction*, 1024>::iterator I = worklist.begin(),
E = worklist.end(); I != E; ++I) {
- NumRemoved++;
+ ++NumRemoved;
(*I)->eraseFromParent();
}
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 93e9bfb..272066c 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -548,7 +548,8 @@ protected:
CI->eraseFromParent();
}
bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
- if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp)))
+ if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp
+ - CallInst::ArgOffset)))
return SizeCI->isAllOnesValue();
return false;
}
@@ -559,7 +560,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
// Lower all uses of llvm.objectsize.*
IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
- bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+ bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
const Type *ReturnTy = CI->getType();
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
CI->replaceAllUsesWith(RetVal);
@@ -759,8 +760,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
}
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue(),
- OpInfo.ConstraintType == TargetLowering::C_Memory);
+ TLI->ComputeConstraintToUse(OpInfo, SDValue());
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
OpInfo.isIndirect) {
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 09c01d3..e047e4f 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -56,7 +56,8 @@ namespace {
}
bool runOnBasicBlock(BasicBlock &BB);
- bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep);
+ bool handleFreeWithNonTrivialDependency(const CallInst *F,
+ MemDepResult Dep);
bool handleEndBlock(BasicBlock &BB);
bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
BasicBlock::iterator &BBI,
@@ -73,7 +74,6 @@ namespace {
AU.addRequired<AliasAnalysis>();
AU.addRequired<MemoryDependenceAnalysis>();
AU.addPreserved<DominatorTree>();
- AU.addPreserved<AliasAnalysis>();
AU.addPreserved<MemoryDependenceAnalysis>();
}
@@ -123,14 +123,15 @@ static Value *getPointerOperand(Instruction *I) {
if (StoreInst *SI = dyn_cast<StoreInst>(I))
return SI->getPointerOperand();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
- return MI->getOperand(1);
-
- switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ return MI->getArgOperand(0);
+
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
default: assert(false && "Unexpected intrinsic!");
case Intrinsic::init_trampoline:
- return I->getOperand(1);
+ return II->getArgOperand(0);
case Intrinsic::lifetime_end:
- return I->getOperand(2);
+ return II->getArgOperand(1);
}
}
@@ -147,12 +148,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
Len = MI->getLength();
} else {
- switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
default: assert(false && "Unexpected intrinsic!");
case Intrinsic::init_trampoline:
return -1u;
case Intrinsic::lifetime_end:
- Len = I->getOperand(1);
+ Len = II->getArgOperand(0);
break;
}
}
@@ -201,8 +203,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (InstDep.isNonLocal()) continue;
// Handle frees whose dependencies are non-trivial.
- if (isFreeCall(Inst)) {
- MadeChange |= handleFreeWithNonTrivialDependency(Inst, InstDep);
+ if (const CallInst *F = isFreeCall(Inst)) {
+ MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep);
continue;
}
@@ -218,7 +220,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
isElidable(DepStore)) {
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepStore);
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
// DeleteDeadInstruction can delete the current instruction in loop
@@ -249,7 +251,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
}
@@ -270,7 +272,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
BBI = BB.begin();
else if (BBI != BB.begin()) // Revisit this instruction if possible.
--BBI;
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
}
@@ -287,7 +289,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
/// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
/// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
+bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
+ MemDepResult Dep) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
Instruction *Dependency = Dep.getInst();
@@ -297,13 +300,13 @@ bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
// Check for aliasing.
- if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
+ if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
AliasAnalysis::MustAlias)
return false;
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency);
- NumFastStores++;
+ ++NumFastStores;
return true;
}
@@ -349,9 +352,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
if (deadPointers.count(pointerOperand)) {
// DCE instructions only used to calculate that store.
Instruction *Dead = BBI;
- BBI++;
+ ++BBI;
DeleteDeadInstruction(Dead, &deadPointers);
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
}
@@ -371,9 +374,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// However, if this load is unused and not volatile, we can go ahead and
// remove it, and not have to worry about it making our pointer undead!
if (L->use_empty() && !L->isVolatile()) {
- BBI++;
+ ++BBI;
DeleteDeadInstruction(L, &deadPointers);
- NumFastOther++;
+ ++NumFastOther;
MadeChange = true;
continue;
}
@@ -391,9 +394,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
// Dead alloca's can be DCE'd when we reach them
if (A->use_empty()) {
- BBI++;
+ ++BBI;
DeleteDeadInstruction(A, &deadPointers);
- NumFastOther++;
+ ++NumFastOther;
MadeChange = true;
}
@@ -426,9 +429,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
getPointerSize(*I));
if (A == AliasAnalysis::ModRef)
- modRef++;
+ ++modRef;
else
- other++;
+ ++other;
if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
dead.push_back(*I);
@@ -442,9 +445,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
} else if (isInstructionTriviallyDead(BBI)) {
// For any non-memory-affecting non-terminators, DCE them as we reach them
Instruction *Inst = BBI;
- BBI++;
+ ++BBI;
DeleteDeadInstruction(Inst, &deadPointers);
- NumFastOther++;
+ ++NumFastOther;
MadeChange = true;
continue;
}
@@ -497,7 +500,7 @@ bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize,
// Remove it!
++BBI;
DeleteDeadInstruction(S, &deadPointers);
- NumFastStores++;
+ ++NumFastStores;
MadeChange = true;
continue;
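
// Sketch (not part of this patch): the getOperand(N) -> getArgOperand(N-1)
// rewrites above follow from the call operand layout this tree assumes,
// where the callee occupies one operand slot ahead of the arguments (see
// CallInst::ArgOffset further down). A hypothetical stand-in type:

#include <cassert>
#include <vector>

struct FakeCall {
  static const unsigned ArgOffset = 1;          // callee sits at operand 0
  std::vector<int> Operands;                    // operand list, callee first
  int getOperand(unsigned i) const { return Operands[i]; }
  int getArgOperand(unsigned i) const { return Operands[i + ArgOffset]; }
  unsigned getNumArgOperands() const {
    return (unsigned)Operands.size() - ArgOffset;
  }
};

int main() {
  FakeCall C;
  C.Operands.push_back(100);                    // the callee
  C.Operands.push_back(7);                      // argument 0
  assert(C.getOperand(1) == C.getArgOperand(0)); // old index vs. new index
  assert(C.getNumArgOperands() == 1);
  return 0;
}
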
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index ca8ab49..88b6776 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -35,6 +35,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PHITransAddr.h"
@@ -271,7 +272,8 @@ Expression ValueTable::create_expression(CallInst* C) {
e.function = C->getCalledFunction();
e.opcode = Expression::CALL;
- for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+ CallSite CS(C);
+ for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
I != E; ++I)
e.varargs.push_back(lookup_or_add(*I));
@@ -447,14 +449,14 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
if (local_dep.isDef()) {
CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
- if (local_cdep->getNumOperands() != C->getNumOperands()) {
+ if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 1; i < C->getNumOperands(); ++i) {
- uint32_t c_vn = lookup_or_add(C->getOperand(i));
- uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
if (c_vn != cd_vn) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
@@ -504,13 +506,13 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
return nextValueNumber++;
}
- if (cdep->getNumOperands() != C->getNumOperands()) {
+ if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
}
- for (unsigned i = 1; i < C->getNumOperands(); ++i) {
- uint32_t c_vn = lookup_or_add(C->getOperand(i));
- uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+ for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+ uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+ uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
if (c_vn != cd_vn) {
valueNumbering[C] = nextValueNumber;
return nextValueNumber++;
@@ -1500,7 +1502,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
MD->invalidateCachedPointerInfo(V);
VN.erase(LI);
toErase.push_back(LI);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1723,7 +1725,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
MD->invalidateCachedPointerInfo(V);
VN.erase(LI);
toErase.push_back(LI);
- NumPRELoad++;
+ ++NumPRELoad;
return true;
}
@@ -1784,7 +1786,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
MD->invalidateCachedPointerInfo(AvailVal);
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1830,7 +1832,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
MD->invalidateCachedPointerInfo(StoredVal);
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1860,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
MD->invalidateCachedPointerInfo(DepLI);
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1871,7 +1873,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
@@ -1882,7 +1884,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
VN.erase(L);
toErase.push_back(L);
- NumGVNLoad++;
+ ++NumGVNLoad;
return true;
}
}
@@ -2014,7 +2016,7 @@ bool GVN::runOnFunction(Function& F) {
BasicBlock *BB = FI;
++FI;
bool removedBlock = MergeBlockIntoPredecessor(BB, this);
- if (removedBlock) NumGVNBlocks++;
+ if (removedBlock) ++NumGVNBlocks;
Changed |= removedBlock;
}
@@ -2126,27 +2128,28 @@ bool GVN::performPRE(Function &F) {
for (pred_iterator PI = pred_begin(CurrentBlock),
PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
// We're not interested in PRE where the block is its
// own predecessor, or in blocks with predecessors
// that are not reachable.
- if (*PI == CurrentBlock) {
+ if (P == CurrentBlock) {
NumWithout = 2;
break;
- } else if (!localAvail.count(*PI)) {
+ } else if (!localAvail.count(P)) {
NumWithout = 2;
break;
}
DenseMap<uint32_t, Value*>::iterator predV =
- localAvail[*PI]->table.find(ValNo);
- if (predV == localAvail[*PI]->table.end()) {
- PREPred = *PI;
- NumWithout++;
+ localAvail[P]->table.find(ValNo);
+ if (predV == localAvail[P]->table.end()) {
+ PREPred = P;
+ ++NumWithout;
} else if (predV->second == CurInst) {
NumWithout = 2;
} else {
- predMap[*PI] = predV->second;
- NumWith++;
+ predMap[P] = predV->second;
+ ++NumWith;
}
}
@@ -2201,7 +2204,7 @@ bool GVN::performPRE(Function &F) {
PREInstr->setName(CurInst->getName() + ".pre");
predMap[PREPred] = PREInstr;
VN.add(PREInstr, ValNo);
- NumGVNPRE++;
+ ++NumGVNPRE;
// Update the availability map to include the new instruction.
localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
@@ -2211,8 +2214,10 @@ bool GVN::performPRE(Function &F) {
CurInst->getName() + ".pre-phi",
CurrentBlock->begin());
for (pred_iterator PI = pred_begin(CurrentBlock),
- PE = pred_end(CurrentBlock); PI != PE; ++PI)
- Phi->addIncoming(predMap[*PI], *PI);
+ PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ Phi->addIncoming(predMap[P], P);
+ }
VN.add(Phi, ValNo);
localAvail[CurrentBlock]->table[ValNo] = Phi;
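
// Sketch (not part of this patch) of the availability count the performPRE
// hunk above refactors: PRE fires only when the value is available in every
// predecessor but exactly one, and never when the block is its own
// predecessor. Hypothetical standalone types, simplified from the real loop:

#include <cstddef>
#include <map>
#include <vector>

struct Block { std::vector<Block *> Preds; std::map<unsigned, int *> Avail; };

static bool canPRE(Block *BB, unsigned ValNo, Block *&PREPred) {
  unsigned NumWithout = 0;
  for (std::size_t i = 0, e = BB->Preds.size(); i != e; ++i) {
    Block *P = BB->Preds[i];
    if (P == BB) return false;             // block is its own predecessor
    if (!P->Avail.count(ValNo)) {
      PREPred = P;                         // the one predecessor to fix up
      if (++NumWithout > 1) return false;  // mirrors "NumWithout = 2"
    }
  }
  return NumWithout == 1;
}
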
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 36bea67..b5c9dd8 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -467,6 +467,17 @@ void IndVarSimplify::EliminateIVRemainders() {
}
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // If LoopSimplify form is not available, stay out of trouble. Some notes:
+ // - LSR currently only supports LoopSimplify-form loops. Indvars'
+ // canonicalization can be a pessimization without LSR to "clean up"
+ // afterwards.
+ // - We depend on having a preheader; in particular,
+ // Loop::getCanonicalInductionVariable only supports loops with preheaders,
+ // and we're in trouble if we can't find the induction variable even when
+ // we've manually inserted one.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
@@ -760,8 +771,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
bool UsedInLoop = false;
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
UI != UE; ++UI) {
- BasicBlock *UseBB = cast<Instruction>(UI)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(UI)) {
+ User *U = *UI;
+ BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U)) {
unsigned i =
PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
UseBB = P->getIncomingBlock(i);
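
// Sketch (not part of this patch): the rule the SinkUnusedInvariants hunk
// preserves is that a PHI "uses" a value on the incoming edge, not in the
// PHI's own block, so sinking decisions must consult getIncomingBlock().
// Assumes the in-tree use_iterator API as used above:

#include "llvm/Instructions.h"
using namespace llvm;

static BasicBlock *blockOfUse(Value::use_iterator UI) {
  User *U = *UI;
  if (PHINode *P = dyn_cast<PHINode>(U))
    return P->getIncomingBlock(
        PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
  return cast<Instruction>(U)->getParent();
}
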
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index df05b71..edce14c 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -18,6 +18,7 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -288,14 +289,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
// Perhaps getConstantOnEdge should be smart enough to do this?
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
- Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB);
+ Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
if (PredCst == 0 ||
(!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
continue;
- Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI));
+ Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
}
return !Result.empty();
@@ -345,8 +347,19 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
}
for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
- Result.push_back(RHSVals[i]);
- Result.back().first = InterestingVal;
+ // If we already inferred a value for this block on the LHS, don't
+ // re-add it.
+ bool HasValue = false;
+ for (unsigned r = 0, e = Result.size(); r != e; ++r)
+ if (Result[r].second == RHSVals[i].second) {
+ HasValue = true;
+ break;
+ }
+
+ if (!HasValue) {
+ Result.push_back(RHSVals[i]);
+ Result.back().first = InterestingVal;
+ }
}
return !Result.empty();
}
@@ -409,20 +422,21 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
(!isa<Instruction>(Cmp->getOperand(0)) ||
cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
-
+
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
LazyValueInfo::Tristate
Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
- RHSCst, *PI, BB);
+ RHSCst, P, BB);
if (Res == LazyValueInfo::Unknown)
continue;
Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
- Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
}
-
+
return !Result.empty();
}
}
@@ -538,18 +552,22 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
(CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
if (isa<BranchInst>(BB->getTerminator())) {
- for (; PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
if (PBI->isConditional() && PBI->getCondition() == Condition &&
- ProcessBranchOnDuplicateCond(*PI, BB))
+ ProcessBranchOnDuplicateCond(P, BB))
return true;
+ }
} else {
assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
- for (; PI != E; ++PI)
- if (SwitchInst *PSI = dyn_cast<SwitchInst>((*PI)->getTerminator()))
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
if (PSI->getCondition() == Condition &&
- ProcessSwitchOnDuplicateCond(*PI, BB))
+ ProcessSwitchOnDuplicateCond(P, BB))
return true;
+ }
}
}
@@ -569,19 +587,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// If we have a comparison, loop over the predecessors to see if there is
// a condition with a lexically identical value.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- for (; PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- if (PBI->isConditional() && *PI != BB) {
+ for (; PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
+ if (PBI->isConditional() && P != BB) {
if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
if (CI->getOperand(0) == CondCmp->getOperand(0) &&
CI->getOperand(1) == CondCmp->getOperand(1) &&
CI->getPredicate() == CondCmp->getPredicate()) {
// TODO: Could handle things like (x != 4) --> (x == 17)
- if (ProcessBranchOnDuplicateCond(*PI, BB))
+ if (ProcessBranchOnDuplicateCond(P, BB))
return true;
}
}
}
+ }
}
}
@@ -869,9 +889,15 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Add all the unavailable predecessors to the PredsToSplit list.
for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI)
- if (!AvailablePredSet.count(*PI))
- PredsToSplit.push_back(*PI);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ // If the predecessor is an indirect goto, we can't split the edge.
+ if (isa<IndirectBrInst>(P->getTerminator()))
+ return false;
+
+ if (!AvailablePredSet.count(P))
+ PredsToSplit.push_back(P);
+ }
// Split them out to their own block.
UnavailablePred =
@@ -903,11 +929,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// have multiple entries here.
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
++PI) {
+ BasicBlock *P = *PI;
AvailablePredsTy::iterator I =
std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
- std::make_pair(*PI, (Value*)0));
+ std::make_pair(P, (Value*)0));
- assert(I != AvailablePreds.end() && I->first == *PI &&
+ assert(I != AvailablePreds.end() && I->first == P &&
"Didn't find entry for predecessor!");
PN->addIncoming(I->second, I->first);
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 48817ab..e4894e9 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -83,7 +83,7 @@ bool LoopDeletion::IsLoopDead(Loop* L,
if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
return false;
- BI++;
+ ++BI;
}
// Make sure that no instructions in the block have potential side-effects.
@@ -176,7 +176,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
BasicBlock::iterator BI = exitBlock->begin();
while (PHINode* P = dyn_cast<PHINode>(BI)) {
P->replaceUsesOfWith(exitingBlock, preheader);
- BI++;
+ ++BI;
}
// Update the dominator tree and remove the instructions and blocks that will
@@ -226,7 +226,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
LPM.deleteLoopFromQueue(L);
Changed = true;
- NumDeleted++;
+ ++NumDeleted;
return Changed;
}
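
// Sketch (not part of this patch): the mechanical "BI++" -> "++BI" rewrites
// in this file and throughout the patch avoid the dead iterator copy that
// post-increment makes; for heavyweight iterators the copy is pure overhead.
// The nearby erase idiom steps past the victim before deleting it:

#include <list>

static void eraseAll(std::list<int> &L) {
  for (std::list<int>::iterator I = L.begin(); I != L.end(); ) {
    std::list<int>::iterator Dead = I;
    ++I;             // advance first (pre-increment, no copy kept)...
    L.erase(Dead);   // ...so erasing Dead cannot invalidate I
  }
}
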
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 101ff5b..31058e5 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -649,7 +649,7 @@ bool LoopIndexSplit::updateLoopIterationSpace() {
}
}
}
- NumRestrictBounds++;
+ ++NumRestrictBounds;
return true;
}
@@ -958,11 +958,11 @@ bool LoopIndexSplit::splitLoop() {
continue;
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
- BI != BE; ++BI) {
+ BI != BE; ++BI) {
Instruction *Inst = BI;
if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
- && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
+ && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
return false;
}
}
@@ -1016,13 +1016,13 @@ bool LoopIndexSplit::splitLoop() {
BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
// [*] Clone Loop
- DenseMap<const Value *, Value *> ValueMap;
- Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this);
+ ValueMap<const Value *, Value *> VMap;
+ Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
Loop *ALoop = L;
// [*] ALoop's exiting edge enters BLoop's header.
// ALoop's original exit block becomes BLoop's exit block.
- PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]);
+ PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
BasicBlock *A_ExitingBlock = ExitCondition->getParent();
BranchInst *A_ExitInsn =
dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
@@ -1047,7 +1047,7 @@ bool LoopIndexSplit::splitLoop() {
for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
+ PHINode *PNClone = cast<PHINode>(VMap[PN]);
InverseMap[PNClone] = PN;
} else
break;
@@ -1085,11 +1085,11 @@ bool LoopIndexSplit::splitLoop() {
// block. Remove incoming PHINode values from ALoop's exiting block.
// Add new incoming values from BLoop's incoming exiting value.
// Update BLoop exit block's dominator info.
- BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
+ BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
BI != BE; ++BI) {
if (PHINode *PN = dyn_cast<PHINode>(BI)) {
- PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
+ PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
B_ExitingBlock);
PN->removeIncomingValue(A_ExitingBlock);
} else
@@ -1131,7 +1131,7 @@ bool LoopIndexSplit::splitLoop() {
removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
//[*] Eliminate split condition's inactive branch in from BLoop.
- BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
+ BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
BasicBlock *B_InactiveBranch = NULL;
BasicBlock *B_ActiveBranch = NULL;
@@ -1146,9 +1146,9 @@ bool LoopIndexSplit::splitLoop() {
//[*] Move exit condition into split condition block to avoid
// executing dead loop iteration.
- ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
- Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
- ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);
+ ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
+ Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
+ ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
@@ -1159,7 +1159,7 @@ bool LoopIndexSplit::splitLoop() {
B_SplitCondition, B_IndVar, B_IndVarIncrement,
BLoop, EVOpNum);
- NumIndexSplit++;
+ ++NumIndexSplit;
return true;
}
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 5004483..16c4a15 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -147,7 +147,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
continue; // PHI nodes don't count.
if (isa<DbgInfoIntrinsic>(OI))
continue; // Debug intrinsics don't count as size.
- Size++;
+ ++Size;
}
if (Size > MAX_HEADER_SIZE)
@@ -263,7 +263,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
preserveCanonicalLoopForm(LPM);
- NumRotated++;
+ ++NumRotated;
return true;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 86ea3eb..a250a88 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -392,12 +392,13 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
-/// isMulSExtable - Return true if the given add can be sign-extended
+/// isMulSExtable - Return true if the given mul can be sign-extended
/// without changing its value.
-static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
const Type *WideTy =
- IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
- return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
+ return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}
/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
@@ -413,20 +414,28 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (LHS == RHS)
return SE.getConstant(LHS->getType(), 1);
- // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
- // folding.
- if (RHS->isAllOnesValue())
- return SE.getMulExpr(LHS, RHS);
+ // Handle a few RHS special cases.
+ const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+ if (RC) {
+ const APInt &RA = RC->getValue()->getValue();
+ // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
+ // some folding.
+ if (RA.isAllOnesValue())
+ return SE.getMulExpr(LHS, RC);
+ // Handle x /s 1 as x.
+ if (RA == 1)
+ return LHS;
+ }
// Check for a division of a constant by a constant.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
- const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
if (!RC)
return 0;
- if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
+ const APInt &LA = C->getValue()->getValue();
+ const APInt &RA = RC->getValue()->getValue();
+ if (LA.srem(RA) != 0)
return 0;
- return SE.getConstant(C->getValue()->getValue()
- .sdiv(RC->getValue()->getValue()));
+ return SE.getConstant(LA.sdiv(RA));
}
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
@@ -440,6 +449,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
if (!Step) return 0;
return SE.getAddRecExpr(Start, Step, AR->getLoop());
}
+ return 0;
}
// Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -455,10 +465,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
}
return SE.getAddExpr(Ops);
}
+ return 0;
}
// Check for a multiply operand that we can pull RHS out of.
- if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
+ if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
SmallVector<const SCEV *, 4> Ops;
bool Found = false;
@@ -475,6 +486,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
}
return Found ? SE.getMulExpr(Ops) : 0;
}
+ return 0;
+ }
// Otherwise we don't know.
return 0;
@@ -546,7 +559,7 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
- if (II->getOperand(1) == OperandVal)
+ if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
}
@@ -568,7 +581,7 @@ static const Type *getAccessType(const Instruction *Inst) {
case Intrinsic::x86_sse2_storeu_pd:
case Intrinsic::x86_sse2_storeu_dq:
case Intrinsic::x86_sse2_storel_dq:
- AccessTy = II->getOperand(1)->getType();
+ AccessTy = II->getArgOperand(0)->getType();
break;
}
}
@@ -976,6 +989,8 @@ public:
void dump() const;
};
+}
+
/// HasFormulaWithSameRegs - Test whether this use has a formula with the same
/// registers as the given formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
@@ -1203,6 +1218,32 @@ static bool isAlwaysFoldable(const SCEV *S,
return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
}
+namespace {
+
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+ static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+ }
+
+ static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+ return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+ }
+
+ static unsigned
+ getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+ unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+ Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+ return Result;
+ }
+
+ static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+ const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+ return LHS == RHS;
+ }
+};
+
/// FormulaSorter - This class implements an ordering for formulae which sorts
/// them by their standalone cost.
class FormulaSorter {
@@ -1275,7 +1316,9 @@ class LSRInstance {
}
// Support for sharing of LSRUses between LSRFixups.
- typedef DenseMap<const SCEV *, size_t> UseMapTy;
+ typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+ size_t,
+ UseMapDenseMapInfo> UseMapTy;
UseMapTy UseMap;
bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@@ -1613,8 +1656,11 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
NewRHS = Sel->getOperand(1);
else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
+ else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
+ NewRHS = SU->getValue();
else
- llvm_unreachable("Max doesn't match expected pattern!");
+ // Max doesn't match expected pattern.
+ return Cond;
// Determine the new comparison opcode. It may be signed or unsigned,
// and the original comparison may be either equality or inequality.
@@ -1805,6 +1851,8 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
NewMaxOffset = NewOffset;
}
// Check for a mismatched access type, and fall back conservatively as needed.
+ // TODO: Be less conservative when the type is similar and can use the same
+ // addressing modes.
if (Kind == LSRUse::Address && AccessTy != LU.AccessTy)
NewAccessTy = Type::getVoidTy(AccessTy->getContext());
@@ -1833,7 +1881,7 @@ LSRInstance::getUse(const SCEV *&Expr,
}
std::pair<UseMapTy::iterator, bool> P =
- UseMap.insert(std::make_pair(Expr, 0));
+ UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0));
if (!P.second) {
// A use already existed with this base.
size_t LUIdx = P.first->second;
@@ -1919,7 +1967,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
Strides.insert(AR->getStepRecurrence(SE));
Worklist.push_back(AR->getStart());
} else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
- Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end());
+ Worklist.append(Add->op_begin(), Add->op_end());
}
} while (!Worklist.empty());
}
@@ -2086,7 +2134,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
const SCEV *S = Worklist.pop_back_val();
if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
- Worklist.insert(Worklist.end(), N->op_begin(), N->op_end());
+ Worklist.append(N->op_begin(), N->op_end());
else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
Worklist.push_back(C->getOperand());
else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
@@ -2095,8 +2143,12 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
if (!Inserted.insert(U)) continue;
const Value *V = U->getValue();
- if (const Instruction *Inst = dyn_cast<Instruction>(V))
+ if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
+ // Look for instructions defined outside the loop.
if (L->contains(Inst)) continue;
+ } else if (isa<UndefValue>(V))
+ // Undef doesn't have a live range, so it doesn't matter.
+ continue;
for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const Instruction *UserInst = dyn_cast<Instruction>(*UI);
@@ -2155,20 +2207,23 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
/// separate registers. If C is non-null, multiply each subexpression by C.
static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
SmallVectorImpl<const SCEV *> &Ops,
+ SmallVectorImpl<const SCEV *> &UninterestingOps,
+ const Loop *L,
ScalarEvolution &SE) {
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
// Break out add operands.
for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
I != E; ++I)
- CollectSubexprs(*I, C, Ops, SE);
+ CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE);
return;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
if (!AR->getStart()->isZero()) {
CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
- AR->getLoop()), C, Ops, SE);
- CollectSubexprs(AR->getStart(), C, Ops, SE);
+ AR->getLoop()),
+ C, Ops, UninterestingOps, L, SE);
+ CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE);
return;
}
} else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
@@ -2178,13 +2233,17 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C,
dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
CollectSubexprs(Mul->getOperand(1),
C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0,
- Ops, SE);
+ Ops, UninterestingOps, L, SE);
return;
}
}
- // Otherwise use the value itself.
- Ops.push_back(C ? SE.getMulExpr(C, S) : S);
+ // Otherwise use the value itself. Loop-variant "unknown" values are
+ // uninteresting; we won't be able to do anything meaningful with them.
+ if (!C && isa<SCEVUnknown>(S) && !S->isLoopInvariant(L))
+ UninterestingOps.push_back(S);
+ else
+ Ops.push_back(C ? SE.getMulExpr(C, S) : S);
}
/// GenerateReassociations - Split out subexpressions from adds and the bases of
@@ -2198,8 +2257,15 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
const SCEV *BaseReg = Base.BaseRegs[i];
- SmallVector<const SCEV *, 8> AddOps;
- CollectSubexprs(BaseReg, 0, AddOps, SE);
+ SmallVector<const SCEV *, 8> AddOps, UninterestingAddOps;
+ CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE);
+
+ // Add any uninteresting values as one register, as we won't be able to
+ // form any interesting reassociation opportunities with them. They'll
+ // just have to be added inside the loop no matter what we do.
+ if (!UninterestingAddOps.empty())
+ AddOps.push_back(SE.getAddExpr(UninterestingAddOps));
+
if (AddOps.size() == 1) continue;
for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
@@ -2212,11 +2278,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
continue;
// Collect all operands except *J.
- SmallVector<const SCEV *, 8> InnerAddOps;
- for (SmallVectorImpl<const SCEV *>::const_iterator K = AddOps.begin(),
- KE = AddOps.end(); K != KE; ++K)
- if (K != J)
- InnerAddOps.push_back(*K);
+ SmallVector<const SCEV *, 8> InnerAddOps
+ ( ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
+ InnerAddOps.append
+ (next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());
// Don't leave just a constant behind in a register if the constant could
// be folded into an immediate field.
@@ -2350,13 +2415,12 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
for (SmallSetVector<int64_t, 8>::const_iterator
I = Factors.begin(), E = Factors.end(); I != E; ++I) {
int64_t Factor = *I;
- Formula F = Base;
// Check that the multiplication doesn't overflow.
- if (F.AM.BaseOffs == INT64_MIN && Factor == -1)
+ if (Base.AM.BaseOffs == INT64_MIN && Factor == -1)
continue;
- F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
- if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
+ int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
+ if (NewBaseOffs / Factor != Base.AM.BaseOffs)
continue;
// Check that multiplying with the use offset doesn't overflow.
@@ -2367,6 +2431,9 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
if (Offset / Factor != LU.MinOffset)
continue;
+ Formula F = Base;
+ F.AM.BaseOffs = NewBaseOffs;
+
// Check that this scale is legal.
if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
continue;
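
// Sketch (not part of this patch): UseMapDenseMapInfo above follows the
// DenseMap key contract -- two sentinel keys that can never equal a real
// key, a hash, and equality. The same shape for a generic pointer/enum
// pair (hypothetical Key type):

#include <cstddef>
#include <utility>

struct PairKeyInfo {
  typedef std::pair<const void *, unsigned> Key;
  static Key getEmptyKey()     { return Key(reinterpret_cast<const void *>(-1), 0); }
  static Key getTombstoneKey() { return Key(reinterpret_cast<const void *>(-2), 0); }
  static unsigned getHashValue(const Key &K) {
    return (unsigned)(std::size_t)K.first ^ (K.second * 37u);
  }
  static bool isEqual(const Key &L, const Key &R) { return L == R; }
};
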
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index ae7bf40..0c900ff 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -445,7 +445,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
// This is a very ad-hoc heuristic.
if (Metrics.NumInsts > Threshold ||
Metrics.NumBlocks * 5 > Threshold ||
- Metrics.NeverInline) {
+ Metrics.containsIndirectBr || Metrics.isRecursive) {
DEBUG(dbgs() << "NOT unswitching loop %"
<< currentLoop->getHeader()->getName() << ", cost too high: "
<< currentLoop->getBlocks().size() << "\n");
@@ -457,21 +457,21 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) {
}
// RemapInstruction - Convert the instruction operands from referencing the
-// current values into those specified by ValueMap.
+// current values into those specified by VMap.
//
static inline void RemapInstruction(Instruction *I,
- DenseMap<const Value *, Value*> &ValueMap) {
+ ValueMap<const Value *, Value*> &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
- DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
- if (It != ValueMap.end()) Op = It->second;
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ if (It != VMap.end()) Op = It->second;
I->setOperand(op, Op);
}
}
/// CloneLoop - Recursively clone the specified loop and all of its children,
/// mapping the blocks with the specified map.
-static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap<const Value*, Value*> &VM,
+static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap<const Value*, Value*> &VM,
LoopInfo *LI, LPPassManager *LPM) {
Loop *New = new Loop();
LPM->insertLoop(New, PL);
@@ -615,11 +615,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
// the loop preheader and exit blocks), keeping track of the mapping between
// the instructions and blocks.
NewBlocks.reserve(LoopBlocks.size());
- DenseMap<const Value*, Value*> ValueMap;
+ ValueMap<const Value*, Value*> VMap;
for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) {
- BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F);
+ BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F);
NewBlocks.push_back(NewBB);
- ValueMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
+ VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping.
LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L);
}
@@ -629,7 +629,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
NewBlocks[0], F->end());
// Now we create the new Loop object for the versioned loop.
- Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM);
+ Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
Loop *ParentLoop = L->getParentLoop();
if (ParentLoop) {
// Make sure to add the cloned preheader and exit blocks to the parent loop
@@ -638,7 +638,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
}
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
- BasicBlock *NewExit = cast<BasicBlock>(ValueMap[ExitBlocks[i]]);
+ BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[i]]);
// The new exit block should be in the same loop as the old one.
if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i]))
ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase());
@@ -653,8 +653,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (BasicBlock::iterator I = ExitSucc->begin(); isa<PHINode>(I); ++I) {
PN = cast<PHINode>(I);
Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]);
- DenseMap<const Value *, Value*>::iterator It = ValueMap.find(V);
- if (It != ValueMap.end()) V = It->second;
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(V);
+ if (It != VMap.end()) V = It->second;
PN->addIncoming(V, NewExit);
}
}
@@ -663,7 +663,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
for (BasicBlock::iterator I = NewBlocks[i]->begin(),
E = NewBlocks[i]->end(); I != E; ++I)
- RemapInstruction(I, ValueMap);
+ RemapInstruction(I, VMap);
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
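
// Sketch (not part of this patch) of what RemapInstruction above does, with
// std::map standing in for ValueMap: after cloning, every operand that still
// points into the original loop is looked up and replaced by its clone. (As
// I understand it, ValueMap keys through value handles, so entries can follow
// values that are RAUW'd or deleted mid-transform, which a plain DenseMap
// cannot do.) Hypothetical Inst type:

#include <cstddef>
#include <map>
#include <vector>

struct Inst { std::vector<Inst *> Ops; };

static void remap(Inst *I, const std::map<Inst *, Inst *> &VM) {
  for (std::size_t op = 0, e = I->Ops.size(); op != e; ++op) {
    std::map<Inst *, Inst *>::const_iterator It = VM.find(I->Ops[op]);
    if (It != VM.end())
      I->Ops[op] = It->second;             // substitute the cloned value
  }
}
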
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 3611b8e..0e566c5 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -632,7 +632,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) {
// Remove the memcpy
MD.removeInstruction(cpy);
cpy->eraseFromParent();
- NumMemCpyInstr++;
+ ++NumMemCpyInstr;
return true;
}
@@ -710,7 +710,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
if (MD.getDependency(C) == dep) {
MD.removeInstruction(M);
M->eraseFromParent();
- NumMemCpyInstr++;
+ ++NumMemCpyInstr;
return true;
}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 5aca9cdc..98452f5 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -407,13 +407,14 @@ static Value *NegateValue(Value *V, Instruction *BI) {
// Okay, we need to materialize a negated version of V with an instruction.
// Scan the use lists of V to see if we have one already.
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- if (!BinaryOperator::isNeg(*UI)) continue;
+ User *U = *UI;
+ if (!BinaryOperator::isNeg(U)) continue;
// We found one! Now we have to make sure that the definition dominates
// this use. We do this by moving it to the entry block (if it is a
// non-instruction value) or right after the definition. These negates will
// be zapped by reassociate later, so we don't need much finesse here.
- BinaryOperator *TheNeg = cast<BinaryOperator>(*UI);
+ BinaryOperator *TheNeg = cast<BinaryOperator>(U);
// Verify that the negate is in this function, V might be a constant expr.
if (TheNeg->getParent()->getParent() != BI->getParent()->getParent())
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 5ca9ce3..dd445f6 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -926,7 +926,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
DeleteDeadInstructions();
AI->eraseFromParent();
- NumReplaced++;
+ ++NumReplaced;
}
/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
@@ -965,11 +965,11 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
isSafeGEP(GEPI, AI, GEPOffset, Info);
if (!Info.isUnsafe)
isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
- } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
+ } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
if (Length)
isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
- UI.getOperandNo() == 1, Info);
+ UI.getOperandNo() == CallInst::ArgOffset, Info);
else
MarkUnsafe(Info);
} else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
@@ -1272,6 +1272,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// If there is an other pointer, we want to convert it to the same pointer
// type as AI has, so we can GEP through it safely.
if (OtherPtr) {
+ unsigned AddrSpace =
+ cast<PointerType>(OtherPtr->getType())->getAddressSpace();
// Remove bitcasts and all-zero GEPs from OtherPtr. This is an
// optimization, but it's also required to detect the corner case where
@@ -1279,20 +1281,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// OtherPtr may be a bitcast or GEP that is currently being rewritten. (This
// function is only called for mem intrinsics that access the whole
// aggregate, so non-zero GEPs are not an issue here.)
- while (1) {
- if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
- OtherPtr = BC->getOperand(0);
- continue;
- }
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
- // All zero GEPs are effectively bitcasts.
- if (GEP->hasAllZeroIndices()) {
- OtherPtr = GEP->getOperand(0);
- continue;
- }
- }
- break;
- }
+ OtherPtr = OtherPtr->stripPointerCasts();
+
// Copying the alloca to itself is a no-op: just delete it.
if (OtherPtr == AI || OtherPtr == NewElts[0]) {
// This code will run twice for a no-op memcpy -- once for each operand.
@@ -1304,15 +1294,13 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
return;
}
- if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
- if (BCE->getOpcode() == Instruction::BitCast)
- OtherPtr = BCE->getOperand(0);
-
// If the pointer is not the right type, insert a bitcast to the right
// type.
- if (OtherPtr->getType() != AI->getType())
- OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
- MI);
+ const Type *NewTy =
+ PointerType::get(AI->getType()->getElementType(), AddrSpace);
+
+ if (OtherPtr->getType() != NewTy)
+ OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
}
// Process each element of the aggregate.
@@ -1373,7 +1361,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// If the stored element is zero (common case), just store a null
// constant.
Constant *StoreVal;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
if (CI->isZero()) {
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
@@ -1436,7 +1424,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
Value *Ops[] = {
SROADest ? EltPtr : OtherElt, // Dest ptr
SROADest ? OtherElt : EltPtr, // Src ptr
- ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
// Align
ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign),
MI->getVolatileCst()
@@ -1451,8 +1439,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
} else {
assert(isa<MemSetInst>(MI));
Value *Ops[] = {
- EltPtr, MI->getOperand(2), // Dest, Value,
- ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
+ EltPtr, MI->getArgOperand(1), // Dest, Value,
+ ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size
Zero, // Align
ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile
};
@@ -1655,7 +1643,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
}
- ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ // Don't create an 'or x, 0' on the first iteration.
+ if (!isa<Constant>(ResultVal) ||
+ !cast<Constant>(ResultVal)->isNullValue())
+ ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
+ else
+ ResultVal = SrcField;
}
// Handle tail padding by truncating the result
@@ -1794,7 +1787,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (isOffset) return false;
// If the memintrinsic isn't using the alloca as the dest, reject it.
- if (UI.getOperandNo() != 1) return false;
+ if (UI.getOperandNo() != CallInst::ArgOffset) return false;
// If the source of the memcpy/move is not a constant global, reject it.
if (!PointsToConstantGlobal(MI->getSource()))
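
// Sketch (not part of this patch): the instruction-form half of what the
// hand-rolled loop deleted above did, and what the new stripPointerCasts()
// call relies on -- peel bitcasts and all-zero GEPs down to the underlying
// pointer. stripPointerCasts also covers the ConstantExpr forms that the
// removed BCE check handled separately:

#include "llvm/Instructions.h"
using namespace llvm;

static Value *peelCasts(Value *P) {
  for (;;) {
    if (BitCastInst *BC = dyn_cast<BitCastInst>(P)) {
      P = BC->getOperand(0);
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(P)) {
      if (GEP->hasAllZeroIndices()) {      // an all-zero GEP is a bitcast
        P = GEP->getOperand(0);
        continue;
      }
    }
    return P;    // roughly P->stripPointerCasts(), minus the constant cases
  }
}
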
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 9744100..49d93a2 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -137,6 +137,9 @@ static bool MarkAliveBlocks(BasicBlock *BB,
// they should be changed to unreachable by passes that can't modify the
// CFG.
if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ // Don't touch volatile stores.
+ if (SI->isVolatile()) continue;
+
Value *Ptr = SI->getOperand(1);
if (isa<UndefValue>(Ptr) ||
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 7414be7..b1c6191 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -66,6 +66,11 @@ public:
this->TD = TD;
if (CI->getCalledFunction())
Context = &CI->getCalledFunction()->getContext();
+
+ // We never change the calling convention.
+ if (CI->getCallingConv() != llvm::CallingConv::C)
+ return NULL;
+
return CallOptimizer(CI->getCalledFunction(), CI, B);
}
};
@@ -92,6 +97,20 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) {
return true;
}
+/// IsOnlyUsedInEqualityComparison - Return true if V is only used in equality
+/// comparisons with With.
+static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) {
+ for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
+ UI != E; ++UI) {
+ if (ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
+ if (IC->isEquality() && IC->getOperand(1) == With)
+ continue;
+ // Unknown instruction.
+ return false;
+ }
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// String and Memory LibCall Optimizations
//===----------------------------------------------------------------------===//
@@ -110,8 +129,8 @@ struct StrCatOpt : public LibCallOptimization {
return 0;
// Extract some information from the instruction
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
// See if we can get the length of the input string.
uint64_t Len = GetStringLength(Src);
@@ -162,12 +181,12 @@ struct StrNCatOpt : public StrCatOpt {
return 0;
// Extract some information from the instruction
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
uint64_t Len;
// We don't do anything if length is not constant
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Len = LengthArg->getZExtValue();
else
return 0;
@@ -207,11 +226,11 @@ struct StrChrOpt : public LibCallOptimization {
FT->getParamType(0) != FT->getReturnType())
return 0;
- Value *SrcStr = CI->getOperand(1);
+ Value *SrcStr = CI->getArgOperand(0);
// If the second operand is non-constant, see if we can compute the length
// of the input string and turn this into memchr.
- ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getOperand(2));
+ ConstantInt *CharC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
if (CharC == 0) {
// These optimizations require TargetData.
if (!TD) return 0;
@@ -220,7 +239,7 @@ struct StrChrOpt : public LibCallOptimization {
if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32.
return 0;
- return EmitMemChr(SrcStr, CI->getOperand(2), // include nul.
+ return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul.
ConstantInt::get(TD->getIntPtrType(*Context), Len),
B, TD);
}
@@ -260,12 +279,12 @@ struct StrCmpOpt : public LibCallOptimization {
// Verify the "strcmp" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
- Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strcmp(x,x) -> 0
return ConstantInt::get(CI->getType(), 0);
@@ -308,19 +327,19 @@ struct StrNCmpOpt : public LibCallOptimization {
// Verify the "strncmp" function prototype.
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 ||
- !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
!FT->getParamType(2)->isIntegerTy())
return 0;
- Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
+ Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1);
if (Str1P == Str2P) // strncmp(x,x,n) -> 0
return ConstantInt::get(CI->getType(), 0);
// Get the length argument if it is constant.
uint64_t Length;
- if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getOperand(3)))
+ if (ConstantInt *LengthArg = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
Length = LengthArg->getZExtValue();
else
return 0;
@@ -328,6 +347,9 @@ struct StrNCmpOpt : public LibCallOptimization {
if (Length == 0) // strncmp(x,y,0) -> 0
return ConstantInt::get(CI->getType(), 0);
+ if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1)
+ return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD);
+
std::string Str1, Str2;
bool HasStr1 = GetConstantStringInfo(Str1P, Str1);
bool HasStr2 = GetConstantStringInfo(Str2P, Str2);
@@ -365,7 +387,7 @@ struct StrCpyOpt : public LibCallOptimization {
FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
- Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2);
+ Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) // strcpy(x,x) -> x
return Src;
@@ -381,7 +403,7 @@ struct StrCpyOpt : public LibCallOptimization {
if (OptChkCall)
EmitMemCpyChk(Dst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len),
- CI->getOperand(3), B, TD);
+ CI->getArgOperand(2), B, TD);
else
EmitMemCpy(Dst, Src,
ConstantInt::get(TD->getIntPtrType(*Context), Len),
@@ -402,9 +424,9 @@ struct StrNCpyOpt : public LibCallOptimization {
!FT->getParamType(2)->isIntegerTy())
return 0;
- Value *Dst = CI->getOperand(1);
- Value *Src = CI->getOperand(2);
- Value *LenOp = CI->getOperand(3);
+ Value *Dst = CI->getArgOperand(0);
+ Value *Src = CI->getArgOperand(1);
+ Value *LenOp = CI->getArgOperand(2);
// See if we can get the length of the input string.
uint64_t SrcLen = GetStringLength(Src);
@@ -452,7 +474,7 @@ struct StrLenOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy())
return 0;
- Value *Src = CI->getOperand(1);
+ Value *Src = CI->getArgOperand(0);
// Constant folding: strlen("xyz") -> 3
if (uint64_t Len = GetStringLength(Src))
@@ -477,7 +499,7 @@ struct StrToOpt : public LibCallOptimization {
!FT->getParamType(1)->isPointerTy())
return 0;
- Value *EndPtr = CI->getOperand(2);
+ Value *EndPtr = CI->getArgOperand(1);
if (isa<ConstantPointerNull>(EndPtr)) {
CI->setOnlyReadsMemory();
CI->addAttribute(1, Attribute::NoCapture);
@@ -500,17 +522,34 @@ struct StrStrOpt : public LibCallOptimization {
return 0;
// fold strstr(x, x) -> x.
- if (CI->getOperand(1) == CI->getOperand(2))
- return B.CreateBitCast(CI->getOperand(1), CI->getType());
+ if (CI->getArgOperand(0) == CI->getArgOperand(1))
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
+
+ // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0
+ if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) {
+ Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD);
+ Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1),
+ StrLen, B, TD);
+ for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end();
+ UI != UE; ) {
+ ICmpInst *Old = cast<ICmpInst>(UI++);
+ Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp,
+ ConstantInt::getNullValue(StrNCmp->getType()),
+ "cmp");
+ Old->replaceAllUsesWith(Cmp);
+ Old->eraseFromParent();
+ }
+ return CI;
+ }
// See if either input string is a constant string.
std::string SearchStr, ToFindStr;
- bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr);
- bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr);
+ bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr);
+ bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr);
// fold strstr(x, "") -> x.
if (HasStr2 && ToFindStr.empty())
- return B.CreateBitCast(CI->getOperand(1), CI->getType());
+ return B.CreateBitCast(CI->getArgOperand(0), CI->getType());
// If both strings are known, constant fold it.
if (HasStr1 && HasStr2) {
@@ -520,14 +559,14 @@ struct StrStrOpt : public LibCallOptimization {
return Constant::getNullValue(CI->getType());
// strstr("abcd", "bc") -> gep((char*)"abcd", 1)
- Value *Result = CastToCStr(CI->getOperand(1), B);
+ Value *Result = CastToCStr(CI->getArgOperand(0), B);
Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr");
return B.CreateBitCast(Result, CI->getType());
}
// fold strstr(x, "y") -> strchr(x, 'y').
if (HasStr2 && ToFindStr.size() == 1)
- return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD),
+ return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD),
CI->getType());
return 0;
}
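
// Sketch (not part of this patch): the source-level effect of the
// strstr(a, b) == a fold added above -- a "starts with" test stops
// scanning at the prefix length instead of searching the whole string:

#include <cstring>

static bool startsWithBefore(const char *Hay, const char *Pre) {
  return std::strstr(Hay, Pre) == Hay;                   // full search
}
static bool startsWithAfter(const char *Hay, const char *Pre) {
  return std::strncmp(Hay, Pre, std::strlen(Pre)) == 0;  // bounded compare
}
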
@@ -545,13 +584,13 @@ struct MemCmpOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy(32))
return 0;
- Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2);
+ Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1);
if (LHS == RHS) // memcmp(s,s,x) -> 0
return Constant::getNullValue(CI->getType());
// Make sure we have a constant length.
- ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getOperand(3));
+ ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
if (!LenC) return 0;
uint64_t Len = LenC->getZExtValue();
@@ -598,9 +637,9 @@ struct MemCpyOpt : public LibCallOptimization {
return 0;
// memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1)
- EmitMemCpy(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), 1, false, B, TD);
- return CI->getOperand(1);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
+ return CI->getArgOperand(0);
}
};
@@ -620,9 +659,9 @@ struct MemMoveOpt : public LibCallOptimization {
return 0;
// memmove(x, y, n) -> llvm.memmove(x, y, n, 1)
- EmitMemMove(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), 1, false, B, TD);
- return CI->getOperand(1);
+ EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), 1, false, B, TD);
+ return CI->getArgOperand(0);
}
};
@@ -642,10 +681,10 @@ struct MemSetOpt : public LibCallOptimization {
return 0;
// memset(p, v, n) -> llvm.memset(p, v, n, 1)
- Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context),
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context),
false);
- EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD);
- return CI->getOperand(1);
+ EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ return CI->getArgOperand(0);
}
};
@@ -666,7 +705,7 @@ struct PowOpt : public LibCallOptimization {
!FT->getParamType(0)->isFloatingPointTy())
return 0;
- Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2);
+ Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0
return Op1C;
@@ -720,18 +759,18 @@ struct Exp2Opt : public LibCallOptimization {
!FT->getParamType(0)->isFloatingPointTy())
return 0;
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
// Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32
// Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32
Value *LdExpArg = 0;
if (SIToFPInst *OpC = dyn_cast<SIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32)
LdExpArg = B.CreateSExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ Type::getInt32Ty(*Context), "tmp");
} else if (UIToFPInst *OpC = dyn_cast<UIToFPInst>(Op)) {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
LdExpArg = B.CreateZExt(OpC->getOperand(0),
- Type::getInt32Ty(*Context), "tmp");
+ Type::getInt32Ty(*Context), "tmp");
}
if (LdExpArg) {
@@ -772,7 +811,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization {
return 0;
// If this is something like 'floor((double)floatval)', convert to floorf.
- FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
+ FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getArgOperand(0));
if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy())
return 0;
@@ -797,11 +836,11 @@ struct FFSOpt : public LibCallOptimization {
// Just make sure this takes a single integer argument and that the result
// is i32, matching int ffs(int).
if (FT->getNumParams() != 1 ||
- !FT->getReturnType()->isIntegerTy(32) ||
+ !FT->getReturnType()->isIntegerTy(32) ||
!FT->getParamType(0)->isIntegerTy())
return 0;
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
// Constant fold.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
@@ -821,7 +860,7 @@ struct FFSOpt : public LibCallOptimization {
Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp");
return B.CreateSelect(Cond, V,
- ConstantInt::get(Type::getInt32Ty(*Context), 0));
+ ConstantInt::get(Type::getInt32Ty(*Context), 0));
}
};
@@ -837,7 +876,7 @@ struct IsDigitOpt : public LibCallOptimization {
return 0;
// isdigit(c) -> (c-'0') <u 10
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'),
"isdigittmp");
Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10),
@@ -858,7 +897,7 @@ struct IsAsciiOpt : public LibCallOptimization {
return 0;
// isascii(c) -> c <u 128
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128),
"isascii");
return B.CreateZExt(Op, CI->getType());
@@ -877,7 +916,7 @@ struct AbsOpt : public LibCallOptimization {
return 0;
// abs(x) -> x >s -1 ? x : -x
- Value *Op = CI->getOperand(1);
+ Value *Op = CI->getArgOperand(0);
Value *Pos = B.CreateICmpSGT(Op,
Constant::getAllOnesValue(Op->getType()),
"ispos");
@@ -899,7 +938,7 @@ struct ToAsciiOpt : public LibCallOptimization {
return 0;
// toascii(c) -> c & 0x7f
- return B.CreateAnd(CI->getOperand(1),
+ return B.CreateAnd(CI->getArgOperand(0),
ConstantInt::get(CI->getType(),0x7F));
}
};
@@ -922,7 +961,7 @@ struct PrintFOpt : public LibCallOptimization {
// Check for a fixed format string.
std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(1), FormatStr))
+ if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr))
return 0;
// Empty format string -> noop.
@@ -954,20 +993,20 @@ struct PrintFOpt : public LibCallOptimization {
}
// Optimize specific format strings.
- // printf("%c", chr) --> putchar(*(i8*)dst)
- if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
- CI->getOperand(2)->getType()->isIntegerTy()) {
- Value *Res = EmitPutChar(CI->getOperand(2), B, TD);
+ // printf("%c", chr) --> putchar(chr)
+ if (FormatStr == "%c" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isIntegerTy()) {
+ Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD);
if (CI->use_empty()) return CI;
return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("%s\n", str) --> puts(str)
- if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
- CI->getOperand(2)->getType()->isPointerTy() &&
+ if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 &&
+ CI->getArgOperand(1)->getType()->isPointerTy() &&
CI->use_empty()) {
- EmitPutS(CI->getOperand(2), B, TD);
+ EmitPutS(CI->getArgOperand(1), B, TD);
return CI;
}
return 0;
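
Both PrintFOpt rewrites fire only for a literal format string and a matching argument type; the %s case additionally requires the printf result to be unused, since puts guarantees only a non-negative return. As source-level rewrites they amount to (sketch):

    #include <cstdio>

    int main(int argc, char **argv) {
      putchar('x');      // printf("%c", 'x')  --> putchar('x')
      puts(argv[0]);     // printf("%s\n", s)  --> puts(s), result unused
      return 0;
    }
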
@@ -988,11 +1027,11 @@ struct SPrintFOpt : public LibCallOptimization {
// Check for a fixed format string.
std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
// If we just have a format string (nothing else crazy) transform it.
- if (CI->getNumOperands() == 3) {
+ if (CI->getNumArgOperands() == 2) {
// Make sure there's no % in the constant array. We could try to handle
// %% -> % in the future if we cared.
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
@@ -1003,7 +1042,7 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1)
- EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte.
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte.
ConstantInt::get(TD->getIntPtrType(*Context),
FormatStr.size()+1), 1, false, B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
@@ -1011,16 +1050,17 @@ struct SPrintFOpt : public LibCallOptimization {
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
return 0;
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
- Value *V = B.CreateTrunc(CI->getOperand(3),
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ Value *V = B.CreateTrunc(CI->getArgOperand(2),
Type::getInt8Ty(*Context), "char");
- Value *Ptr = CastToCStr(CI->getOperand(1), B);
+ Value *Ptr = CastToCStr(CI->getArgOperand(0), B);
B.CreateStore(V, Ptr);
Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1),
"nul");
@@ -1034,13 +1074,13 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
- if (!CI->getOperand(3)->getType()->isPointerTy()) return 0;
+ if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0;
- Value *Len = EmitStrLen(CI->getOperand(3), B, TD);
+ Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD);
Value *IncLen = B.CreateAdd(Len,
ConstantInt::get(Len->getType(), 1),
"leninc");
- EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD);
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD);
// The sprintf result is the unincremented number of bytes in the string.
return B.CreateIntCast(Len, CI->getType(), false);
@@ -1064,8 +1104,8 @@ struct FWriteOpt : public LibCallOptimization {
return 0;
// Get the element size and count.
- ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getOperand(2));
- ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getOperand(3));
+ ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
if (!SizeC || !CountC) return 0;
uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue();
@@ -1075,8 +1115,8 @@ struct FWriteOpt : public LibCallOptimization {
// If this is writing one byte, turn it into fputc.
if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F)
- Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char");
- EmitFPutC(Char, CI->getOperand(4), B, TD);
+ Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char");
+ EmitFPutC(Char, CI->getArgOperand(3), B, TD);
return ConstantInt::get(CI->getType(), 1);
}
@@ -1100,11 +1140,11 @@ struct FPutsOpt : public LibCallOptimization {
return 0;
// fputs(s,F) --> fwrite(s,1,strlen(s),F)
- uint64_t Len = GetStringLength(CI->getOperand(1));
+ uint64_t Len = GetStringLength(CI->getArgOperand(0));
if (!Len) return 0;
- EmitFWrite(CI->getOperand(1),
+ EmitFWrite(CI->getArgOperand(0),
ConstantInt::get(TD->getIntPtrType(*Context), Len-1),
- CI->getOperand(2), B, TD);
+ CI->getArgOperand(1), B, TD);
return CI; // Known to have no uses (see above).
}
};
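
FWriteOpt and FPutsOpt convert in opposite directions: a one-byte fwrite becomes fputc, while fputs of a string with a compile-time-known length becomes fwrite of Len-1 bytes (GetStringLength counts the nul terminator, which fwrite must not emit). A sketch of the identities:

    #include <cstdio>

    int main() {
      const char S[] = "hi";
      fputc(S[0], stdout);        // fwrite(S, 1, 1, F) -> fputc(S[0], F)
      fwrite(S, 1, 2, stdout);    // fputs(S, F) -> fwrite(S, 1, strlen(S), F)
      return 0;
    }
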
@@ -1123,11 +1163,11 @@ struct FPrintFOpt : public LibCallOptimization {
// All the optimizations depend on the format string.
std::string FormatStr;
- if (!GetConstantStringInfo(CI->getOperand(2), FormatStr))
+ if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr))
return 0;
// fprintf(F, "foo") --> fwrite("foo", 3, 1, F)
- if (CI->getNumOperands() == 3) {
+ if (CI->getNumArgOperands() == 2) {
for (unsigned i = 0, e = FormatStr.size(); i != e; ++i)
if (FormatStr[i] == '%') // Could handle %% -> % if we cared.
return 0; // We found a format specifier.
@@ -1135,31 +1175,32 @@ struct FPrintFOpt : public LibCallOptimization {
// These optimizations require TargetData.
if (!TD) return 0;
- EmitFWrite(CI->getOperand(2),
+ EmitFWrite(CI->getArgOperand(1),
ConstantInt::get(TD->getIntPtrType(*Context),
FormatStr.size()),
- CI->getOperand(1), B, TD);
+ CI->getArgOperand(0), B, TD);
return ConstantInt::get(CI->getType(), FormatStr.size());
}
// The remaining optimizations require the format string to be "%s" or "%c"
// and have an extra operand.
- if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4)
+ if (FormatStr.size() != 2 || FormatStr[0] != '%' ||
+ CI->getNumArgOperands() < 3)
return 0;
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
- // fprintf(F, "%c", chr) --> *(i8*)dst = chr
- if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
- EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD);
+ // fprintf(F, "%c", chr) --> fputc(chr, F)
+ if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0;
+ EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
return ConstantInt::get(CI->getType(), 1);
}
if (FormatStr[1] == 's') {
- // fprintf(F, "%s", str) -> fputs(str, F)
- if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty())
+ // fprintf(F, "%s", str) --> fputs(str, F)
+ if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty())
return 0;
- EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD);
+ EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD);
return CI;
}
return 0;
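
Nearly all of the SimplifyLibCalls churn above is one mechanical substitution: with this revision, argument i of a call is reached via getArgOperand(i) instead of getOperand(i+1) (operand slot 0 held the callee in the old layout). A sketch of the idiom, with CI any CallInst*:

    Value *A = CI->getOperand(1);          // old: args lived at operands 1..N
    Value *B = CI->getArgOperand(0);       // new: same value, explicit arg index
    unsigned N = CI->getNumArgOperands();  // was getNumOperands() - 1

The CallInst::ArgOffset constant used in the BuildLibCalls hunks below appears to exist to keep raw operand indices working while the operand layout is in flux.
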
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 2306a77..9208238 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -206,12 +206,13 @@ static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock,
// there is only one other pred, get it, otherwise we can't handle it.
PI = pred_begin(DstBlock); PE = pred_end(DstBlock);
BasicBlock *DstOtherPred = 0;
- if (*PI == SrcBlock) {
+ BasicBlock *P = *PI;
+ if (P == SrcBlock) {
if (++PI == PE) return 0;
DstOtherPred = *PI;
if (++PI != PE) return 0;
} else {
- DstOtherPred = *PI;
+ DstOtherPred = P;
if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0;
}
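
This hunk, like the BreakCriticalEdges ones further down, hoists *PI into a named local before testing and reusing it. The patch doesn't state the motivation; plausibly it prepares for a pred_iterator whose dereference is no longer a trivial lvalue, and in any case it avoids re-dereferencing. The resulting pattern:

    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
      BasicBlock *P = *PI;   // dereference once
      if (P == SrcBlock)
        continue;
      // ... use P, not *PI ...
    }
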
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 5ad5de2..01c8e5d 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -16,9 +16,9 @@
// transformation from taking place, though currently the analysis cannot
// support moving any really useful instructions (only dead ones).
// 2. This pass transforms functions that are prevented from being tail
-// recursive by an associative expression to use an accumulator variable,
-// thus compiling the typical naive factorial or 'fib' implementation into
-// efficient code.
+// recursive by an associative and commutative expression to use an
+// accumulator variable, thus compiling the typical naive factorial or
+// 'fib' implementation into efficient code.
// 3. TRE is performed if the function returns void, if the return
// returns the result returned by the call, or if the function returns a
// run-time constant on all exits from the function. It is possible, though
@@ -60,6 +60,7 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/Loads.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/Statistic.h"
@@ -252,7 +253,7 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
// If we are passing this argument into call as the corresponding
// argument operand, then the argument is dynamically constant.
// Otherwise, we cannot transform this function safely.
- if (CI->getOperand(ArgNo+1) == Arg)
+ if (CI->getArgOperand(ArgNo) == Arg)
return true;
}
@@ -269,16 +270,16 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
}
// getCommonReturnValue - Check to see if the function containing the specified
-// return instruction and tail call consistently returns the same
-// runtime-constant value at all exit points. If so, return the returned value.
+// tail call consistently returns the same runtime-constant value at all exit
+// points except for IgnoreRI. If so, return the returned value.
//
-static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
- Function *F = TheRI->getParent()->getParent();
+static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) {
+ Function *F = CI->getParent()->getParent();
Value *ReturnedValue = 0;
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
- if (RI != TheRI) {
+ if (RI != IgnoreRI) {
Value *RetOp = RI->getOperand(0);
// We can only perform this transformation if the value returned is
@@ -301,9 +302,9 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
///
Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
CallInst *CI) {
- if (!I->isAssociative()) return 0;
+ if (!I->isAssociative() || !I->isCommutative()) return 0;
assert(I->getNumOperands() == 2 &&
- "Associative operations should have 2 args!");
+ "Associative/commutative operations should have 2 args!");
// Exactly one operand should be the result of the call instruction...
if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
@@ -368,11 +369,16 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
return false;
}
- // If we are introducing accumulator recursion to eliminate associative
- // operations after the call instruction, this variable contains the initial
- // value for the accumulator. If this value is set, we actually perform
- // accumulator recursion elimination instead of simple tail recursion
- // elimination.
+ // If we are introducing accumulator recursion to eliminate operations after
+ // the call instruction that are both associative and commutative, the initial
+ // value for the accumulator is placed in this variable. If this value is set
+ // then we actually perform accumulator recursion elimination instead of
+ // simple tail recursion elimination. If the operation is an LLVM instruction
+ // (e.g. "add") then it is recorded in AccumulatorRecursionInstr. If not, then
+ // we are handling the case when the return instruction returns a constant C
+ // which is different from the constant returned by other return instructions
+ // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a
+ // special case of accumulator recursion, the operation being "return C".
Value *AccumulatorRecursionEliminationInitVal = 0;
Instruction *AccumulatorRecursionInstr = 0;
@@ -383,9 +389,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI)
if (!CanMoveAboveCall(BBI, CI)) {
// If we can't move the instruction above the call, it might be because it
- // is an associative operation that could be tranformed using accumulator
- // recursion elimination. Check to see if this is the case, and if so,
- // remember the initial accumulator value for later.
+ // is an associative and commutative operation that could be transformed
+ // using accumulator recursion elimination. Check to see if this is the
+ // case, and if so, remember the initial accumulator value for later.
if ((AccumulatorRecursionEliminationInitVal =
CanTransformAccumulatorRecursion(BBI, CI))) {
// Yes, this is accumulator recursion. Remember which instruction
@@ -403,8 +409,18 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI &&
!isa<UndefValue>(Ret->getReturnValue()) &&
AccumulatorRecursionEliminationInitVal == 0 &&
- !getCommonReturnValue(Ret, CI))
- return false;
+ !getCommonReturnValue(0, CI)) {
+ // One case remains that we are able to handle: the current return
+ // instruction returns a constant, and all other return instructions
+ // return a different constant.
+ if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret))
+ return false; // Current return instruction does not return a constant.
+ // Check that all other return instructions return a common constant. If
+ // so, record it in AccumulatorRecursionEliminationInitVal.
+ AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI);
+ if (!AccumulatorRecursionEliminationInitVal)
+ return false;
+ }
// OK! We can transform this tail call. If this is the first one found,
// create the new entry block, allowing us to branch back to the old entry.
@@ -453,8 +469,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// Ok, now that we know we have a pseudo-entry block WITH all of the
// required PHI nodes, add entries into the PHI node for the actual
// parameters passed into the tail-recursive call.
- for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i)
- ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB);
+ for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
+ ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB);
// If we are introducing an accumulator variable to eliminate the recursion,
// do so now. Note that we _know_ that no subsequent tail recursion
@@ -464,8 +480,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
if (AccumulatorRecursionEliminationInitVal) {
Instruction *AccRecInstr = AccumulatorRecursionInstr;
// Start by inserting a new PHI node for the accumulator.
- PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr",
- OldEntry->begin());
+ PHINode *AccPN =
+ PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(),
+ "accumulator.tr", OldEntry->begin());
// Loop over all of the predecessors of the tail recursion block. For the
// real entry into the function we seed the PHI with the initial value,
@@ -475,20 +492,27 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
// it will not show up as a predecessor.
for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry);
PI != PE; ++PI) {
- if (*PI == &F->getEntryBlock())
- AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI);
+ BasicBlock *P = *PI;
+ if (P == &F->getEntryBlock())
+ AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P);
else
- AccPN->addIncoming(AccPN, *PI);
+ AccPN->addIncoming(AccPN, P);
}
- // Add an incoming argument for the current block, which is computed by our
- // associative accumulator instruction.
- AccPN->addIncoming(AccRecInstr, BB);
-
- // Next, rewrite the accumulator recursion instruction so that it does not
- // use the result of the call anymore, instead, use the PHI node we just
- // inserted.
- AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+ if (AccRecInstr) {
+ // Add an incoming argument for the current block, which is computed by
+ // our associative and commutative accumulator instruction.
+ AccPN->addIncoming(AccRecInstr, BB);
+
+ // Next, rewrite the accumulator recursion instruction so that it does not
+ // use the result of the call anymore, instead, use the PHI node we just
+ // inserted.
+ AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN);
+ } else {
+ // Add an incoming argument for the current block, which is just the
+ // constant returned by the current return instruction.
+ AccPN->addIncoming(Ret->getReturnValue(), BB);
+ }
// Finally, rewrite any return instructions in the program to return the PHI
// node instead of the "initval" that they do currently. This loop will
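
Two substantive changes in TailRecursionElimination: (1) accumulator recursion now requires the blocking instruction to be commutative as well as associative, which matches the rewrite actually performed — the loop accumulates from the left while the recursion folds from the right, and setOperand(AccRecInstr->getOperand(0) != CI, AccPN) can place the accumulator in either operand slot; (2) a new special case treats "return C" itself as the accumulated operation, when the current return yields a constant different from the common constant of the other returns. The classic effect of (1), as a worked example:

    // Not tail recursive: the multiply happens after the recursive call.
    unsigned fact(unsigned n) {
      if (n <= 1) return 1;
      return n * fact(n - 1);
    }

    // What accumulator recursion elimination effectively produces:
    unsigned fact_loop(unsigned n) {
      unsigned acc = 1;      // AccumulatorRecursionEliminationInitVal
      while (n > 1)
        acc *= n--;          // the associative+commutative AccRecInstr
      return acc;
    }
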
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index ea9d1c1..4d64c85 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -381,29 +381,28 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
const TargetLowering &TLI) {
std::vector<InlineAsm::ConstraintInfo>
Constraints = IA->ParseConstraints();
-
- unsigned ArgNo = 1; // ArgNo - The operand of the CallInst.
+
+ unsigned ArgNo = 0; // The argument of the CallInst.
for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo OpInfo(Constraints[i]);
-
+
// Compute the value type for each operand.
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.isIndirect)
- OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
+ OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
break;
case InlineAsm::isInput:
- OpInfo.CallOperandVal = CI->getOperand(ArgNo++);
+ OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++);
break;
case InlineAsm::isClobber:
// Nothing to do.
break;
}
-
+
// Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue(),
- OpInfo.ConstraintType == TargetLowering::C_Memory);
-
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
+
// If this asm operand is our Value*, and if it isn't an indirect memory
// operand, we can't fold it!
if (OpInfo.CallOperandVal == OpVal &&
@@ -411,7 +410,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
!OpInfo.isIndirect))
return false;
}
-
+
return true;
}
@@ -450,7 +449,7 @@ static bool FindAllMemoryUses(Instruction *I,
if (CallInst *CI = dyn_cast<CallInst>(U)) {
InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
- if (IA == 0) return true;
+ if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 2f1ae00..ec625b4 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -558,121 +558,3 @@ void llvm::FindFunctionBackedges(const Function &F,
}
-
-
-
-/// AreEquivalentAddressValues - Test if A and B will obviously have the same
-/// value. This includes recognizing that %t0 and %t1 will have the same
-/// value in code like this:
-/// %t0 = getelementptr \@a, 0, 3
-/// store i32 0, i32* %t0
-/// %t1 = getelementptr \@a, 0, 3
-/// %t2 = load i32* %t1
-///
-static bool AreEquivalentAddressValues(const Value *A, const Value *B) {
- // Test if the values are trivially equivalent.
- if (A == B) return true;
-
- // Test if the values come from identical arithmetic instructions.
- // Use isIdenticalToWhenDefined instead of isIdenticalTo because
- // this function is only used when one address use dominates the
- // other, which means that they'll always either have the same
- // value or one of them will have an undefined value.
- if (isa<BinaryOperator>(A) || isa<CastInst>(A) ||
- isa<PHINode>(A) || isa<GetElementPtrInst>(A))
- if (const Instruction *BI = dyn_cast<Instruction>(B))
- if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
- return true;
-
- // Otherwise they may not be equivalent.
- return false;
-}
-
-/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the
-/// instruction before ScanFrom) checking to see if we have the value at the
-/// memory address *Ptr locally available within a small number of instructions.
-/// If the value is available, return it.
-///
-/// If not, return the iterator for the last validated instruction that the
-/// value would be live through. If we scanned the entire block and didn't find
-/// something that invalidates *Ptr or provides it, ScanFrom would be left at
-/// begin() and this returns null. ScanFrom could also be left
-///
-/// MaxInstsToScan specifies the maximum instructions to scan in the block. If
-/// it is set to 0, it will scan the whole block. You can also optionally
-/// specify an alias analysis implementation, which makes this more precise.
-Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
- BasicBlock::iterator &ScanFrom,
- unsigned MaxInstsToScan,
- AliasAnalysis *AA) {
- if (MaxInstsToScan == 0) MaxInstsToScan = ~0U;
-
- // If we're using alias analysis to disambiguate get the size of *Ptr.
- unsigned AccessSize = 0;
- if (AA) {
- const Type *AccessTy = cast<PointerType>(Ptr->getType())->getElementType();
- AccessSize = AA->getTypeStoreSize(AccessTy);
- }
-
- while (ScanFrom != ScanBB->begin()) {
- // We must ignore debug info directives when counting (otherwise they
- // would affect codegen).
- Instruction *Inst = --ScanFrom;
- if (isa<DbgInfoIntrinsic>(Inst))
- continue;
-
- // Restore ScanFrom to expected value in case next test succeeds
- ScanFrom++;
-
- // Don't scan huge blocks.
- if (MaxInstsToScan-- == 0) return 0;
-
- --ScanFrom;
- // If this is a load of Ptr, the loaded value is available.
- if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
- if (AreEquivalentAddressValues(LI->getOperand(0), Ptr))
- return LI;
-
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
- // If this is a store through Ptr, the value is available!
- if (AreEquivalentAddressValues(SI->getOperand(1), Ptr))
- return SI->getOperand(0);
-
- // If Ptr is an alloca and this is a store to a different alloca, ignore
- // the store. This is a trivial form of alias analysis that is important
- // for reg2mem'd code.
- if ((isa<AllocaInst>(Ptr) || isa<GlobalVariable>(Ptr)) &&
- (isa<AllocaInst>(SI->getOperand(1)) ||
- isa<GlobalVariable>(SI->getOperand(1))))
- continue;
-
- // If we have alias analysis and it says the store won't modify the loaded
- // value, ignore the store.
- if (AA &&
- (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
- continue;
-
- // Otherwise the store may or may not alias the pointer; bail out.
- ++ScanFrom;
- return 0;
- }
-
- // If this is some other instruction that may clobber Ptr, bail out.
- if (Inst->mayWriteToMemory()) {
- // If alias analysis claims that it really won't modify the load,
- // ignore it.
- if (AA &&
- (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0)
- continue;
-
- // May modify the pointer, bail out.
- ++ScanFrom;
- return 0;
- }
- }
-
- // Got to the start of the block, we didn't find it, but are done for this
- // block.
- return 0;
-}
-
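
The block deleted here isn't dropped from the tree: TailRecursionElimination above gains #include "llvm/Analysis/Loads.h", so AreEquivalentAddressValues and FindAvailableLoadedValue appear to have moved from Transforms/Utils into the Analysis library. The signature is unchanged; a typical call site, sketched (LI a LoadInst*, AA possibly null):

    BasicBlock::iterator ScanFrom = LI;   // scanning starts just before LI
    if (Value *V = FindAvailableLoadedValue(LI->getPointerOperand(),
                                            LI->getParent(), ScanFrom,
                                            /*MaxInstsToScan=*/6, AA)) {
      LI->replaceAllUsesWith(V);          // the load is redundant
      LI->eraseFromParent();
    }
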
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 8c25ad1..26f53c0 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -106,11 +106,12 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
// If AllowIdenticalEdges is true, then we allow this edge to be considered
// non-critical iff all preds come from TI's block.
while (I != E) {
- if (*I != FirstPred)
+ const BasicBlock *P = *I;
+ if (P != FirstPred)
return true;
// Note: leave this as is until no one ever compiles with either gcc 4.0.1
// or Xcode 2. This seems to work around the pred_iterator assert in PR 2207
- E = pred_end(*I);
+ E = pred_end(P);
++I;
}
return false;
@@ -277,11 +278,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
OtherPreds.push_back(PN->getIncomingBlock(i));
} else {
for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB);
- I != E; ++I)
- if (*I != NewBB)
- OtherPreds.push_back(*I);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (P != NewBB)
+ OtherPreds.push_back(P);
+ }
}
-
+
bool NewBBDominatesDestBB = true;
// Should we update DominatorTree information?
@@ -400,11 +403,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
bool HasPredOutsideOfLoop = false;
BasicBlock *Exit = ExitBlocks[i];
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit);
- I != E; ++I)
- if (TIL->contains(*I))
- Preds.push_back(*I);
+ I != E; ++I) {
+ BasicBlock *P = *I;
+ if (TIL->contains(P))
+ Preds.push_back(P);
else
HasPredOutsideOfLoop = true;
+ }
// If there are any preds not in the loop, we'll need to split
// the edges. The Preds.empty() check is needed because a block
// may appear multiple times in the list. We can't use
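
For reference, the property isCriticalEdge tests: an edge is critical when its source block has multiple successors and its destination has multiple predecessors, so neither endpoint can host code meant for that edge alone. The usual pairing with SplitCriticalEdge, sketched (P is the running pass, used to update dominator info):

    TerminatorInst *TI = BB->getTerminator();
    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
      if (isCriticalEdge(TI, i))
        SplitCriticalEdge(TI, i, P);   // inserts a fresh block on the edge
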
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index 767fa3a..7a9d007 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -69,6 +69,31 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B,
return CI;
}
+/// EmitStrNCmp - Emit a call to the strncmp function to the builder.
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI[3];
+ AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
+ AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
+ AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly |
+ Attribute::NoUnwind);
+
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI, 3),
+ B.getInt32Ty(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context), NULL);
+ CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B),
+ CastToCStr(Ptr2, B), Len, "strncmp");
+
+ if (const Function *F = dyn_cast<Function>(StrNCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
+}
+
/// EmitStrCpy - Emit a call to the strcpy function to the builder, for the
/// specified pointer arguments.
Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B,
@@ -112,10 +137,10 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align,
bool isVolatile, IRBuilder<> &B, const TargetData *TD) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
- const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() };
- Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3);
Dst = CastToCStr(Dst, B);
Src = CastToCStr(Src, B);
+ const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() };
+ Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3);
return B.CreateCall5(MemCpy, Dst, Src, Len,
ConstantInt::get(B.getInt32Ty(), Align),
ConstantInt::get(B.getInt1Ty(), isVolatile));
@@ -395,11 +420,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(2) != TD->getIntPtrType(Context) ||
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
-
- if (isFoldable(4, 3, false)) {
- EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+
+ if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
1, false, B, TD);
- replaceCall(CI->getOperand(1));
+ replaceCall(CI->getArgOperand(0));
return true;
}
return false;
@@ -418,11 +443,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(2) != TD->getIntPtrType(Context) ||
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
-
- if (isFoldable(4, 3, false)) {
- EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3),
+
+ if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
1, false, B, TD);
- replaceCall(CI->getOperand(1));
+ replaceCall(CI->getArgOperand(0));
return true;
}
return false;
@@ -436,12 +461,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
FT->getParamType(2) != TD->getIntPtrType(Context) ||
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
-
- if (isFoldable(4, 3, false)) {
- Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(),
+
+ if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(),
false);
- EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD);
- replaceCall(CI->getOperand(1));
+ EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD);
+ replaceCall(CI->getArgOperand(0));
return true;
}
return false;
@@ -462,8 +487,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
// st[rp]cpy_chk call which may fail at runtime if the size is too long.
// TODO: It might be nice to get a maximum length out of the possible
// string lengths for varying.
- if (isFoldable(3, 2, true)) {
- Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD,
+ if (isFoldable(2 + CallInst::ArgOffset, 1 + CallInst::ArgOffset, true)) {
+ Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD,
Name.substr(2, 6));
replaceCall(Ret);
return true;
@@ -479,10 +504,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) {
!FT->getParamType(2)->isIntegerTy() ||
FT->getParamType(3) != TD->getIntPtrType(Context))
return false;
-
- if (isFoldable(4, 3, false)) {
- Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), B, TD, Name.substr(2, 7));
+
+ if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) {
+ Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), B, TD, Name.substr(2, 7));
replaceCall(Ret);
return true;
}
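
EmitStrNCmp is the one genuinely new piece in BuildLibCalls.cpp and follows the pattern of the surrounding Emit* helpers: get-or-insert a strncmp declaration with nocapture/readonly attributes, cast the pointer arguments to i8*, and emit the call. Sketched use from inside a simplifier (Ptr1/Ptr2 are hypothetical pointer values; Context and TD as in the callers above):

    IRBuilder<> B(CI);   // insert before the call being simplified
    Value *Len = ConstantInt::get(TD->getIntPtrType(*Context), 4);
    Value *Cmp = EmitStrNCmp(Ptr1, Ptr2, Len, B, TD);  // i32, <0/0/>0 like strncmp
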
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 6d4fe4b..1dcfd57 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -32,7 +32,7 @@ using namespace llvm;
// CloneBasicBlock - See comments in Cloning.h
BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
- DenseMap<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
ClonedCodeInfo *CodeInfo) {
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
@@ -47,7 +47,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
- ValueMap[II] = NewInst; // Add instruction map to value.
+ VMap[II] = NewInst; // Add instruction map to value.
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
@@ -72,7 +72,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
// ArgMap values.
//
void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
- DenseMap<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &VMap,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix, ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
@@ -80,17 +80,17 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
#ifndef NDEBUG
for (Function::const_arg_iterator I = OldFunc->arg_begin(),
E = OldFunc->arg_end(); I != E; ++I)
- assert(ValueMap.count(I) && "No mapping from source argument specified!");
+ assert(VMap.count(I) && "No mapping from source argument specified!");
#endif
// Clone any attributes.
if (NewFunc->arg_size() == OldFunc->arg_size())
NewFunc->copyAttributesFrom(OldFunc);
else {
- //Some arguments were deleted with the ValueMap. Copy arguments one by one
+ //Some arguments were deleted with the VMap. Copy arguments one by one
for (Function::const_arg_iterator I = OldFunc->arg_begin(),
E = OldFunc->arg_end(); I != E; ++I)
- if (Argument* Anew = dyn_cast<Argument>(ValueMap[I]))
+ if (Argument* Anew = dyn_cast<Argument>(VMap[I]))
Anew->addAttr( OldFunc->getAttributes()
.getParamAttributes(I->getArgNo() + 1));
NewFunc->setAttributes(NewFunc->getAttributes()
@@ -111,43 +111,43 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
const BasicBlock &BB = *BI;
// Create a new basic block and copy instructions into it!
- BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc,
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc,
CodeInfo);
- ValueMap[&BB] = CBB; // Add basic block mapping.
+ VMap[&BB] = CBB; // Add basic block mapping.
if (ReturnInst *RI = dyn_cast<ReturnInst>(CBB->getTerminator()))
Returns.push_back(RI);
}
// Loop over all of the instructions in the function, fixing up operand
- // references as we go. This uses ValueMap to do all the hard work.
+ // references as we go. This uses VMap to do all the hard work.
//
- for (Function::iterator BB = cast<BasicBlock>(ValueMap[OldFunc->begin()]),
+ for (Function::iterator BB = cast<BasicBlock>(VMap[OldFunc->begin()]),
BE = NewFunc->end(); BB != BE; ++BB)
// Loop over all instructions, fixing each one as we find it...
for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II)
- RemapInstruction(II, ValueMap);
+ RemapInstruction(II, VMap);
}
/// CloneFunction - Return a copy of the specified function, but without
/// embedding the function into another module. Also, any references specified
-/// in the ValueMap are changed to refer to their mapped value instead of the
-/// original one. If any of the arguments to the function are in the ValueMap,
-/// the arguments are deleted from the resultant function. The ValueMap is
+/// in the VMap are changed to refer to their mapped value instead of the
+/// original one. If any of the arguments to the function are in the VMap,
+/// the arguments are deleted from the resultant function. The VMap is
/// updated to include mappings from all of the instructions and basicblocks in
/// the function from their old to new values.
///
Function *llvm::CloneFunction(const Function *F,
- DenseMap<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &VMap,
ClonedCodeInfo *CodeInfo) {
std::vector<const Type*> ArgTypes;
// The user might be deleting arguments to the function by specifying them in
- // the ValueMap. If so, we need to not add the arguments to the arg ty vector
+ // the VMap. If so, we need to not add the arguments to the arg ty vector
//
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I)
- if (ValueMap.count(I) == 0) // Haven't mapped the argument to anything yet?
+ if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet?
ArgTypes.push_back(I->getType());
// Create a new function type...
@@ -161,13 +161,13 @@ Function *llvm::CloneFunction(const Function *F,
Function::arg_iterator DestI = NewF->arg_begin();
for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I)
- if (ValueMap.count(I) == 0) { // Is this argument preserved?
+ if (VMap.count(I) == 0) { // Is this argument preserved?
DestI->setName(I->getName()); // Copy the name over...
- ValueMap[I] = DestI++; // Add mapping to ValueMap
+ VMap[I] = DestI++; // Add mapping to VMap
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo);
+ CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo);
return NewF;
}
@@ -179,19 +179,19 @@ namespace {
struct PruningFunctionCloner {
Function *NewFunc;
const Function *OldFunc;
- DenseMap<const Value*, Value*> &ValueMap;
+ ValueToValueMapTy &VMap;
SmallVectorImpl<ReturnInst*> &Returns;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
const TargetData *TD;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
- DenseMap<const Value*, Value*> &valueMap,
+ ValueToValueMapTy &valueMap,
SmallVectorImpl<ReturnInst*> &returns,
const char *nameSuffix,
ClonedCodeInfo *codeInfo,
const TargetData *td)
- : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns),
+ : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns),
NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) {
}
@@ -202,7 +202,7 @@ namespace {
public:
/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
- /// mapping its operands through ValueMap if they are available.
+ /// mapping its operands through VMap if they are available.
Constant *ConstantFoldMappedInstruction(const Instruction *I);
};
}
@@ -211,7 +211,7 @@ namespace {
/// anything that it can reach.
void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
std::vector<const BasicBlock*> &ToClone){
- Value *&BBEntry = ValueMap[BB];
+ Value *&BBEntry = VMap[BB];
// Have we already cloned this block?
if (BBEntry) return;
@@ -230,7 +230,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If this instruction constant folds, don't bother cloning the instruction,
// instead, just add the constant to the value map.
if (Constant *C = ConstantFoldMappedInstruction(II)) {
- ValueMap[II] = C;
+ VMap[II] = C;
continue;
}
@@ -238,7 +238,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
- ValueMap[II] = NewInst; // Add instruction map to value.
+ VMap[II] = NewInst; // Add instruction map to value.
hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II));
if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) {
@@ -258,12 +258,12 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
// Or is a known constant in the caller...
if (Cond == 0)
- Cond = dyn_cast_or_null<ConstantInt>(ValueMap[BI->getCondition()]);
+ Cond = dyn_cast_or_null<ConstantInt>(VMap[BI->getCondition()]);
// Constant fold to uncond branch!
if (Cond) {
BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue());
- ValueMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
}
@@ -272,10 +272,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// If switching on a value known constant in the caller.
ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition());
if (Cond == 0) // Or known constant after constant prop in the callee...
- Cond = dyn_cast_or_null<ConstantInt>(ValueMap[SI->getCondition()]);
+ Cond = dyn_cast_or_null<ConstantInt>(VMap[SI->getCondition()]);
if (Cond) { // Constant fold to uncond branch!
BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond));
- ValueMap[OldTI] = BranchInst::Create(Dest, NewBB);
+ VMap[OldTI] = BranchInst::Create(Dest, NewBB);
ToClone.push_back(Dest);
TerminatorDone = true;
}
@@ -286,7 +286,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
if (OldTI->hasName())
NewInst->setName(OldTI->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
- ValueMap[OldTI] = NewInst; // Add instruction map to value.
+ VMap[OldTI] = NewInst; // Add instruction map to value.
// Recursively clone any reachable successor blocks.
const TerminatorInst *TI = BB->getTerminator();
@@ -307,13 +307,13 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
}
/// ConstantFoldMappedInstruction - Constant fold the specified instruction,
-/// mapping its operands through ValueMap if they are available.
+/// mapping its operands through VMap if they are available.
Constant *PruningFunctionCloner::
ConstantFoldMappedInstruction(const Instruction *I) {
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
- ValueMap)))
+ VMap)))
Ops.push_back(Op);
else
return 0; // All operands not constant!
@@ -363,7 +363,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) {
/// dead. Since this doesn't produce an exact copy of the input, it can't be
/// used for things like CloneFunction or CloneModule.
void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
- DenseMap<const Value*, Value*> &ValueMap,
+ ValueToValueMapTy &VMap,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
@@ -374,10 +374,10 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
#ifndef NDEBUG
for (Function::const_arg_iterator II = OldFunc->arg_begin(),
E = OldFunc->arg_end(); II != E; ++II)
- assert(ValueMap.count(II) && "No mapping from source argument specified!");
+ assert(VMap.count(II) && "No mapping from source argument specified!");
#endif
- PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns,
+ PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns,
NameSuffix, CodeInfo, TD);
// Clone the entry block, and anything recursively reachable from it.
@@ -397,14 +397,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVector<const PHINode*, 16> PHIToResolve;
for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end();
BI != BE; ++BI) {
- BasicBlock *NewBB = cast_or_null<BasicBlock>(ValueMap[BI]);
+ BasicBlock *NewBB = cast_or_null<BasicBlock>(VMap[BI]);
if (NewBB == 0) continue; // Dead block.
// Add the new block to the new function.
NewFunc->getBasicBlockList().push_back(NewBB);
// Loop over all of the instructions in the block, fixing up operand
- // references as we go. This uses ValueMap to do all the hard work.
+ // references as we go. This uses VMap to do all the hard work.
//
BasicBlock::iterator I = NewBB->begin();
@@ -455,7 +455,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
I->setMetadata(DbgKind, 0);
}
}
- RemapInstruction(I, ValueMap);
+ RemapInstruction(I, VMap);
}
}
@@ -465,19 +465,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
const PHINode *OPN = PHIToResolve[phino];
unsigned NumPreds = OPN->getNumIncomingValues();
const BasicBlock *OldBB = OPN->getParent();
- BasicBlock *NewBB = cast<BasicBlock>(ValueMap[OldBB]);
+ BasicBlock *NewBB = cast<BasicBlock>(VMap[OldBB]);
// Map operands for blocks that are live and remove operands for blocks
// that are dead.
for (; phino != PHIToResolve.size() &&
PHIToResolve[phino]->getParent() == OldBB; ++phino) {
OPN = PHIToResolve[phino];
- PHINode *PN = cast<PHINode>(ValueMap[OPN]);
+ PHINode *PN = cast<PHINode>(VMap[OPN]);
for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
if (BasicBlock *MappedBlock =
- cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) {
+ cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
Value *InVal = MapValue(PN->getIncomingValue(pred),
- ValueMap);
+ VMap);
assert(InVal && "Unknown input value?");
PN->setIncomingValue(pred, InVal);
PN->setIncomingBlock(pred, MappedBlock);
@@ -531,15 +531,15 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
while ((PN = dyn_cast<PHINode>(I++))) {
Value *NV = UndefValue::get(PN->getType());
PN->replaceAllUsesWith(NV);
- assert(ValueMap[OldI] == PN && "ValueMap mismatch");
- ValueMap[OldI] = NV;
+ assert(VMap[OldI] == PN && "VMap mismatch");
+ VMap[OldI] = NV;
PN->eraseFromParent();
++OldI;
}
}
// NOTE: We cannot eliminate single entry phi nodes here, because of
- // ValueMap. Single entry phi nodes can have multiple ValueMap entries
- // pointing at them. Thus, deleting one would require scanning the ValueMap
+ // VMap. Single entry phi nodes can have multiple VMap entries
+ // pointing at them. Thus, deleting one would require scanning the VMap
// to update any entries in it that would require that. This would be
// really slow.
}
@@ -548,14 +548,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// and zap unconditional fall-through branches. This happens all the time when
// specializing code: code specialization turns conditional branches into
// uncond branches, and this code folds them.
- Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]);
+ Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
while (I != NewFunc->end()) {
BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
if (!BI || BI->isConditional()) { ++I; continue; }
// Note that we can't eliminate uncond branches if the destination has
// single-entry PHI nodes. Eliminating the single-entry phi nodes would
- // require scanning the ValueMap to update any entries that point to the phi
+ // require scanning the VMap to update any entries that point to the phi
// node.
BasicBlock *Dest = BI->getSuccessor(0);
if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
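
Everything in CloneFunction.cpp is one rename: the raw DenseMap<const Value*, Value*> becomes the ValueToValueMapTy typedef, with VMap the conventional variable name. The call sequence is otherwise unchanged (sketch; the clone is created unowned):

    ValueToValueMapTy VMap;                        // filled with old->new mappings
    Function *NewF = CloneFunction(F, VMap, /*CodeInfo=*/0);
    F->getParent()->getFunctionList().push_back(NewF);
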
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
index 38928dc..551b630 100644
--- a/lib/Transforms/Utils/CloneLoop.cpp
+++ b/lib/Transforms/Utils/CloneLoop.cpp
@@ -15,7 +15,6 @@
#include "llvm/BasicBlock.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
-#include "llvm/ADT/DenseMap.h"
using namespace llvm;
@@ -23,13 +22,13 @@ using namespace llvm;
/// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
/// dominance info. It is expected that basic block is already cloned.
static void CloneDominatorInfo(BasicBlock *BB,
- DenseMap<const Value *, Value *> &ValueMap,
+ ValueMap<const Value *, Value *> &VMap,
DominatorTree *DT,
DominanceFrontier *DF) {
assert (DT && "DominatorTree is not available");
- DenseMap<const Value *, Value*>::iterator BI = ValueMap.find(BB);
- assert (BI != ValueMap.end() && "BasicBlock clone is missing");
+ ValueMap<const Value *, Value*>::iterator BI = VMap.find(BB);
+ assert (BI != VMap.end() && "BasicBlock clone is missing");
BasicBlock *NewBB = cast<BasicBlock>(BI->second);
// NewBB already got dominator info.
@@ -43,11 +42,11 @@ static void CloneDominatorInfo(BasicBlock *BB,
// NewBB's dominator is either BB's dominator or BB's dominator's clone.
BasicBlock *NewBBDom = BBDom;
- DenseMap<const Value *, Value*>::iterator BBDomI = ValueMap.find(BBDom);
- if (BBDomI != ValueMap.end()) {
+ ValueMap<const Value *, Value*>::iterator BBDomI = VMap.find(BBDom);
+ if (BBDomI != VMap.end()) {
NewBBDom = cast<BasicBlock>(BBDomI->second);
if (!DT->getNode(NewBBDom))
- CloneDominatorInfo(BBDom, ValueMap, DT, DF);
+ CloneDominatorInfo(BBDom, VMap, DT, DF);
}
DT->addNewBlock(NewBB, NewBBDom);
@@ -60,8 +59,8 @@ static void CloneDominatorInfo(BasicBlock *BB,
for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end();
I != E; ++I) {
BasicBlock *DB = *I;
- DenseMap<const Value*, Value*>::iterator IDM = ValueMap.find(DB);
- if (IDM != ValueMap.end())
+ ValueMap<const Value*, Value*>::iterator IDM = VMap.find(DB);
+ if (IDM != VMap.end())
NewDFSet.insert(cast<BasicBlock>(IDM->second));
else
NewDFSet.insert(DB);
@@ -71,10 +70,10 @@ static void CloneDominatorInfo(BasicBlock *BB,
}
}
-/// CloneLoop - Clone Loop. Clone dominator info. Populate ValueMap
+/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
/// using old blocks to new blocks mapping.
Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
- DenseMap<const Value *, Value *> &ValueMap, Pass *P) {
+ ValueMap<const Value *, Value *> &VMap, Pass *P) {
DominatorTree *DT = NULL;
DominanceFrontier *DF = NULL;
@@ -104,8 +103,8 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
- BasicBlock *NewBB = CloneBasicBlock(BB, ValueMap, ".clone");
- ValueMap[BB] = NewBB;
+ BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone");
+ VMap[BB] = NewBB;
if (P)
LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
@@ -117,7 +116,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I) {
BasicBlock *BB = *I;
- CloneDominatorInfo(BB, ValueMap, DT, DF);
+ CloneDominatorInfo(BB, VMap, DT, DF);
}
// Process sub loops
@@ -125,7 +124,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
LoopNest.push_back(*I);
} while (!LoopNest.empty());
- // Remap instructions to reference operands from ValueMap.
+ // Remap instructions to reference operands from VMap.
for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(),
NBE = NewBlocks.end(); NBItr != NBE; ++NBItr) {
BasicBlock *NB = *NBItr;
@@ -135,8 +134,8 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
for (unsigned index = 0, num_ops = Insn->getNumOperands();
index != num_ops; ++index) {
Value *Op = Insn->getOperand(index);
- DenseMap<const Value *, Value *>::iterator OpItr = ValueMap.find(Op);
- if (OpItr != ValueMap.end())
+ ValueMap<const Value *, Value *>::iterator OpItr = VMap.find(Op);
+ if (OpItr != VMap.end())
Insn->setOperand(index, OpItr->second);
}
}
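
Oddly, CloneLoop is spelled against ValueMap<const Value*, Value*> directly rather than the ValueToValueMapTy typedef used everywhere else in this patch; presumably the typedef resolves to this same ValueMap class. Calling it from a LoopPass, sketched:

    ValueMap<const Value *, Value *> VMap;
    Loop *NewL = CloneLoop(L, &LPM, LI, VMap, this);  // 'this' lets it update DT/DF
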
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index b87c082..fc603d2 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -28,12 +28,12 @@ using namespace llvm;
Module *llvm::CloneModule(const Module *M) {
// Create the value map that maps things from the old module over to the new
// module.
- DenseMap<const Value*, Value*> ValueMap;
- return CloneModule(M, ValueMap);
+ ValueToValueMapTy VMap;
+ return CloneModule(M, VMap);
}
Module *llvm::CloneModule(const Module *M,
- DenseMap<const Value*, Value*> &ValueMap) {
+ ValueToValueMapTy &VMap) {
// First off, we need to create the new module...
Module *New = new Module(M->getModuleIdentifier(), M->getContext());
New->setDataLayout(M->getDataLayout());
@@ -51,7 +51,7 @@ Module *llvm::CloneModule(const Module *M,
New->addLibrary(*I);
// Loop over all of the global variables, making corresponding globals in the
- // new module. Here we add them to the ValueMap and to the new Module. We
+ // new module. Here we add them to the VMap and to the new Module. We
// don't worry about attributes or initializers, they will come later.
//
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
@@ -62,7 +62,7 @@ Module *llvm::CloneModule(const Module *M,
GlobalValue::ExternalLinkage, 0,
I->getName());
GV->setAlignment(I->getAlignment());
- ValueMap[I] = GV;
+ VMap[I] = GV;
}
// Loop over the functions in the module, making external functions as before
@@ -71,13 +71,13 @@ Module *llvm::CloneModule(const Module *M,
Function::Create(cast<FunctionType>(I->getType()->getElementType()),
GlobalValue::ExternalLinkage, I->getName(), New);
NF->copyAttributesFrom(I);
- ValueMap[I] = NF;
+ VMap[I] = NF;
}
// Loop over the aliases in the module
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I)
- ValueMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
+ VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage,
I->getName(), NULL, New);
// Now that all of the things that global variable initializer can refer to
@@ -86,10 +86,10 @@ Module *llvm::CloneModule(const Module *M,
//
for (Module::const_global_iterator I = M->global_begin(), E = M->global_end();
I != E; ++I) {
- GlobalVariable *GV = cast<GlobalVariable>(ValueMap[I]);
+ GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- ValueMap)));
+ VMap)));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -98,17 +98,17 @@ Module *llvm::CloneModule(const Module *M,
// Similarly, copy over function bodies now...
//
for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) {
- Function *F = cast<Function>(ValueMap[I]);
+ Function *F = cast<Function>(VMap[I]);
if (!I->isDeclaration()) {
Function::arg_iterator DestI = F->arg_begin();
for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end();
++J) {
DestI->setName(J->getName());
- ValueMap[J] = DestI++;
+ VMap[J] = DestI++;
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(F, I, ValueMap, Returns);
+ CloneFunctionInto(F, I, VMap, Returns);
}
F->setLinkage(I->getLinkage());
@@ -117,11 +117,37 @@ Module *llvm::CloneModule(const Module *M,
// And aliases
for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end();
I != E; ++I) {
- GlobalAlias *GA = cast<GlobalAlias>(ValueMap[I]);
+ GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
GA->setLinkage(I->getLinkage());
if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, ValueMap)));
+ GA->setAliasee(cast<Constant>(MapValue(C, VMap)));
}
-
+
+  // And named metadata...
+ for (Module::const_named_metadata_iterator I = M->named_metadata_begin(),
+ E = M->named_metadata_end(); I != E; ++I) {
+ const NamedMDNode &NMD = *I;
+ SmallVector<MDNode*, 4> MDs;
+ for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
+ MDs.push_back(cast<MDNode>(MapValue(NMD.getOperand(i), VMap)));
+ NamedMDNode::Create(New->getContext(), NMD.getName(),
+ MDs.data(), MDs.size(), New);
+ }
+
+  // Update metadata attached to instructions.
+ for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI)
+ for (Function::iterator FI = MI->begin(), FE = MI->end();
+ FI != FE; ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ SmallVector<std::pair<unsigned, MDNode *>, 4 > MDs;
+ BI->getAllMetadata(MDs);
+ for (SmallVector<std::pair<unsigned, MDNode *>, 4>::iterator
+ MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) {
+ Value *MappedValue = MapValue(MDI->second, VMap);
+ if (MDI->second != MappedValue && MappedValue)
+ BI->setMetadata(MDI->first, cast<MDNode>(MappedValue));
+ }
+ }
return New;
}
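
The hunk above moves CloneModule from a raw DenseMap to the handle-tracked ValueToValueMapTy. A minimal caller-side sketch under the new signature; the header paths are assumed from the contemporary tree, and cloneWithMap is a hypothetical name:

    #include "llvm/Module.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"  // ValueToValueMapTy

    using namespace llvm;

    Module *cloneWithMap(const Module *M) {
      ValueToValueMapTy VMap;            // a ValueMap, not a DenseMap, post-patch
      Module *New = CloneModule(M, VMap);
      // VMap now maps every original global, function, alias and argument
      // to its clone, e.g. VMap[OldFn] yields the cloned Function*.
      return New;
    }
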
diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp
index c908b4a..8e82a02 100644
--- a/lib/Transforms/Utils/DemoteRegToStack.cpp
+++ b/lib/Transforms/Utils/DemoteRegToStack.cpp
@@ -35,7 +35,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
I.eraseFromParent();
return 0;
}
-
+
// Create a stack slot to hold the value.
AllocaInst *Slot;
if (AllocaPoint) {
@@ -46,7 +46,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem",
F->getEntryBlock().begin());
}
-
+
// Change all of the users of the instruction to read from the stack slot
// instead.
while (!I.use_empty()) {
@@ -67,7 +67,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
Value *&V = Loads[PN->getIncomingBlock(i)];
if (V == 0) {
// Insert the load into the predecessor block
- V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
+ V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads,
PN->getIncomingBlock(i)->getTerminator());
}
PN->setIncomingValue(i, V);
@@ -110,8 +110,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads,
/// The phi node is deleted and it returns the pointer to the alloca inserted.
AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
if (P->use_empty()) {
- P->eraseFromParent();
- return 0;
+ P->eraseFromParent();
+ return 0;
}
// Create a stack slot to hold the value.
@@ -124,23 +124,23 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) {
Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem",
F->getEntryBlock().begin());
}
-
+
// Iterate over each operand, insert store in each predecessor.
for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) {
if (InvokeInst *II = dyn_cast<InvokeInst>(P->getIncomingValue(i))) {
- assert(II->getParent() != P->getIncomingBlock(i) &&
+ assert(II->getParent() != P->getIncomingBlock(i) &&
"Invoke edge not supported yet"); II=II;
}
- new StoreInst(P->getIncomingValue(i), Slot,
+ new StoreInst(P->getIncomingValue(i), Slot,
P->getIncomingBlock(i)->getTerminator());
}
-
+
// Insert load in place of the phi and replace all uses.
Value *V = new LoadInst(Slot, P->getName()+".reload", P);
P->replaceAllUsesWith(V);
-
+
// Delete phi.
P->eraseFromParent();
-
+
return Slot;
}
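
For orientation, a hedged sketch of how the demotion helpers above are called, using only the signatures visible in this hunk; the declarations are assumed to live in Transforms/Utils/Local.h as in the tree of this era, and the caller-side names are hypothetical:

    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/Local.h"

    using namespace llvm;

    void demoteToStack(Instruction &Inst, PHINode *PN) {
      // Returns the alloca now backing the value, or null when the
      // instruction had no uses and was simply erased.
      AllocaInst *Slot = DemoteRegToStack(Inst, /*VolatileLoads=*/false,
                                          /*AllocaPoint=*/0);
      // Same contract for phis: a store in each predecessor, one reload.
      AllocaInst *PhiSlot = DemotePHIToStack(PN, /*AllocaPoint=*/0);
      (void)Slot; (void)PhiSlot;
    }
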
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 91390bc..598e7d2 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -63,7 +63,8 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
// Next, create the new invoke instruction, inserting it at the end
// of the old basic block.
- SmallVector<Value*, 8> InvokeArgs(CI->op_begin()+1, CI->op_end());
+ ImmutableCallSite CS(CI);
+ SmallVector<Value*, 8> InvokeArgs(CS.arg_begin(), CS.arg_end());
InvokeInst *II =
InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest,
InvokeArgs.begin(), InvokeArgs.end(),
@@ -169,7 +170,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock,
/// some edges of the callgraph may remain.
static void UpdateCallGraphAfterInlining(CallSite CS,
Function::iterator FirstNewBlock,
- DenseMap<const Value*, Value*> &ValueMap,
+ ValueMap<const Value*, Value*> &VMap,
InlineFunctionInfo &IFI) {
CallGraph &CG = *IFI.CG;
const Function *Caller = CS.getInstruction()->getParent()->getParent();
@@ -192,9 +193,9 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
for (; I != E; ++I) {
const Value *OrigCall = I->first;
- DenseMap<const Value*, Value*>::iterator VMI = ValueMap.find(OrigCall);
+ ValueMap<const Value*, Value*>::iterator VMI = VMap.find(OrigCall);
// Only copy the edge if the call was inlined!
- if (VMI == ValueMap.end() || VMI->second == 0)
+ if (VMI == VMap.end() || VMI->second == 0)
continue;
// If the call was inlined, but then constant folded, there is no edge to
@@ -285,8 +286,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
ClonedCodeInfo InlinedFunctionInfo;
Function::iterator FirstNewBlock;
- { // Scope to destroy ValueMap after cloning.
- DenseMap<const Value*, Value*> ValueMap;
+ { // Scope to destroy VMap after cloning.
+ ValueMap<const Value*, Value*> VMap;
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -351,16 +352,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Uses of the argument in the function should use our new alloca
// instead.
ActualArg = NewAlloca;
+
+ // Calls that we inline may use the new alloca, so we need to clear
+ // their 'tail' flags.
+ MustClearTailCallFlags = true;
}
- ValueMap[I] = ActualArg;
+ VMap[I] = ActualArg;
}
// We want the inliner to prune the code as it copies. We would LOVE to
// have no dead or constant instructions leftover after inlining occurs
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
- CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
+ CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i",
&InlinedFunctionInfo, IFI.TD, TheCall);
// Remember the first block that is newly cloned over.
@@ -368,7 +373,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Update the callgraph if requested.
if (IFI.CG)
- UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, IFI);
+ UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
}
// If there are any alloca instructions in the block that used to be the entry
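
The hunks above only show the inliner's internals, so here is a hedged caller-side sketch of the entry point; the InlineFunctionInfo wiring is inferred from the IFI.CG and IFI.TD uses visible in this file, and tryInline is hypothetical:

    #include "llvm/Support/CallSite.h"
    #include "llvm/Transforms/Utils/Cloning.h"

    using namespace llvm;

    bool tryInline(CallInst *CI, CallGraph *CG, const TargetData *TD) {
      InlineFunctionInfo IFI(CG, TD);  // optional callgraph update + layout info
      return InlineFunction(CallSite(CI), IFI);
    }
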
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index df6e603..e90c30b 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -190,14 +190,15 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
UI != E; ++UI) {
- BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
- if (PHINode *PN = dyn_cast<PHINode>(*UI))
+ User *U = *UI;
+ BasicBlock *UserBB = cast<Instruction>(U)->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(U))
UserBB = PN->getIncomingBlock(UI);
if (InstBB != UserBB && !inLoop(UserBB))
UsesToRewrite.push_back(&UI.getUse());
}
-
+
// If there are no uses outside the loop, exit with no change.
if (UsesToRewrite.empty()) return false;
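
The change above, caching *UI in a local User pointer, recurs throughout this commit: the use_iterator is dereferenced once instead of repeatedly. The pattern in isolation, over a hypothetical instruction:

    #include "llvm/Instructions.h"

    using namespace llvm;

    void visitUses(Instruction *Inst) {
      for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
           UI != E; ++UI) {
        User *U = *UI;                        // one dereference, reused below
        BasicBlock *UserBB = cast<Instruction>(U)->getParent();
        if (PHINode *PN = dyn_cast<PHINode>(U))
          UserBB = PN->getIncomingBlock(UI);  // a PHI's use lives in the pred
        (void)UserBB;
      }
    }
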
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index d03f7a6..0b48a8f 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -35,111 +35,6 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
-// Local analysis.
-//
-
-/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and
-/// bitcasts to get back to the underlying object being addressed, keeping
-/// track of the offset in bytes from the GEPs relative to the result.
-/// This is closely related to Value::getUnderlyingObject but is located
-/// here to avoid making VMCore depend on TargetData.
-static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
- uint64_t &ByteOffset,
- unsigned MaxLookup = 6) {
- if (!V->getType()->isPointerTy())
- return V;
- for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
- if (!GEP->hasAllConstantIndices())
- return V;
- SmallVector<Value*, 8> Indices(GEP->op_begin() + 1, GEP->op_end());
- ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(),
- &Indices[0], Indices.size());
- V = GEP->getPointerOperand();
- } else if (Operator::getOpcode(V) == Instruction::BitCast) {
- V = cast<Operator>(V)->getOperand(0);
- } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
- if (GA->mayBeOverridden())
- return V;
- V = GA->getAliasee();
- } else {
- return V;
- }
- assert(V->getType()->isPointerTy() && "Unexpected operand type!");
- }
- return V;
-}
-
-/// isSafeToLoadUnconditionally - Return true if we know that executing a load
-/// from this value cannot trap. If it is not obviously safe to load from the
-/// specified pointer, we do a quick local scan of the basic block containing
-/// ScanFrom, to determine if the address is already accessed.
-bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
- unsigned Align, const TargetData *TD) {
- uint64_t ByteOffset = 0;
- Value *Base = V;
- if (TD)
- Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset);
-
- const Type *BaseType = 0;
- unsigned BaseAlign = 0;
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
-    // An alloca is safe to load from as long as it is suitably aligned.
- BaseType = AI->getAllocatedType();
- BaseAlign = AI->getAlignment();
- } else if (const GlobalValue *GV = dyn_cast<GlobalValue>(Base)) {
- // Global variables are safe to load from but their size cannot be
- // guaranteed if they are overridden.
- if (!isa<GlobalAlias>(GV) && !GV->mayBeOverridden()) {
- BaseType = GV->getType()->getElementType();
- BaseAlign = GV->getAlignment();
- }
- }
-
- if (BaseType && BaseType->isSized()) {
- if (TD && BaseAlign == 0)
- BaseAlign = TD->getPrefTypeAlignment(BaseType);
-
- if (Align <= BaseAlign) {
- if (!TD)
- return true; // Loading directly from an alloca or global is OK.
-
- // Check if the load is within the bounds of the underlying object.
- const PointerType *AddrTy = cast<PointerType>(V->getType());
- uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType());
- if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) &&
- (Align == 0 || (ByteOffset % Align) == 0))
- return true;
- }
- }
-
- // Otherwise, be a little bit aggressive by scanning the local block where we
- // want to check to see if the pointer is already being loaded or stored
- // from/to. If so, the previous load or store would have already trapped,
- // so there is no harm doing an extra load (also, CSE will later eliminate
- // the load entirely).
- BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
-
- while (BBI != E) {
- --BBI;
-
- // If we see a free or a call which may write to memory (i.e. which might do
- // a free) the pointer could be marked invalid.
- if (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
- !isa<DbgInfoIntrinsic>(BBI))
- return false;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (LI->getOperand(0) == V) return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- if (SI->getOperand(1) == V) return true;
- }
- }
- return false;
-}
-
-
-//===----------------------------------------------------------------------===//
// Local constant propagation.
//
@@ -537,9 +432,11 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
// Use that list to make another list of common predecessors of BB and Succ
BlockSet CommonPreds;
for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
- PI != PE; ++PI)
- if (BBPreds.count(*PI))
- CommonPreds.insert(*PI);
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (BBPreds.count(P))
+ CommonPreds.insert(P);
+ }
// Shortcut, if there are no common predecessors, merging is always safe
if (CommonPreds.empty())
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 1ef3c32..4f4edf3 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -142,9 +142,11 @@ ReprocessLoop:
if (*BB == L->getHeader()) continue;
SmallPtrSet<BasicBlock *, 4> BadPreds;
- for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI)
- if (!L->contains(*PI))
- BadPreds.insert(*PI);
+ for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI){
+ BasicBlock *P = *PI;
+ if (!L->contains(P))
+ BadPreds.insert(P);
+ }
// Delete each unique out-of-loop (and thus dead) predecessor.
for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(),
@@ -192,7 +194,7 @@ ReprocessLoop:
if (!Preheader) {
Preheader = InsertPreheaderForLoop(L);
if (Preheader) {
- NumInserted++;
+ ++NumInserted;
Changed = true;
}
}
@@ -215,7 +217,7 @@ ReprocessLoop:
// allowed.
if (!L->contains(*PI)) {
if (RewriteLoopExitBlock(L, ExitBlock)) {
- NumInserted++;
+ ++NumInserted;
Changed = true;
}
break;
@@ -244,7 +246,7 @@ ReprocessLoop:
// loop header.
LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
if (LoopLatch) {
- NumInserted++;
+ ++NumInserted;
Changed = true;
}
}
@@ -353,16 +355,18 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
// Compute the set of predecessors of the loop that are not in the loop.
SmallVector<BasicBlock*, 8> OutsideBlocks;
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
- PI != PE; ++PI)
- if (!L->contains(*PI)) { // Coming in from outside the loop?
+ PI != PE; ++PI) {
+ BasicBlock *P = *PI;
+ if (!L->contains(P)) { // Coming in from outside the loop?
// If the loop is branched to from an indirect branch, we won't
// be able to fully transform the loop, because it prohibits
// edge splitting.
- if (isa<IndirectBrInst>((*PI)->getTerminator())) return 0;
+ if (isa<IndirectBrInst>(P->getTerminator())) return 0;
// Keep track of it.
- OutsideBlocks.push_back(*PI);
+ OutsideBlocks.push_back(P);
}
+ }
// Split out the loop pre-header.
BasicBlock *NewBB =
@@ -385,13 +389,15 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
/// outside of the loop.
BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
SmallVector<BasicBlock*, 8> LoopBlocks;
- for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I)
- if (L->contains(*I)) {
+ for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) {
+ BasicBlock *P = *I;
+ if (L->contains(P)) {
// Don't do this if the loop is exited via an indirect branch.
- if (isa<IndirectBrInst>((*I)->getTerminator())) return 0;
+ if (isa<IndirectBrInst>(P->getTerminator())) return 0;
- LoopBlocks.push_back(*I);
+ LoopBlocks.push_back(P);
}
+ }
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
@@ -559,10 +565,11 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
// Determine which blocks should stay in L and which should be moved out to
// the Outer loop now.
std::set<BasicBlock*> BlocksInL;
- for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); PI!=E; ++PI)
- if (DT->dominates(Header, *PI))
- AddBlockAndPredsToSet(*PI, Header, BlocksInL);
-
+ for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) {
+ BasicBlock *P = *PI;
+ if (DT->dominates(Header, P))
+ AddBlockAndPredsToSet(P, Header, BlocksInL);
+ }
// Scan all of the loop children of L, moving them to OuterLoop if they are
// not part of the inner loop.
@@ -610,8 +617,10 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
- for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I)
- if (*I != Preheader) BackedgeBlocks.push_back(*I);
+ for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
+ BasicBlock *P = *I;
+ if (P != Preheader) BackedgeBlocks.push_back(P);
+ }
// Create and insert the new backedge block...
BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 84fd1eb..e0e07e7 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -37,13 +37,13 @@ STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
/// RemapInstruction - Convert the instruction operands from referencing the
-/// current values into those specified by ValueMap.
+/// current values into those specified by VMap.
static inline void RemapInstruction(Instruction *I,
- DenseMap<const Value *, Value*> &ValueMap) {
+ ValueMap<const Value *, Value*> &VMap) {
for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {
Value *Op = I->getOperand(op);
- DenseMap<const Value *, Value*>::iterator It = ValueMap.find(Op);
- if (It != ValueMap.end())
+ ValueMap<const Value *, Value*>::iterator It = VMap.find(Op);
+ if (It != VMap.end())
I->setOperand(op, It->second);
}
}
@@ -183,7 +183,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
- typedef DenseMap<const Value*, Value*> ValueToValueMapTy;
+ typedef ValueMap<const Value*, Value*> ValueToValueMapTy;
ValueToValueMapTy LastValueMap;
std::vector<PHINode*> OrigPHINode;
for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
@@ -205,26 +205,26 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
for (std::vector<BasicBlock*>::iterator BB = LoopBlocks.begin(),
E = LoopBlocks.end(); BB != E; ++BB) {
- ValueToValueMapTy ValueMap;
- BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." + Twine(It));
+ ValueToValueMapTy VMap;
+ BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
Header->getParent()->getBasicBlockList().push_back(New);
// Loop over all of the PHI nodes in the block, changing them to use the
// incoming values from the previous block.
if (*BB == Header)
for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) {
- PHINode *NewPHI = cast<PHINode>(ValueMap[OrigPHINode[i]]);
+ PHINode *NewPHI = cast<PHINode>(VMap[OrigPHINode[i]]);
Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock);
if (Instruction *InValI = dyn_cast<Instruction>(InVal))
if (It > 1 && L->contains(InValI))
InVal = LastValueMap[InValI];
- ValueMap[OrigPHINode[i]] = InVal;
+ VMap[OrigPHINode[i]] = InVal;
New->getInstList().erase(NewPHI);
}
// Update our running map of newest clones
LastValueMap[*BB] = New;
- for (ValueToValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end();
+ for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end();
VI != VE; ++VI)
LastValueMap[VI->first] = VI->second;
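
The unroller's copy loop above is an instance of a clone-then-remap idiom. A compact sketch, mirroring this file's own static RemapInstruction so that operands defined outside the cloned block are left untouched; cloneOnce is a hypothetical name:

    #include "llvm/BasicBlock.h"
    #include "llvm/ADT/Twine.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    #include "llvm/Transforms/Utils/ValueMapper.h"

    using namespace llvm;

    BasicBlock *cloneOnce(BasicBlock *BB, const Twine &Suffix) {
      ValueToValueMapTy VMap;
      BasicBlock *New = CloneBasicBlock(BB, VMap, Suffix);
      // Cloned instructions still reference values from the original
      // block; rewrite any operand that has a mapping, skip the rest.
      for (BasicBlock::iterator I = New->begin(), E = New->end(); I != E; ++I)
        for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
          ValueToValueMapTy::iterator It = VMap.find(I->getOperand(op));
          if (It != VMap.end())
            I->setOperand(op, It->second);
        }
      return New;
    }
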
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 0ed8c72..2696e69 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -45,6 +45,7 @@
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
@@ -62,10 +63,7 @@ static cl::opt<bool> ExpensiveEHSupport("enable-correct-eh-support",
namespace {
class LowerInvoke : public FunctionPass {
// Used for both models.
- Constant *WriteFn;
Constant *AbortFn;
- Value *AbortMessage;
- unsigned AbortMessageLength;
// Used for expensive EH support.
const Type *JBLinkTy;
@@ -92,10 +90,8 @@ namespace {
}
private:
- void createAbortMessage(Module *M);
- void writeAbortMessage(Instruction *IB);
bool insertCheapEHSupport(Function &F);
- void splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes);
+ void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*>&Invokes);
void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
AllocaInst *InvokeNum, AllocaInst *StackPtr,
SwitchInst *CatchSwitch);
@@ -123,7 +119,6 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
bool LowerInvoke::doInitialization(Module &M) {
const Type *VoidPtrTy =
Type::getInt8PtrTy(M.getContext());
- AbortMessage = 0;
if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
@@ -175,68 +170,14 @@ bool LowerInvoke::doInitialization(Module &M) {
// We need the 'write' and 'abort' functions for both models.
AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()),
(Type *)0);
-#if 0 // "write" is Unix-specific.. code is going away soon anyway.
- WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty,
- VoidPtrTy, Type::Int32Ty, (Type *)0);
-#else
- WriteFn = 0;
-#endif
return true;
}
-void LowerInvoke::createAbortMessage(Module *M) {
- if (useExpensiveEHSupport) {
- // The abort message for expensive EH support tells the user that the
- // program 'unwound' without an 'invoke' instruction.
- Constant *Msg =
- ConstantArray::get(M->getContext(),
- "ERROR: Exception thrown, but not caught!\n");
- AbortMessageLength = Msg->getNumOperands()-1; // don't include \0
-
- GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true,
- GlobalValue::InternalLinkage,
- Msg, "abortmsg");
- std::vector<Constant*> GEPIdx(2,
- Constant::getNullValue(Type::getInt32Ty(M->getContext())));
- AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
- } else {
- // The abort message for cheap EH support tells the user that EH is not
- // enabled.
- Constant *Msg =
- ConstantArray::get(M->getContext(),
- "Exception handler needed, but not enabled."
- "Recompile program with -enable-correct-eh-support.\n");
- AbortMessageLength = Msg->getNumOperands()-1; // don't include \0
-
- GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true,
- GlobalValue::InternalLinkage,
- Msg, "abortmsg");
- std::vector<Constant*> GEPIdx(2, Constant::getNullValue(
- Type::getInt32Ty(M->getContext())));
- AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2);
- }
-}
-
-
-void LowerInvoke::writeAbortMessage(Instruction *IB) {
-#if 0
- if (AbortMessage == 0)
- createAbortMessage(IB->getParent()->getParent()->getParent());
-
- // These are the arguments we WANT...
- Value* Args[3];
- Args[0] = ConstantInt::get(Type::Int32Ty, 2);
- Args[1] = AbortMessage;
- Args[2] = ConstantInt::get(Type::Int32Ty, AbortMessageLength);
- (new CallInst(WriteFn, Args, 3, "", IB))->setTailCall();
-#endif
-}
-
bool LowerInvoke::insertCheapEHSupport(Function &F) {
bool Changed = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3);
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
// Insert a normal call instruction...
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
CallArgs.begin(), CallArgs.end(),
@@ -257,9 +198,6 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
++NumInvokes; Changed = true;
} else if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- // Insert a new call to write(2, AbortMessage, AbortMessageLength);
- writeAbortMessage(UI);
-
// Insert a call to abort()
CallInst::Create(AbortFn, "", UI)->setTailCall();
@@ -320,7 +258,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
// Insert a normal call instruction.
- std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3);
+ SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
CallArgs.begin(), CallArgs.end(), "",
II);
@@ -349,7 +287,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB, std::set<BasicBlock*> &LiveBBs) {
// across the unwind edge. This process also splits all critical edges
// coming out of invoke's.
void LowerInvoke::
-splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) {
+splitLiveRangesLiveAcrossInvokes(SmallVectorImpl<InvokeInst*> &Invokes) {
// First step, split all critical edges from invoke instructions.
for (unsigned i = 0, e = Invokes.size(); i != e; ++i) {
InvokeInst *II = Invokes[i];
@@ -371,16 +309,33 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) {
++AfterAllocaInsertPt;
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
- // This is always a no-op cast because we're casting AI to AI->getType() so
- // src and destination types are identical. BitCast is the only possibility.
- CastInst *NC = new BitCastInst(
- AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
- AI->replaceAllUsesWith(NC);
- // Normally its is forbidden to replace a CastInst's operand because it
- // could cause the opcode to reflect an illegal conversion. However, we're
- // replacing it here with the same value it was constructed with to simply
- // make NC its user.
- NC->setOperand(0, AI);
+ const Type *Ty = AI->getType();
+ // Aggregate types can't be cast, but are legal argument types, so we have
+ // to handle them differently. We use an extract/insert pair as a
+ // lightweight method to achieve the same goal.
+ if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
+ Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt);
+ Instruction *NI = InsertValueInst::Create(AI, EI, 0);
+ NI->insertAfter(EI);
+ AI->replaceAllUsesWith(NI);
+ // Set the operand of the instructions back to the AllocaInst.
+ EI->setOperand(0, AI);
+ NI->setOperand(0, AI);
+ } else {
+ // This is always a no-op cast because we're casting AI to AI->getType()
+ // so src and destination types are identical. BitCast is the only
+ // possibility.
+ CastInst *NC = new BitCastInst(
+ AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt);
+ AI->replaceAllUsesWith(NC);
+ // Set the operand of the cast instruction back to the AllocaInst.
+ // Normally it's forbidden to replace a CastInst's operand because it
+ // could cause the opcode to reflect an illegal conversion. However,
+ // we're replacing it here with the same value it was constructed with.
+ // We do this because the above replaceAllUsesWith() clobbered the
+ // operand, but we want this one to remain.
+ NC->setOperand(0, AI);
+ }
}
// Finally, scan the code looking for instructions with bad live ranges.
@@ -402,7 +357,7 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) {
continue;
// Avoid iterator invalidation by copying users to a temporary vector.
- std::vector<Instruction*> Users;
+ SmallVector<Instruction*,16> Users;
for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
UI != E; ++UI) {
Instruction *User = cast<Instruction>(*UI);
@@ -452,9 +407,9 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) {
}
bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
- std::vector<ReturnInst*> Returns;
- std::vector<UnwindInst*> Unwinds;
- std::vector<InvokeInst*> Invokes;
+ SmallVector<ReturnInst*,16> Returns;
+ SmallVector<UnwindInst*,16> Unwinds;
+ SmallVector<InvokeInst*,16> Invokes;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
@@ -502,12 +457,11 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
new AllocaInst(JBLinkTy, 0, Align,
"jblink", F.begin()->begin());
- std::vector<Value*> Idx;
- Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext())));
- Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1));
- OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
+ OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
"OldBuf",
- EntryBB->getTerminator());
+ EntryBB->getTerminator());
// Copy the JBListHead to the alloca.
Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
@@ -552,7 +506,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
"setjmp.cont");
Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
- Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
+ Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
"TheJmpBuf",
EntryBB->getTerminator());
JmpBufPtr = new BitCastInst(JmpBufPtr,
@@ -605,24 +559,20 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
// Create the block to do the longjmp.
// Get a pointer to the jmpbuf and longjmp.
- std::vector<Value*> Idx;
- Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext())));
- Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0));
- Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf",
+ Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+ ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
+ Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf",
UnwindBlock);
Idx[0] = new BitCastInst(Idx[0],
Type::getInt8PtrTy(F.getContext()),
"tmp", UnwindBlock);
Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
- CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock);
+ CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock);
new UnreachableInst(F.getContext(), UnwindBlock);
// Set up the term block ("throw without a catch").
new UnreachableInst(F.getContext(), TermBlock);
- // Insert a new call to write(2, AbortMessage, AbortMessageLength);
- writeAbortMessage(TermBlock->getTerminator());
-
// Insert a call to abort()
CallInst::Create(AbortFn, "",
TermBlock->getTerminator())->setTailCall();
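
The aggregate branch above leans on a trick worth isolating: a value of struct or array type cannot be bitcast to itself, so a no-op extractvalue/insertvalue pair stands in for the self-bitcast used on scalar arguments. A sketch under the same assumptions as the hunk; pinUses and its parameters are hypothetical:

    #include "llvm/Instructions.h"

    using namespace llvm;

    void pinUses(Value *AI, Instruction *InsertPt) {
      Instruction *EI = ExtractValueInst::Create(AI, 0, "", InsertPt);
      Instruction *NI = InsertValueInst::Create(AI, EI, 0);
      NI->insertAfter(EI);
      AI->replaceAllUsesWith(NI);  // this also rewrote EI's and NI's operands...
      EI->setOperand(0, AI);       // ...so point them back at the original value
      NI->setOperand(0, AI);
    }
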
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 13f0a28..c0de193 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -69,11 +69,12 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
// Only allow direct and non-volatile loads and stores...
for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
- UI != UE; ++UI) // Loop over all of the uses of the alloca
- if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ UI != UE; ++UI) { // Loop over all of the uses of the alloca
+ const User *U = *UI;
+ if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (LI->isVolatile())
return false;
- } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == AI)
return false; // Don't allow a store OF the AI, only INTO the AI.
if (SI->isVolatile())
@@ -81,6 +82,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
} else {
return false;
}
+ }
return true;
}
@@ -603,9 +605,8 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
// To determine liveness, we must iterate through the predecessors of blocks
// where the def is live. Blocks are added to the worklist if we need to
// check their predecessors. Start with all the using blocks.
- SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
- LiveInBlockWorklist.insert(LiveInBlockWorklist.end(),
- Info.UsingBlocks.begin(), Info.UsingBlocks.end());
+ SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+ Info.UsingBlocks.end());
// If any of the using blocks is also a definition block, check to see if the
// definition occurs before or after the use. If it happens before the use,
@@ -897,6 +898,9 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
// Propagate any debug metadata from the store onto the dbg.value.
if (MDNode *SIMD = SI->getMetadata("dbg"))
DbgVal->setMetadata("dbg", SIMD);
+ // Otherwise propagate debug metadata from dbg.declare.
+ else if (MDNode *MD = DDI->getMetadata("dbg"))
+ DbgVal->setMetadata("dbg", MD);
}
// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
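
For context, a hedged sketch of how the predicate above gates mem2reg; the PromoteMemToReg signature, which in this era still takes a DominanceFrontier, is an assumption, as are the caller-side names:

    #include "llvm/Instructions.h"
    #include "llvm/Transforms/Utils/PromoteMemToReg.h"
    #include <vector>

    using namespace llvm;

    void promoteIfPossible(AllocaInst *AI, DominatorTree &DT,
                           DominanceFrontier &DF) {
      if (!isAllocaPromotable(AI))
        return;  // a volatile access or a store OF the slot keeps it in memory
      std::vector<AllocaInst*> Allocas(1, AI);
      PromoteMemToReg(Allocas, DT, DF);
    }
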
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 9f2209d..fd3ed3e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1513,17 +1513,19 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
// Okay, we're going to insert the PHI node. Since PBI is not the only
// predecessor, compute the PHI'd conditional value for all of the preds.
// Any predecessor where the condition is not computable we keep symbolic.
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- if ((PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) &&
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *P = *PI;
+ if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
PBI != BI && PBI->isConditional() &&
PBI->getCondition() == BI->getCondition() &&
PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
bool CondIsTrue = PBI->getSuccessor(0) == BB;
NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- CondIsTrue), *PI);
+ CondIsTrue), P);
} else {
- NewPN->addIncoming(BI->getCondition(), *PI);
+ NewPN->addIncoming(BI->getCondition(), P);
}
+ }
BI->setCondition(NewPN);
return true;
@@ -1697,10 +1699,11 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
SmallVector<BasicBlock*, 8> UncondBranchPreds;
SmallVector<BranchInst*, 8> CondBranchPreds;
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- TerminatorInst *PTI = (*PI)->getTerminator();
+ BasicBlock *P = *PI;
+ TerminatorInst *PTI = P->getTerminator();
if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
if (BI->isUnconditional())
- UncondBranchPreds.push_back(*PI);
+ UncondBranchPreds.push_back(P);
else
CondBranchPreds.push_back(BI);
}
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 87ce631..3f6a90c 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -28,7 +28,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
// DenseMap. This includes any recursive calls to MapValue.
// Global values and non-function-local metadata do not need to be seeded into
- // the ValueMap if they are using the identity mapping.
+ // the VM if they are using the identity mapping.
if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
(isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal()))
return VMSlot = const_cast<Value*>(V);
@@ -45,7 +45,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
- isa<UndefValue>(C) || isa<MDString>(C))
+ isa<UndefValue>(C))
return VMSlot = C; // Primitive constants map directly
if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
@@ -125,11 +125,11 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
}
/// RemapInstruction - Convert the instruction operands from referencing the
-/// current values into those specified by ValueMap.
+/// current values into those specified by VMap.
///
-void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &ValueMap) {
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, ValueMap);
+ Value *V = MapValue(*op, VMap);
assert(V && "Referenced value not in value map!");
*op = V;
}
diff --git a/lib/Transforms/Utils/ValueMapper.h b/lib/Transforms/Utils/ValueMapper.h
index d61c24c..f4ff643 100644
--- a/lib/Transforms/Utils/ValueMapper.h
+++ b/lib/Transforms/Utils/ValueMapper.h
@@ -15,12 +15,12 @@
#ifndef VALUEMAPPER_H
#define VALUEMAPPER_H
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
namespace llvm {
class Value;
class Instruction;
- typedef DenseMap<const Value *, Value *> ValueToValueMapTy;
+ typedef ValueMap<const Value *, Value *> ValueToValueMapTy;
Value *MapValue(const Value *V, ValueToValueMapTy &VM);
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM);
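
This typedef is the pivot of the whole commit: ValueMap holds its keys through value handles, so with the default configuration an entry follows its key across replaceAllUsesWith instead of going stale the way a raw DenseMap key would. A small illustration, hedged on the default ValueMapConfig behavior:

    #include "llvm/Instructions.h"
    #include "llvm/ADT/ValueMap.h"

    using namespace llvm;

    void trackAcrossRAUW(Instruction *Old, Instruction *New, Value *Clone) {
      ValueMap<const Value*, Value*> VM;
      VM[Old] = Clone;
      Old->replaceAllUsesWith(New);
      // A DenseMap would still be keyed on Old here; ValueMap's handles
      // observe the RAUW and re-key the entry to New.
    }
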
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index e48c026..7a471ef 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -70,8 +70,7 @@ static const Module *getModuleFromVal(const Value *V) {
// PrintEscapedString - Print each character of the specified string, escaping
// it if it is not printable or if it is an escape char.
-static void PrintEscapedString(const StringRef &Name,
- raw_ostream &Out) {
+static void PrintEscapedString(StringRef Name, raw_ostream &Out) {
for (unsigned i = 0, e = Name.size(); i != e; ++i) {
unsigned char C = Name[i];
if (isprint(C) && C != '\\' && C != '"')
@@ -1419,6 +1418,9 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT,
case GlobalValue::ExternalLinkage: break;
case GlobalValue::PrivateLinkage: Out << "private "; break;
case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ Out << "linker_private_weak ";
+ break;
case GlobalValue::InternalLinkage: Out << "internal "; break;
case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break;
case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break;
@@ -1469,8 +1471,11 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
writeOperand(GV->getInitializer(), false);
}
- if (GV->hasSection())
- Out << ", section \"" << GV->getSection() << '"';
+ if (GV->hasSection()) {
+ Out << ", section \"";
+ PrintEscapedString(GV->getSection(), Out);
+ Out << '"';
+ }
if (GV->getAlignment())
Out << ", align " << GV->getAlignment();
@@ -1628,8 +1633,11 @@ void AssemblyWriter::printFunction(const Function *F) {
Attributes FnAttrs = Attrs.getFnAttributes();
if (FnAttrs != Attribute::None)
Out << ' ' << Attribute::getAsString(Attrs.getFnAttributes());
- if (F->hasSection())
- Out << " section \"" << F->getSection() << '"';
+ if (F->hasSection()) {
+ Out << " section \"";
+ PrintEscapedString(F->getSection(), Out);
+ Out << '"';
+ }
if (F->getAlignment())
Out << " align " << F->getAlignment();
if (F->hasGC())
@@ -1854,6 +1862,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
default: Out << " cc" << CI->getCallingConv(); break;
}
+ Operand = CI->getCalledValue();
const PointerType *PTy = cast<PointerType>(Operand->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
const Type *RetTy = FTy->getReturnType();
@@ -1877,10 +1886,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(Operand, true);
}
Out << '(';
- for (unsigned op = 1, Eop = I.getNumOperands(); op < Eop; ++op) {
- if (op > 1)
+ for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
+ if (op > 0)
Out << ", ";
- writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op));
+ writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op + 1));
}
Out << ')';
if (PAL.getFnAttributes() != Attribute::None)
@@ -1925,10 +1934,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(Operand, true);
}
Out << '(';
- for (unsigned op = 0, Eop = I.getNumOperands() - 3; op < Eop; ++op) {
+ for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
if (op)
Out << ", ";
- writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op + 1));
+ writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op + 1));
}
Out << ')';
@@ -2027,7 +2036,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
}
static void WriteMDNodeComment(const MDNode *Node,
- formatted_raw_ostream &Out) {
+ formatted_raw_ostream &Out) {
if (Node->getNumOperands() < 1)
return;
ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getOperand(0));
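
Both printing loops above move from raw operand indices to the call-site argument view, which stays correct however the callee operand is laid out. The new idiom in isolation; visitArgs is hypothetical:

    #include "llvm/Instructions.h"

    using namespace llvm;

    void visitArgs(const CallInst *CI) {
      for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
        Value *A = CI->getArgOperand(i);  // argument i, independent of where
        (void)A;                          // the callee sits in the operand list
      }
    }
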
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 0144210..dc39024 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -18,6 +18,7 @@
#include "llvm/Module.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/IRBuilder.h"
#include <cstring>
@@ -314,7 +315,8 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Function *F = CI->getCalledFunction();
LLVMContext &C = CI->getContext();
-
+ ImmutableCallSite CS(CI);
+
assert(F && "CallInst has no function associated with it.");
if (!NewFn) {
@@ -344,11 +346,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
std::vector<Constant*> Idxs;
- Value *Op0 = CI->getOperand(1);
+ Value *Op0 = CI->getArgOperand(0);
ShuffleVectorInst *SI = NULL;
if (isLoadH || isLoadL) {
Value *Op1 = UndefValue::get(Op0->getType());
- Value *Addr = new BitCastInst(CI->getOperand(2),
+ Value *Addr = new BitCastInst(CI->getArgOperand(1),
Type::getDoublePtrTy(C),
"upgraded.", CI);
Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
@@ -381,7 +383,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
} else if (isMovSD ||
isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
- Value *Op1 = CI->getOperand(2);
+ Value *Op1 = CI->getArgOperand(1);
if (isMovSD) {
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
@@ -395,8 +397,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Mask = ConstantVector::get(Idxs);
SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
} else if (isShufPD) {
- Value *Op1 = CI->getOperand(2);
- unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue();
+ Value *Op1 = CI->getArgOperand(1);
+ unsigned MaskVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
((MaskVal >> 1) & 1)+2));
@@ -416,8 +418,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
} else if (F->getName() == "llvm.x86.sse41.pmulld") {
// Upgrade this set of intrinsics into vector multiplies.
- Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1),
- CI->getOperand(2),
+ Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
+ CI->getArgOperand(1),
CI->getName(),
CI);
// Fix up all the uses with our new multiply.
@@ -427,9 +429,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove upgraded multiply.
CI->eraseFromParent();
} else if (F->getName() == "llvm.x86.ssse3.palign.r") {
- Value *Op1 = CI->getOperand(1);
- Value *Op2 = CI->getOperand(2);
- Value *Op3 = CI->getOperand(3);
+ Value *Op1 = CI->getArgOperand(0);
+ Value *Op2 = CI->getArgOperand(1);
+ Value *Op3 = CI->getArgOperand(2);
unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
Value *Rep;
IRBuilder<> Builder(C);
@@ -483,9 +485,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
CI->eraseFromParent();
} else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
- Value *Op1 = CI->getOperand(1);
- Value *Op2 = CI->getOperand(2);
- Value *Op3 = CI->getOperand(3);
+ Value *Op1 = CI->getArgOperand(0);
+ Value *Op2 = CI->getArgOperand(1);
+ Value *Op3 = CI->getArgOperand(2);
unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
Value *Rep;
IRBuilder<> Builder(C);
@@ -556,10 +558,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::x86_mmx_psrl_w: {
Value *Operands[2];
- Operands[0] = CI->getOperand(1);
+ Operands[0] = CI->getArgOperand(0);
// Cast the second parameter to the correct type.
- BitCastInst *BC = new BitCastInst(CI->getOperand(2),
+ BitCastInst *BC = new BitCastInst(CI->getArgOperand(1),
NewFn->getFunctionType()->getParamType(1),
"upgraded.", CI);
Operands[1] = BC;
@@ -583,9 +585,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::ctlz:
case Intrinsic::ctpop:
case Intrinsic::cttz: {
- // Build a small vector of the 1..(N-1) operands, which are the
- // parameters.
- SmallVector<Value*, 8> Operands(CI->op_begin()+1, CI->op_end());
+ // Build a small vector of the original arguments.
+ SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
// Construct a new CallInst
CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
@@ -620,7 +621,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::eh_selector:
case Intrinsic::eh_typeid_for: {
// Only the return type changed.
- SmallVector<Value*, 8> Operands(CI->op_begin() + 1, CI->op_end());
+ SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
"upgraded." + CI->getName(), CI);
NewCI->setTailCall(CI->isTailCall());
@@ -643,8 +644,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
case Intrinsic::memset: {
// Add isVolatile
const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
- Value *Operands[5] = { CI->getOperand(1), CI->getOperand(2),
- CI->getOperand(3), CI->getOperand(4),
+ Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2), CI->getArgOperand(3),
llvm::ConstantInt::get(I1Ty, 0) };
CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
CI->getName(), CI);
@@ -726,7 +727,8 @@ void llvm::CheckDebugInfoIntrinsics(Module *M) {
if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
if (!Declare->use_empty()) {
DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
- if (!isa<MDNode>(DDI->getOperand(1)) ||!isa<MDNode>(DDI->getOperand(2))) {
+ if (!isa<MDNode>(DDI->getArgOperand(0)) ||
+ !isa<MDNode>(DDI->getArgOperand(1))) {
while (!Declare->use_empty()) {
CallInst *CI = cast<CallInst>(Declare->use_back());
CI->eraseFromParent();
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 549977c..3567266 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -658,7 +658,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
}
}
// Handle an offsetof-like expression.
- if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()){
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
DestTy, false))
return C;
@@ -1817,8 +1817,15 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
return Constant::getAllOnesValue(ResultTy);
// Handle some degenerate cases first
- if (isa<UndefValue>(C1) || isa<UndefValue>(C2))
+ if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ if (ICmpInst::isEquality(ICmpInst::Predicate(pred)))
+ return UndefValue::get(ResultTy);
+ // Otherwise, pick the same value as the non-undef operand, and fold
+ // it to true or false.
return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+ }
// No compile-time operations on this type yet.
if (C1->getType()->isPPC_FP128Ty())
@@ -2194,7 +2201,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
}
NewIndices.push_back(Combined);
- NewIndices.insert(NewIndices.end(), Idxs+1, Idxs+NumIdx);
+ NewIndices.append(Idxs+1, Idxs+NumIdx);
return (inBounds && cast<GEPOperator>(CE)->isInBounds()) ?
ConstantExpr::getInBoundsGetElementPtr(CE->getOperand(0),
&NewIndices[0],
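
The new undef rule above, stated operationally: for eq and ne one can always choose a value for the undef that makes the comparison go either way, so the fold is undef; the remaining predicates keep the old isTrueWhenEqual fold. A hedged sketch through the constant-expression interface:

    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"

    using namespace llvm;

    Constant *foldUndefEq(LLVMContext &Ctx) {
      const Type *I32 = Type::getInt32Ty(Ctx);
      Constant *U = UndefValue::get(I32);
      Constant *Seven = ConstantInt::get(I32, 7);
      // Post-patch this folds to 'i1 undef' rather than a fixed boolean.
      return ConstantExpr::getICmp(ICmpInst::ICMP_EQ, U, Seven);
    }
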
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index bbf1375..ca1a399 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1058,6 +1058,8 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
return LLVMPrivateLinkage;
case GlobalValue::LinkerPrivateLinkage:
return LLVMLinkerPrivateLinkage;
+ case GlobalValue::LinkerPrivateWeakLinkage:
+ return LLVMLinkerPrivateWeakLinkage;
case GlobalValue::DLLImportLinkage:
return LLVMDLLImportLinkage;
case GlobalValue::DLLExportLinkage:
@@ -1108,6 +1110,9 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
case LLVMLinkerPrivateLinkage:
GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
break;
+ case LLVMLinkerPrivateWeakLinkage:
+ GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
+ break;
case LLVMDLLImportLinkage:
GV->setLinkage(GlobalValue::DLLImportLinkage);
break;
@@ -2205,15 +2210,14 @@ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
char **OutMessage) {
- MemoryBuffer *MB = MemoryBuffer::getSTDIN();
- if (!MB->getBufferSize()) {
- delete MB;
- *OutMessage = strdup("stdin is empty.");
- return 1;
+ std::string Error;
+ if (MemoryBuffer *MB = MemoryBuffer::getSTDIN(&Error)) {
+ *OutMemBuf = wrap(MB);
+ return 0;
}
- *OutMemBuf = wrap(MB);
- return 0;
+ *OutMessage = strdup(Error.c_str());
+ return 1;
}
void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
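
Caller-side view of the revised C-API contract above, where the error string now comes from MemoryBuffer itself; readStdin is hypothetical:

    #include "llvm-c/Core.h"
    #include <cstdio>

    LLVMMemoryBufferRef readStdin() {
      LLVMMemoryBufferRef Buf;
      char *Err = 0;
      if (LLVMCreateMemoryBufferWithSTDIN(&Buf, &Err)) {  // non-zero is failure
        std::fprintf(stderr, "stdin: %s\n", Err);
        LLVMDisposeMessage(Err);  // the message was strdup'd for the caller
        return 0;
      }
      return Buf;
    }
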
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index a37fe07..9792ada 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -286,9 +286,10 @@ bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
// PHI nodes use values in the corresponding predecessor block. For other
// instructions, just check to see whether the parent of the use matches up.
- const PHINode *PN = dyn_cast<PHINode>(*UI);
+ const User *U = *UI;
+ const PHINode *PN = dyn_cast<PHINode>(U);
if (PN == 0) {
- if (cast<Instruction>(*UI)->getParent() != BB)
+ if (cast<Instruction>(U)->getParent() != BB)
return true;
continue;
}
@@ -401,12 +402,20 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
return false;
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
- if (isa<AllocaInst>(getOperand(0)))
+ // It's also not safe to follow a bitcast, for example:
+ // bitcast i8* (alloca i8) to i32*
+ // would result in a 4-byte load from a 1-byte alloca.
+ Value *Op0 = getOperand(0);
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) {
+ // TODO: it's safe to do this for any GEP with constant indices that
+ // compute inside the allocated type, but not for any inbounds gep.
+ if (GEP->hasAllZeroIndices())
+ Op0 = GEP->getPointerOperand();
+ }
+ if (isa<AllocaInst>(Op0))
return true;
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(getOperand(0)))
return !GV->hasExternalWeakLinkage();
- // FIXME: Handle cases involving GEPs. We have to be careful because
- // a load of a out-of-bounds GEP has undefined behavior.
return false;
}
case Call:
@@ -421,6 +430,7 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
case Store:
case Ret:
case Br:
+ case IndirectBr:
case Switch:
case Unwind:
case Unreachable:
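
The Load case above in isolation: a GEP whose indices are all zero computes the same address as its base, so it is safe to look through before testing for an alloca; loadBaseIsAlloca is a hypothetical helper:

    #include "llvm/Instructions.h"
    #include "llvm/Operator.h"

    using namespace llvm;

    static bool loadBaseIsAlloca(Value *Op0) {
      if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
        if (GEP->hasAllZeroIndices())      // same address as the base pointer
          Op0 = GEP->getPointerOperand();
      return isa<AllocaInst>(Op0);         // an alloca is never null
    }
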
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index f64b220..c13696f 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -33,7 +33,9 @@ using namespace llvm;
User::op_iterator CallSite::getCallee() const {
Instruction *II(getInstruction());
return isCall()
- ? cast<CallInst>(II)->op_begin()
+ ? (CallInst::ArgOffset
+ ? cast</*FIXME: CallInst*/User>(II)->op_begin()
+ : cast</*FIXME: CallInst*/User>(II)->op_end() - 1)
: cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function
}
@@ -231,8 +233,7 @@ CallInst::~CallInst() {
void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
assert(NumOperands == NumParams+1 && "NumOperands not set up?");
- Use *OL = OperandList;
- OL[0] = Func;
+ Op<ArgOffset -1>() = Func;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -245,16 +246,15 @@ void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
assert((i >= FTy->getNumParams() ||
FTy->getParamType(i) == Params[i]->getType()) &&
"Calling a function with a bad signature!");
- OL[i+1] = Params[i];
+ OperandList[i + ArgOffset] = Params[i];
}
}
void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
assert(NumOperands == 3 && "NumOperands not set up?");
- Use *OL = OperandList;
- OL[0] = Func;
- OL[1] = Actual1;
- OL[2] = Actual2;
+ Op<ArgOffset -1>() = Func;
+ Op<ArgOffset + 0>() = Actual1;
+ Op<ArgOffset + 1>() = Actual2;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -273,9 +273,8 @@ void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
void CallInst::init(Value *Func, Value *Actual) {
assert(NumOperands == 2 && "NumOperands not set up?");
- Use *OL = OperandList;
- OL[0] = Func;
- OL[1] = Actual;
+ Op<ArgOffset -1>() = Func;
+ Op<ArgOffset + 0>() = Actual;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -291,8 +290,7 @@ void CallInst::init(Value *Func, Value *Actual) {
void CallInst::init(Value *Func) {
assert(NumOperands == 1 && "NumOperands not set up?");
- Use *OL = OperandList;
- OL[0] = Func;
+ Op<ArgOffset -1>() = Func;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -473,9 +471,10 @@ static Instruction *createMalloc(Instruction *InsertBefore,
Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
const Type *IntPtrTy, const Type *AllocTy,
Value *AllocSize, Value *ArraySize,
+ Function * MallocF,
const Twine &Name) {
return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
- ArraySize, NULL, Name);
+ ArraySize, MallocF, Name);
}
/// CreateMalloc - Generate the IR for a call to malloc:
@@ -527,8 +526,8 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
}
/// CreateFree - Generate the IR for a call to the builtin free function.
-void CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
- createFree(Source, InsertBefore, NULL);
+Instruction * CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
+ return createFree(Source, InsertBefore, NULL);
}
/// CreateFree - Generate the IR for a call to the builtin free function.
@@ -828,8 +827,8 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
else {
assert(!isa<BasicBlock>(Amt) &&
"Passed basic block into allocation size parameter! Use other ctor");
- assert(Amt->getType()->isIntegerTy(32) &&
- "Allocation array size is not a 32-bit integer!");
+ assert(Amt->getType()->isIntegerTy() &&
+ "Allocation array size is not an integer!");
}
return Amt;
}
@@ -1456,7 +1455,7 @@ void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx,
Op<0>() = Agg;
Op<1>() = Val;
- Indices.insert(Indices.end(), Idx, Idx + NumIdx);
+ Indices.append(Idx, Idx + NumIdx);
setName(Name);
}
@@ -1509,7 +1508,7 @@ void ExtractValueInst::init(const unsigned *Idx, unsigned NumIdx,
const Twine &Name) {
assert(NumOperands == 1 && "NumOperands not initialized?");
- Indices.insert(Indices.end(), Idx, Idx + NumIdx);
+ Indices.append(Idx, Idx + NumIdx);
setName(Name);
}
@@ -1911,9 +1910,12 @@ bool CastInst::isLosslessCast() const {
/// # bitcast i32* %x to i8*
/// # bitcast <2 x i32> %x to <4 x i16>
/// # ptrtoint i32* %x to i32 ; on 32-bit platforms only
-/// @brief Determine if a cast is a no-op.
-bool CastInst::isNoopCast(const Type *IntPtrTy) const {
- switch (getOpcode()) {
+/// @brief Determine if the described cast is a no-op.
+bool CastInst::isNoopCast(Instruction::CastOps Opcode,
+ const Type *SrcTy,
+ const Type *DestTy,
+ const Type *IntPtrTy) {
+ switch (Opcode) {
default:
assert(!"Invalid CastOp");
case Instruction::Trunc:
@@ -1930,13 +1932,18 @@ bool CastInst::isNoopCast(const Type *IntPtrTy) const {
return true; // BitCast never modifies bits.
case Instruction::PtrToInt:
return IntPtrTy->getScalarSizeInBits() ==
- getType()->getScalarSizeInBits();
+ DestTy->getScalarSizeInBits();
case Instruction::IntToPtr:
return IntPtrTy->getScalarSizeInBits() ==
- getOperand(0)->getType()->getScalarSizeInBits();
+ SrcTy->getScalarSizeInBits();
}
}
+/// @brief Determine if a cast is a no-op.
+bool CastInst::isNoopCast(const Type *IntPtrTy) const {
+ return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy);
+}
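
The new static overload lets clients ask the question before materializing a cast instruction. A sketch assuming a TargetData *TD is available; the helper is hypothetical:

#include "llvm/Instructions.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;

// Hypothetical query: would ptrtoint of PtrVal to i32 be a no-op here?
static bool wouldBeNoopPtrToInt(Value *PtrVal, const TargetData *TD,
                                LLVMContext &Ctx) {
  // True only on targets whose pointers are 32 bits wide.
  return CastInst::isNoopCast(Instruction::PtrToInt, PtrVal->getType(),
                              Type::getInt32Ty(Ctx),
                              TD->getIntPtrType(Ctx));
}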
+
/// This function determines if a pair of casts can be eliminated and what
/// opcode should be used in the elimination. This assumes that there are two
/// instructions like this:
@@ -1999,6 +2006,14 @@ unsigned CastInst::isEliminableCastPair(
{ 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr |
{ 5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast -+
};
+
+ // If either of the casts are a bitcast from scalar to vector, disallow the
+ // merging.
+ if ((firstOp == Instruction::BitCast &&
+ isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
+ (secondOp == Instruction::BitCast &&
+ isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
+ return 0; // Disallowed
int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
[secondOp-Instruction::CastOpsBegin];
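
The new guard can be exercised directly: a bitcast pair that crosses between scalar and vector, such as i64 -> <2 x i32> -> i64, is now reported as non-foldable. A sketch, with Ctx assumed:

#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
using namespace llvm;

static bool pairIsBlocked(LLVMContext &Ctx) {
  const Type *I64   = Type::getInt64Ty(Ctx);
  const Type *V2I32 = VectorType::get(Type::getInt32Ty(Ctx), 2);
  // Returns 0: scalar<->vector bitcast pairs may no longer be merged.
  return CastInst::isEliminableCastPair(Instruction::BitCast,
                                        Instruction::BitCast,
                                        I64, V2I32, I64, 0) == 0;
}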
diff --git a/lib/VMCore/IntrinsicInst.cpp b/lib/VMCore/IntrinsicInst.cpp
index c37d5b0..ac8ec20 100644
--- a/lib/VMCore/IntrinsicInst.cpp
+++ b/lib/VMCore/IntrinsicInst.cpp
@@ -54,7 +54,7 @@ Value *DbgInfoIntrinsic::StripCast(Value *C) {
///
Value *DbgDeclareInst::getAddress() const {
- if (MDNode* MD = cast_or_null<MDNode>(getOperand(1)))
+ if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
return MD->getOperand(0);
else
return NULL;
@@ -65,9 +65,9 @@ Value *DbgDeclareInst::getAddress() const {
///
const Value *DbgValueInst::getValue() const {
- return cast<MDNode>(getOperand(1))->getOperand(0);
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
}
Value *DbgValueInst::getValue() {
- return cast<MDNode>(getOperand(1))->getOperand(0);
+ return cast<MDNode>(getArgOperand(0))->getOperand(0);
}
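
getArgOperand numbers only the call's actual arguments, so accessors like the ones above keep working regardless of where the callee pointer sits in the raw operand list. A minimal sketch; the helper name is hypothetical:

#include "llvm/Instructions.h"
using namespace llvm;

// Fetch a call's first argument without assuming the callee's slot.
static Value *firstCallArg(CallInst *CI) {
  return CI->getNumArgOperands() ? CI->getArgOperand(0) : 0;
}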
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index b894ea3..1d3a058 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -133,6 +133,7 @@ static const Function *getFunctionForValue(Value *V) {
static const Function *assertLocalFunction(const MDNode *N) {
if (!N->isFunctionLocal()) return 0;
+ // FIXME: This does not handle cyclic function local metadata.
const Function *F = 0, *NewF = 0;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (Value *V = N->getOperand(i)) {
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index 94840f0..38a51df 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -17,6 +17,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/GVMaterializer.h"
#include "llvm/LLVMContext.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/LeakDetector.h"
@@ -311,9 +312,11 @@ GlobalAlias *Module::getNamedAlias(StringRef Name) const {
/// getNamedMetadata - Return the first NamedMDNode in the module with the
/// specified name. This method returns null if a NamedMDNode with the
-//// specified name is not found.
-NamedMDNode *Module::getNamedMetadata(StringRef Name) const {
- return NamedMDSymTab->lookup(Name);
+/// specified name is not found.
+NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
+ SmallString<256> NameData;
+ StringRef NameRef = Name.toStringRef(NameData);
+ return NamedMDSymTab->lookup(NameRef);
}
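
The Twine overload spares callers a temporary std::string when the name is composed on the fly. A sketch; the "my.md." prefix is illustrative only:

#include "llvm/ADT/Twine.h"
#include "llvm/Module.h"
using namespace llvm;

// Hypothetical lookup of a composed metadata name.
static NamedMDNode *lookupKind(Module &M, StringRef Kind) {
  return M.getNamedMetadata(Twine("my.md.") + Kind);
}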
/// getOrInsertNamedMetadata - Return the first named MDNode in the module
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index a60877d..efd98af 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -35,6 +35,15 @@ using namespace llvm;
// Pass Implementation
//
+Pass::Pass(PassKind K, intptr_t pid) : Resolver(0), PassID(pid), Kind(K) {
+ assert(pid && "pid cannot be 0");
+}
+
+Pass::Pass(PassKind K, const void *pid)
+ : Resolver(0), PassID((intptr_t)pid), Kind(K) {
+ assert(pid && "pid cannot be 0");
+}
+
// Force out-of-line virtual method.
Pass::~Pass() {
delete Resolver;
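
The constructors above assert on the pid that a pass supplies, conventionally the address of its static ID member. The usual shape, with a hypothetical pass:

#include "llvm/Pass.h"
using namespace llvm;

namespace {
  // Hypothetical pass: &ID is the non-null pid the constructors check.
  struct DemoPass : public FunctionPass {
    static char ID;
    DemoPass() : FunctionPass(&ID) {}
    virtual bool runOnFunction(Function &F) { return false; }
  };
}
char DemoPass::ID = 0;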
@@ -92,6 +101,23 @@ void Pass::verifyAnalysis() const {
// By default, don't do anything.
}
+void *Pass::getAdjustedAnalysisPointer(const PassInfo *) {
+ return this;
+}
+
+ImmutablePass *Pass::getAsImmutablePass() {
+ return 0;
+}
+
+PMDataManager *Pass::getAsPMDataManager() {
+ return 0;
+}
+
+void Pass::setResolver(AnalysisResolver *AR) {
+ assert(!Resolver && "Resolver is already set");
+ Resolver = AR;
+}
+
// print - Print out the internal state of the pass. This is called by Analyze
// to print out the contents of an analysis. Otherwise it is not necessary to
// implement this method.
@@ -364,6 +390,14 @@ void PassInfo::unregisterPass() {
getPassRegistrar()->UnregisterPass(*this);
}
+Pass *PassInfo::createPass() const {
+ assert((!isAnalysisGroup() || NormalCtor) &&
+ "No default implementation found for analysis group!");
+ assert(NormalCtor &&
+ "Cannot call createPass on PassInfo without default ctor!");
+ return NormalCtor();
+}
+
//===----------------------------------------------------------------------===//
// Analysis Group Implementation Code
//===----------------------------------------------------------------------===//
@@ -467,4 +501,15 @@ void AnalysisUsage::setPreservesCFG() {
GetCFGOnlyPasses(Preserved).enumeratePasses();
}
+AnalysisUsage &AnalysisUsage::addRequiredID(AnalysisID ID) {
+ assert(ID && "Pass class not registered!");
+ Required.push_back(ID);
+ return *this;
+}
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(AnalysisID ID) {
+ assert(ID && "Pass class not registered!");
+ Required.push_back(ID);
+ RequiredTransitive.push_back(ID);
+ return *this;
+}
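
A typical client of the now out-of-line addRequiredID requires a utility pass by its global ID object; LoopSimplifyID is the standard ID from llvm/Transforms/Scalar.h, while the pass here is hypothetical:

#include "llvm/Pass.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

namespace {
  struct DemoLoopClient : public FunctionPass {  // hypothetical
    static char ID;
    DemoLoopClient() : FunctionPass(&ID) {}
    virtual bool runOnFunction(Function &) { return false; }
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequiredID(LoopSimplifyID);  // require loop-simplified form
      AU.setPreservesCFG();
    }
  };
}
char DemoLoopClient::ID = 0;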
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index a56938c..296b0d1 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -1147,6 +1147,11 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
llvm_unreachable("Unable to schedule pass");
}
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F) {
+ assert(0 && "Unable to find on the fly pass");
+ return NULL;
+}
+
// Destructor
PMDataManager::~PMDataManager() {
for (SmallVector<Pass *, 8>::iterator I = PassVector.begin(),
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 645dd5a..585edf0 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -322,7 +322,13 @@ void Value::replaceAllUsesWith(Value *New) {
Value *Value::stripPointerCasts() {
if (!getType()->isPointerTy())
return this;
+
+ // Even though we don't look through PHI nodes, we could be called on an
+ // instruction in an unreachable block, which may be on a cycle.
+ SmallPtrSet<Value *, 4> Visited;
+
Value *V = this;
+ Visited.insert(V);
do {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
if (!GEP->hasAllZeroIndices())
@@ -338,7 +344,9 @@ Value *Value::stripPointerCasts() {
return V;
}
assert(V->getType()->isPointerTy() && "Unexpected operand type!");
- } while (1);
+ } while (Visited.insert(V));
+
+ return V;
}
Value *Value::getUnderlyingObject(unsigned MaxLookup) {
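
The loop's termination now rides on SmallPtrSet::insert, which returns false for an element already present, so revisiting a value (a cycle in an unreachable block, or simply no further progress) ends the walk. The idiom in isolation; stepOnce is a stand-in for one stripping step, not the patch's code:

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Operator.h"
using namespace llvm;

// One illustrative stripping step: look through zero-index GEPs.
static Value *stepOnce(Value *V) {
  if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
    if (GEP->hasAllZeroIndices())
      return GEP->getPointerOperand();
  return V;
}

static Value *stripGuarded(Value *Start) {
  SmallPtrSet<Value *, 4> Visited;
  Value *V = Start;
  Visited.insert(V);
  do {
    V = stepOnce(V);
  } while (Visited.insert(V));  // false once V repeats: cycle or fixpoint
  return V;
}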
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 75988cc..f97699d 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -85,7 +85,8 @@ namespace { // Anonymous namespace for class
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
if (I->empty() || !I->back().isTerminator()) {
- dbgs() << "Basic Block does not have terminator!\n";
+ dbgs() << "Basic Block in function '" << F.getName()
+ << "' does not have terminator!\n";
WriteAsOperand(dbgs(), I, true);
dbgs() << "\n";
Broken = true;
@@ -1356,7 +1357,7 @@ void Verifier::visitLoadInst(LoadInst &LI) {
void Verifier::visitStoreInst(StoreInst &SI) {
const PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
- Assert1(PTy, "Load operand must be a pointer.", &SI);
+ Assert1(PTy, "Store operand must be a pointer.", &SI);
const Type *ElTy = PTy->getElementType();
Assert2(ElTy == SI.getOperand(0)->getType(),
"Stored value type does not match pointer operand type!",
@@ -1371,8 +1372,8 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
&AI);
Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type",
&AI);
- Assert1(AI.getArraySize()->getType()->isIntegerTy(32),
- "Alloca array size must be i32", &AI);
+ Assert1(AI.getArraySize()->getType()->isIntegerTy(),
+ "Alloca array size must have integer type", &AI);
visitInstruction(AI);
}
@@ -1453,7 +1454,7 @@ void Verifier::visitInstruction(Instruction &I) {
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
// Check to make sure that the "address of" an intrinsic function is never
// taken.
- Assert1(!F->isIntrinsic() || (i == 0 && isa<CallInst>(I)),
+ Assert1(!F->isIntrinsic() || (i + 1 == e && isa<CallInst>(I)),
"Cannot take the address of an intrinsic!", &I);
Assert1(F->getParent() == Mod, "Referencing function in another module!",
&I);
@@ -1536,7 +1537,8 @@ void Verifier::visitInstruction(Instruction &I) {
"Instruction does not dominate all uses!", Op, &I);
}
} else if (isa<InlineAsm>(I.getOperand(i))) {
- Assert1((i == 0 && isa<CallInst>(I)) || (i + 3 == e && isa<InvokeInst>(I)),
+ Assert1((i + 1 == e && isa<CallInst>(I)) ||
+ (i + 3 == e && isa<InvokeInst>(I)),
"Cannot take the address of an inline asm!", &I);
}
}
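
Both relaxed checks encode the same layout fact: a CallInst's callee (and likewise an inline-asm callee) is now the last operand rather than the first. A sketch of the predicate the verifier is applying; the helper is hypothetical:

#include "llvm/Instructions.h"
using namespace llvm;

// Is operand i of this call the callee slot?
static bool isCalleeSlot(const CallInst *CI, unsigned i) {
  // Equivalently: CI->getOperand(i) == CI->getCalledValue().
  return i + 1 == CI->getNumOperands();  // callee moved to the end
}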
@@ -1628,24 +1630,24 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
// If the intrinsic takes MDNode arguments, verify that they are either global
// or are local to *this* function.
- for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
- if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(i)))
+ for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i)
+ if (MDNode *MD = dyn_cast<MDNode>(CI.getArgOperand(i)))
visitMDNode(*MD, CI.getParent()->getParent());
switch (ID) {
default:
break;
case Intrinsic::dbg_declare: { // llvm.dbg.declare
- Assert1(CI.getOperand(1) && isa<MDNode>(CI.getOperand(1)),
+ Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
"invalid llvm.dbg.declare intrinsic call 1", &CI);
- MDNode *MD = cast<MDNode>(CI.getOperand(1));
+ MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
Assert1(MD->getNumOperands() == 1,
"invalid llvm.dbg.declare intrinsic call 2", &CI);
} break;
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
- Assert1(isa<ConstantInt>(CI.getOperand(4)),
+ Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
"alignment argument of memory intrinsics must be a constant int",
&CI);
break;
@@ -1654,10 +1656,10 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
case Intrinsic::gcread:
if (ID == Intrinsic::gcroot) {
AllocaInst *AI =
- dyn_cast<AllocaInst>(CI.getOperand(1)->stripPointerCasts());
+ dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
Assert1(AI && AI->getType()->getElementType()->isPointerTy(),
"llvm.gcroot parameter #1 must be a pointer alloca.", &CI);
- Assert1(isa<Constant>(CI.getOperand(2)),
+ Assert1(isa<Constant>(CI.getArgOperand(1)),
"llvm.gcroot parameter #2 must be a constant.", &CI);
}
@@ -1665,32 +1667,32 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
"Enclosing function does not use GC.", &CI);
break;
case Intrinsic::init_trampoline:
- Assert1(isa<Function>(CI.getOperand(2)->stripPointerCasts()),
+ Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
"llvm.init_trampoline parameter #2 must resolve to a function.",
&CI);
break;
case Intrinsic::prefetch:
- Assert1(isa<ConstantInt>(CI.getOperand(2)) &&
- isa<ConstantInt>(CI.getOperand(3)) &&
- cast<ConstantInt>(CI.getOperand(2))->getZExtValue() < 2 &&
- cast<ConstantInt>(CI.getOperand(3))->getZExtValue() < 4,
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
+ isa<ConstantInt>(CI.getArgOperand(2)) &&
+ cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+ cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
"invalid arguments to llvm.prefetch",
&CI);
break;
case Intrinsic::stackprotector:
- Assert1(isa<AllocaInst>(CI.getOperand(2)->stripPointerCasts()),
+ Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
"llvm.stackprotector parameter #2 must resolve to an alloca.",
&CI);
break;
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
case Intrinsic::invariant_start:
- Assert1(isa<ConstantInt>(CI.getOperand(1)),
+ Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
"size argument of memory use markers must be a constant integer",
&CI);
break;
case Intrinsic::invariant_end:
- Assert1(isa<ConstantInt>(CI.getOperand(2)),
+ Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
"llvm.invariant.end parameter #2 must be a constant integer", &CI);
break;
}