author     rdivacky <rdivacky@FreeBSD.org>  2010-05-27 15:15:58 +0000
committer  rdivacky <rdivacky@FreeBSD.org>  2010-05-27 15:15:58 +0000
commit     1e3dec662ea18131c495db50caccc57f77b7a5fe (patch)
tree       9fad9a5d5dd8c4ff54af48edad9c8cc26dd5fda1 /lib
parent     377552607e51dc1d3e6ff33833f9620bcfe815ac (diff)
Update LLVM to r104832.
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/CMakeLists.txt | 1
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 414
-rw-r--r--  lib/Analysis/InlineCost.cpp | 13
-rw-r--r--  lib/Analysis/Lint.cpp | 23
-rw-r--r--  lib/Analysis/ModuleDebugInfoPrinter.cpp | 86
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 1
-rw-r--r--  lib/AsmParser/LLParser.cpp | 2
-rw-r--r--  lib/AsmParser/LLToken.h | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 76
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 14
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1196
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 135
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 17
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.h | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 27
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 6
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 6
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 29
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 26
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 75
-rw-r--r--  lib/CodeGen/LowerSubregs.cpp | 9
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 113
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 10
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 90
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 13
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 33
-rw-r--r--  lib/CodeGen/MachineSSAUpdater.cpp | 546
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 5
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 35
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 67
-rw-r--r--  lib/CodeGen/PHIElimination.h | 2
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 64
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 12
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 6
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 43
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 1344
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 2
-rw-r--r--  lib/CodeGen/RegAllocLocal.cpp | 69
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 2
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 4
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 1
-rw-r--r--  lib/CodeGen/ScheduleDAGEmit.cpp | 6
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 6
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.h | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 144
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 262
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 98
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.h | 10
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 39
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 18
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 147
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 71
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 22
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 53
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 39
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 89
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp | 4
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 59
-rw-r--r--  lib/CodeGen/Spiller.cpp | 23
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 22
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 8
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 4
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 208
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 227
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 44
-rw-r--r--  lib/CompilerDriver/Action.cpp | 46
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 4
-rw-r--r--  lib/MC/CMakeLists.txt | 3
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 1
-rw-r--r--  lib/MC/MCAsmInfoDarwin.cpp | 1
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 59
-rw-r--r--  lib/MC/MCAssembler.cpp | 673
-rw-r--r--  lib/MC/MCContext.cpp | 53
-rw-r--r--  lib/MC/MCExpr.cpp | 14
-rw-r--r--  lib/MC/MCInst.cpp | 2
-rw-r--r--  lib/MC/MCLabel.cpp | 21
-rw-r--r--  lib/MC/MCLoggingStreamer.cpp | 208
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 226
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 9
-rw-r--r--  lib/MC/MCParser/AsmLexer.cpp | 12
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 251
-rw-r--r--  lib/MC/MCSection.cpp | 27
-rw-r--r--  lib/MC/MCSectionCOFF.cpp | 76
-rw-r--r--  lib/MC/MCSectionMachO.cpp | 11
-rw-r--r--  lib/MC/MCStreamer.cpp | 2
-rw-r--r--  lib/MC/MCSymbol.cpp | 12
-rw-r--r--  lib/MC/MachObjectWriter.cpp | 201
-rw-r--r--  lib/Support/APInt.cpp | 7
-rw-r--r--  lib/Support/CommandLine.cpp | 2
-rw-r--r--  lib/Support/ErrorHandling.cpp | 7
-rw-r--r--  lib/Support/PrettyStackTrace.cpp | 4
-rw-r--r--  lib/Support/StringRef.cpp | 28
-rw-r--r--  lib/Support/Timer.cpp | 4
-rw-r--r--  lib/Support/Twine.cpp | 8
-rw-r--r--  lib/Support/raw_ostream.cpp | 64
-rw-r--r--  lib/System/Unix/Signals.inc | 4
-rw-r--r--  lib/System/Win32/Signals.inc | 4
-rw-r--r--  lib/Target/ARM/ARM.h | 8
-rw-r--r--  lib/Target/ARM/ARM.td | 12
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 259
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 14
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 248
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 9
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 170
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 124
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 510
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 388
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 21
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 24
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 78
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 98
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 64
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 143
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 16
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 12
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h | 10
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 180
-rw-r--r--  lib/Target/ARM/ARMRelocations.h | 8
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.cpp | 116
-rw-r--r--  lib/Target/ARM/ARMSelectionDAGInfo.h | 17
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 13
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 6
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 11
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 35
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 7
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMMCInstLower.h | 2
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 2
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 128
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 50
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 15
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 30
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 9
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 19
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.h | 9
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.td | 2
-rw-r--r--  lib/Target/Alpha/AlphaSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/Alpha/AlphaSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.cpp | 3
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.h | 5
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.cpp | 11
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.h | 9
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.td | 30
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 10
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 7
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.td | 54
-rw-r--r--  lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp | 6
-rw-r--r--  lib/Target/Blackfin/BlackfinSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.cpp | 1
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.h | 5
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 13
-rw-r--r--  lib/Target/CBackend/CTargetMachine.h | 11
-rw-r--r--  lib/Target/CellSPU/README.txt | 2
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 23
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 9
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 6
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 4
-rw-r--r--  lib/Target/CellSPU/SPUSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/CellSPU/SPUSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 1
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 6
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 15
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 11
-rw-r--r--  lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 14
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 9
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 12
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.td | 10
-rw-r--r--  lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h | 5
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 23
-rw-r--r--  lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 6
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 18
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 15
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.td | 13
-rw-r--r--  lib/Target/MSP430/MSP430SelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430SelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 6
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 12
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 16
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 9
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 10
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 9
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 26
-rw-r--r--  lib/Target/Mips/MipsSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/Mips/MipsSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 6
-rw-r--r--  lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.cpp | 14
-rw-r--r--  lib/Target/PIC16/PIC16ISelDAGToDAG.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.cpp | 11
-rw-r--r--  lib/Target/PIC16/PIC16InstrInfo.h | 9
-rw-r--r--  lib/Target/PIC16/PIC16Section.h | 8
-rw-r--r--  lib/Target/PIC16/PIC16SelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/PIC16/PIC16SelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/PIC16/PIC16TargetMachine.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16TargetMachine.h | 6
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 8
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.h | 7
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 17
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 9
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 18
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 23
-rw-r--r--  lib/Target/PowerPC/PPCSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/PowerPC/PPCSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 5
-rw-r--r--  lib/Target/README.txt | 15
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 12
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 9
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.td | 6
-rw-r--r--  lib/Target/Sparc/SparcSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/Sparc/SparcSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 5
-rw-r--r--  lib/Target/SubtargetFeature.cpp | 34
-rw-r--r--  lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 22
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 22
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 15
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 11
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.td | 58
-rw-r--r--  lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h | 6
-rw-r--r--  lib/Target/TargetMachine.cpp | 12
-rw-r--r--  lib/Target/TargetRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 199
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 94
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.h | 2
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 139
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.h | 2
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 7
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 61
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 8
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 33
-rw-r--r--  lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 3
-rw-r--r--  lib/Target/X86/SSEDomainFix.cpp | 4
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp | 86
-rw-r--r--  lib/Target/X86/X86COFFMachineModuleInfo.h | 14
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 14
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 27
-rw-r--r--  lib/Target/X86/X86FloatingPointRegKill.cpp | 110
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 12
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 370
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 20
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 155
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 65
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 15
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 174
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 11
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 162
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 95
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 10
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 197
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.cpp | 225
-rw-r--r--  lib/Target/X86/X86SelectionDAGInfo.h | 32
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 32
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 5
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 21
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 15
-rw-r--r--  lib/Target/XCore/XCoreSelectionDAGInfo.cpp | 5
-rw-r--r--  lib/Target/XCore/XCoreSelectionDAGInfo.h | 4
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 3
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 6
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 18
-rw-r--r--  lib/Transforms/IPO/InlineAlways.cpp | 3
-rw-r--r--  lib/Transforms/IPO/InlineSimple.cpp | 3
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 464
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 6
-rw-r--r--  lib/Transforms/InstCombine/InstCombine.h | 4
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 72
-rw-r--r--  lib/Transforms/InstCombine/InstCombineWorklist.h | 2
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 490
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 14
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 11
-rw-r--r--  lib/Transforms/Scalar/Sink.cpp | 267
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 2
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 4
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 531
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 30
-rw-r--r--  lib/VMCore/PassManager.cpp | 46
-rw-r--r--  lib/VMCore/ValueTypes.cpp | 6
-rw-r--r--  lib/VMCore/Verifier.cpp | 7
302 files changed, 10725 insertions, 5828 deletions
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index ad05dd9..5a37ce0 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -28,6 +28,7 @@ add_llvm_library(LLVMAnalysis
LoopPass.cpp
MemoryBuiltins.cpp
MemoryDependenceAnalysis.cpp
+ ModuleDebugInfoPrinter.cpp
PHITransAddr.cpp
PointerTracking.cpp
PostDominators.cpp
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 141b181..a7b6d2b 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -32,42 +32,6 @@ using namespace llvm::dwarf;
// DIDescriptor
//===----------------------------------------------------------------------===//
-/// ValidDebugInfo - Return true if V represents valid debug info value.
-/// FIXME : Add DIDescriptor.isValid()
-bool DIDescriptor::ValidDebugInfo(MDNode *N, unsigned OptLevel) {
- if (!N)
- return false;
-
- DIDescriptor DI(N);
-
- // Check current version. Allow Version7 for now.
- unsigned Version = DI.getVersion();
- if (Version != LLVMDebugVersion && Version != LLVMDebugVersion7)
- return false;
-
- switch (DI.getTag()) {
- case DW_TAG_variable:
- assert(DIVariable(N).Verify() && "Invalid DebugInfo value");
- break;
- case DW_TAG_compile_unit:
- assert(DICompileUnit(N).Verify() && "Invalid DebugInfo value");
- break;
- case DW_TAG_subprogram:
- assert(DISubprogram(N).Verify() && "Invalid DebugInfo value");
- break;
- case DW_TAG_lexical_block:
- // FIXME: This interfers with the quality of generated code during
- // optimization.
- if (OptLevel != CodeGenOpt::None)
- return false;
- // FALLTHROUGH
- default:
- break;
- }
-
- return true;
-}
-
StringRef
DIDescriptor::getStringField(unsigned Elt) const {
if (DbgNode == 0)
@@ -96,7 +60,7 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
return DIDescriptor();
if (Elt < DbgNode->getNumOperands())
- return DIDescriptor(dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt)));
+ return DIDescriptor(dyn_cast_or_null<const MDNode>(DbgNode->getOperand(Elt)));
return DIDescriptor();
}
@@ -246,7 +210,7 @@ bool DIDescriptor::isEnumerator() const {
// Simple Descriptor Constructors and other Methods
//===----------------------------------------------------------------------===//
-DIType::DIType(MDNode *N) : DIScope(N) {
+DIType::DIType(const MDNode *N) : DIScope(N) {
if (!N) return;
if (!isBasicType() && !isDerivedType() && !isCompositeType()) {
DbgNode = 0;
@@ -271,9 +235,11 @@ void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) {
// which, due to uniquing, has merged with the source. We shield clients from
// this detail by allowing a value to be replaced with replaceAllUsesWith()
// itself.
- if (getNode() != D.getNode()) {
- MDNode *Node = DbgNode;
- Node->replaceAllUsesWith(D.getNode());
+ if (DbgNode != D) {
+ MDNode *Node = const_cast<MDNode*>(DbgNode);
+ const MDNode *DN = D;
+ const Value *V = cast_or_null<Value>(DN);
+ Node->replaceAllUsesWith(const_cast<Value*>(V));
Node->destroy();
}
}
@@ -366,6 +332,9 @@ bool DIVariable::Verify() const {
if (!getContext().Verify())
return false;
+ if (!getCompileUnit().Verify())
+ return false;
+
DIType Ty = getType();
if (!Ty.Verify())
return false;
@@ -381,6 +350,17 @@ bool DILocation::Verify() const {
return DbgNode->getNumOperands() == 4;
}
+/// Verify - Verify that a namespace descriptor is well formed.
+bool DINameSpace::Verify() const {
+ if (!DbgNode)
+ return false;
+ if (getName().empty())
+ return false;
+ if (!getCompileUnit().Verify())
+ return false;
+ return true;
+}
+
/// getOriginalTypeSize - If this type is derived from a base type then
/// return base type size.
uint64_t DIDerivedType::getOriginalTypeSize() const {
@@ -394,7 +374,7 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
if (!BaseType.isValid())
return getSizeInBits();
if (BaseType.isDerivedType())
- return DIDerivedType(BaseType.getNode()).getOriginalTypeSize();
+ return DIDerivedType(BaseType).getOriginalTypeSize();
else
return BaseType.getSizeInBits();
}
@@ -410,7 +390,7 @@ bool DIVariable::isInlinedFnArgument(const Function *CurFn) {
return false;
// This variable is not an inlined function argument if its scope
// does not describe the current function.
- return !(DISubprogram(getContext().getNode()).describes(CurFn));
+ return !(DISubprogram(getContext()).describes(CurFn));
}
/// describes - Return true if this subprogram provides debugging
@@ -475,144 +455,182 @@ StringRef DIScope::getDirectory() const {
//===----------------------------------------------------------------------===//
-/// dump - Print descriptor.
-void DIDescriptor::dump() const {
- dbgs() << "[" << dwarf::TagString(getTag()) << "] ";
- dbgs().write_hex((intptr_t) &*DbgNode) << ']';
+/// print - Print descriptor.
+void DIDescriptor::print(raw_ostream &OS) const {
+ OS << "[" << dwarf::TagString(getTag()) << "] ";
+ OS.write_hex((intptr_t) &*DbgNode) << ']';
}
-/// dump - Print compile unit.
-void DICompileUnit::dump() const {
+/// print - Print compile unit.
+void DICompileUnit::print(raw_ostream &OS) const {
if (getLanguage())
- dbgs() << " [" << dwarf::LanguageString(getLanguage()) << "] ";
+ OS << " [" << dwarf::LanguageString(getLanguage()) << "] ";
- dbgs() << " [" << getDirectory() << "/" << getFilename() << " ]";
+ OS << " [" << getDirectory() << "/" << getFilename() << "]";
}
-/// dump - Print type.
-void DIType::dump() const {
+/// print - Print type.
+void DIType::print(raw_ostream &OS) const {
if (!DbgNode) return;
StringRef Res = getName();
if (!Res.empty())
- dbgs() << " [" << Res << "] ";
+ OS << " [" << Res << "] ";
unsigned Tag = getTag();
- dbgs() << " [" << dwarf::TagString(Tag) << "] ";
+ OS << " [" << dwarf::TagString(Tag) << "] ";
// TODO : Print context
- getCompileUnit().dump();
- dbgs() << " ["
- << getLineNumber() << ", "
- << getSizeInBits() << ", "
- << getAlignInBits() << ", "
- << getOffsetInBits()
+ getCompileUnit().print(OS);
+ OS << " ["
+ << "line " << getLineNumber() << ", "
+ << getSizeInBits() << " bits, "
+ << getAlignInBits() << " bit alignment, "
+ << getOffsetInBits() << " bit offset"
<< "] ";
if (isPrivate())
- dbgs() << " [private] ";
+ OS << " [private] ";
else if (isProtected())
- dbgs() << " [protected] ";
+ OS << " [protected] ";
if (isForwardDecl())
- dbgs() << " [fwd] ";
+ OS << " [fwd] ";
if (isBasicType())
- DIBasicType(DbgNode).dump();
+ DIBasicType(DbgNode).print(OS);
else if (isDerivedType())
- DIDerivedType(DbgNode).dump();
+ DIDerivedType(DbgNode).print(OS);
else if (isCompositeType())
- DICompositeType(DbgNode).dump();
+ DICompositeType(DbgNode).print(OS);
else {
- dbgs() << "Invalid DIType\n";
+ OS << "Invalid DIType\n";
return;
}
- dbgs() << "\n";
+ OS << "\n";
}
-/// dump - Print basic type.
-void DIBasicType::dump() const {
- dbgs() << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
+/// print - Print basic type.
+void DIBasicType::print(raw_ostream &OS) const {
+ OS << " [" << dwarf::AttributeEncodingString(getEncoding()) << "] ";
}
-/// dump - Print derived type.
-void DIDerivedType::dump() const {
- dbgs() << "\n\t Derived From: "; getTypeDerivedFrom().dump();
+/// print - Print derived type.
+void DIDerivedType::print(raw_ostream &OS) const {
+ OS << "\n\t Derived From: "; getTypeDerivedFrom().print(OS);
}
-/// dump - Print composite type.
-void DICompositeType::dump() const {
+/// print - Print composite type.
+void DICompositeType::print(raw_ostream &OS) const {
DIArray A = getTypeArray();
- dbgs() << " [" << A.getNumElements() << " elements]";
+ OS << " [" << A.getNumElements() << " elements]";
}
-/// dump - Print global.
-void DIGlobal::dump() const {
+/// print - Print subprogram.
+void DISubprogram::print(raw_ostream &OS) const {
StringRef Res = getName();
if (!Res.empty())
- dbgs() << " [" << Res << "] ";
+ OS << " [" << Res << "] ";
unsigned Tag = getTag();
- dbgs() << " [" << dwarf::TagString(Tag) << "] ";
+ OS << " [" << dwarf::TagString(Tag) << "] ";
// TODO : Print context
- getCompileUnit().dump();
- dbgs() << " [" << getLineNumber() << "] ";
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
if (isLocalToUnit())
- dbgs() << " [local] ";
+ OS << " [local] ";
if (isDefinition())
- dbgs() << " [def] ";
+ OS << " [def] ";
- if (isGlobalVariable())
- DIGlobalVariable(DbgNode).dump();
-
- dbgs() << "\n";
+ OS << "\n";
}
-/// dump - Print subprogram.
-void DISubprogram::dump() const {
+/// print - Print global variable.
+void DIGlobalVariable::print(raw_ostream &OS) const {
+ OS << " [";
StringRef Res = getName();
if (!Res.empty())
- dbgs() << " [" << Res << "] ";
+ OS << " [" << Res << "] ";
unsigned Tag = getTag();
- dbgs() << " [" << dwarf::TagString(Tag) << "] ";
+ OS << " [" << dwarf::TagString(Tag) << "] ";
// TODO : Print context
- getCompileUnit().dump();
- dbgs() << " [" << getLineNumber() << "] ";
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
if (isLocalToUnit())
- dbgs() << " [local] ";
+ OS << " [local] ";
if (isDefinition())
- dbgs() << " [def] ";
+ OS << " [def] ";
+
+ if (isGlobalVariable())
+ DIGlobalVariable(DbgNode).print(OS);
+ OS << "]\n";
+}
+
+/// print - Print variable.
+void DIVariable::print(raw_ostream &OS) const {
+ StringRef Res = getName();
+ if (!Res.empty())
+ OS << " [" << Res << "] ";
+
+ getCompileUnit().print(OS);
+ OS << " [" << getLineNumber() << "] ";
+ getType().print(OS);
+ OS << "\n";
+
+ // FIXME: Dump complex addresses
+}
+
+/// dump - Print descriptor to dbgs() with a newline.
+void DIDescriptor::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print compile unit to dbgs() with a newline.
+void DICompileUnit::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print type to dbgs() with a newline.
+void DIType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
- dbgs() << "\n";
+/// dump - Print basic type to dbgs() with a newline.
+void DIBasicType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print derived type to dbgs() with a newline.
+void DIDerivedType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print composite type to dbgs() with a newline.
+void DICompositeType::dump() const {
+ print(dbgs()); dbgs() << '\n';
+}
+
+/// dump - Print subprogram to dbgs() with a newline.
+void DISubprogram::dump() const {
+ print(dbgs()); dbgs() << '\n';
}
/// dump - Print global variable.
void DIGlobalVariable::dump() const {
- dbgs() << " [";
- getGlobal()->dump();
- dbgs() << "] ";
+ print(dbgs()); dbgs() << '\n';
}
/// dump - Print variable.
void DIVariable::dump() const {
- StringRef Res = getName();
- if (!Res.empty())
- dbgs() << " [" << Res << "] ";
-
- getCompileUnit().dump();
- dbgs() << " [" << getLineNumber() << "] ";
- getType().dump();
- dbgs() << "\n";
-
- // FIXME: Dump complex addresses
+ print(dbgs()); dbgs() << '\n';
}
//===----------------------------------------------------------------------===//
@@ -641,7 +659,7 @@ DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)));
else
for (unsigned i = 0; i != NumTys; ++i)
- Elts.push_back(Tys[i].getNode());
+ Elts.push_back(Tys[i]);
return DIArray(MDNode::get(VMContext,Elts.data(), Elts.size()));
}
@@ -694,7 +712,7 @@ DIFile DIFactory::CreateFile(StringRef Filename,
GetTagConstant(dwarf::DW_TAG_file_type),
MDString::get(VMContext, Filename),
MDString::get(VMContext, Directory),
- CU.getNode()
+ CU
};
return DIFile(MDNode::get(VMContext, &Elts[0], 4));
@@ -722,9 +740,9 @@ DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
unsigned Encoding) {
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_base_type),
- Context.getNode(),
+ Context,
MDString::get(VMContext, Name),
- F.getNode(),
+ F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
@@ -747,9 +765,9 @@ DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
unsigned Encoding) {
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_base_type),
- Context.getNode(),
+ Context,
MDString::get(VMContext, Name),
- F.getNode(),
+ F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
SizeInBits,
AlignInBits,
@@ -766,7 +784,7 @@ DIType DIFactory::CreateArtificialType(DIType Ty) {
return Ty;
SmallVector<Value *, 9> Elts;
- MDNode *N = Ty.getNode();
+ MDNode *N = Ty;
assert (N && "Unexpected input DIType!");
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
if (Value *V = N->getOperand(i))
@@ -798,15 +816,15 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
DIType DerivedFrom) {
Value *Elts[] = {
GetTagConstant(Tag),
- Context.getNode(),
+ Context,
MDString::get(VMContext, Name),
- F.getNode(),
+ F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- DerivedFrom.getNode(),
+ DerivedFrom,
};
return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
}
@@ -826,15 +844,15 @@ DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
DIType DerivedFrom) {
Value *Elts[] = {
GetTagConstant(Tag),
- Context.getNode(),
+ Context,
MDString::get(VMContext, Name),
- F.getNode(),
+ F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
SizeInBits,
AlignInBits,
OffsetInBits,
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- DerivedFrom.getNode(),
+ DerivedFrom,
};
return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10));
}
@@ -857,16 +875,16 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
Value *Elts[] = {
GetTagConstant(Tag),
- Context.getNode(),
+ Context,
MDString::get(VMContext, Name),
- F.getNode(),
+ F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits),
ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits),
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- DerivedFrom.getNode(),
- Elements.getNode(),
+ DerivedFrom,
+ Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang),
ContainingType
};
@@ -890,16 +908,16 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
Value *Elts[] = {
GetTagConstant(Tag),
- Context.getNode(),
+ Context,
MDString::get(VMContext, Name),
- F.getNode(),
+ F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
SizeInBits,
AlignInBits,
OffsetInBits,
ConstantInt::get(Type::getInt32Ty(VMContext), Flags),
- DerivedFrom.getNode(),
- Elements.getNode(),
+ DerivedFrom,
+ Elements,
ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang)
};
return DICompositeType(MDNode::get(VMContext, &Elts[0], 12));
@@ -925,18 +943,18 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_subprogram),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
- Context.getNode(),
+ Context,
MDString::get(VMContext, Name),
MDString::get(VMContext, DisplayName),
MDString::get(VMContext, LinkageName),
- F.getNode(),
+ F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- Ty.getNode(),
+ Ty,
ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
- ContainingType.getNode(),
+ ContainingType,
ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial),
ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized)
};
@@ -947,9 +965,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
/// given declaration.
DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) {
if (SPDeclaration.isDefinition())
- return DISubprogram(SPDeclaration.getNode());
+ return DISubprogram(SPDeclaration);
- MDNode *DeclNode = SPDeclaration.getNode();
+ MDNode *DeclNode = SPDeclaration;
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_subprogram),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
@@ -982,13 +1000,13 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_variable),
llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
- Context.getNode(),
+ Context,
MDString::get(VMContext, Name),
MDString::get(VMContext, DisplayName),
MDString::get(VMContext, LinkageName),
- F.getNode(),
+ F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- Ty.getNode(),
+ Ty,
ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
Val
@@ -1010,16 +1028,24 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
StringRef Name,
DIFile F,
unsigned LineNo,
- DIType Ty) {
+ DIType Ty, bool AlwaysPreserve) {
Value *Elts[] = {
GetTagConstant(Tag),
- Context.getNode(),
+ Context,
MDString::get(VMContext, Name),
- F.getNode(),
+ F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
- Ty.getNode(),
+ Ty,
};
- return DIVariable(MDNode::get(VMContext, &Elts[0], 6));
+ MDNode *Node = MDNode::get(VMContext, &Elts[0], 6);
+ if (AlwaysPreserve) {
+ // The optimizer may remove local variables. If there is an interest
+ // in preserving variable info in such situations, then stash it in a
+ // named mdnode.
+ NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.lv");
+ NMD->addOperand(Node);
+ }
+ return DIVariable(Node);
}
@@ -1033,11 +1059,11 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
SmallVector<Value *, 9> &addr) {
SmallVector<Value *, 9> Elts;
Elts.push_back(GetTagConstant(Tag));
- Elts.push_back(Context.getNode());
+ Elts.push_back(Context);
Elts.push_back(MDString::get(VMContext, Name));
- Elts.push_back(F.getNode());
+ Elts.push_back(F);
Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo));
- Elts.push_back(Ty.getNode());
+ Elts.push_back(Ty);
Elts.insert(Elts.end(), addr.begin(), addr.end());
return DIVariable(MDNode::get(VMContext, &Elts[0], 6+addr.size()));
@@ -1050,7 +1076,7 @@ DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context,
unsigned LineNo, unsigned Col) {
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_lexical_block),
- Context.getNode(),
+ Context,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
ConstantInt::get(Type::getInt32Ty(VMContext), Col)
};
@@ -1064,9 +1090,9 @@ DINameSpace DIFactory::CreateNameSpace(DIDescriptor Context, StringRef Name,
unsigned LineNo) {
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_namespace),
- Context.getNode(),
+ Context,
MDString::get(VMContext, Name),
- F.getNode(),
+ F,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)
};
return DINameSpace(MDNode::get(VMContext, &Elts[0], 5));
@@ -1078,8 +1104,8 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
Value *Elts[] = {
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
- S.getNode(),
- OrigLoc.getNode(),
+ S,
+ OrigLoc,
};
return DILocation(MDNode::get(VMContext, &Elts[0], 4));
}
@@ -1090,7 +1116,7 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
Value *Elts[] = {
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo),
- S.getNode(),
+ S,
OrigLoc
};
return DILocation(MDNode::get(VMContext, &Elts[0], 4));
@@ -1104,12 +1130,12 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
Instruction *InsertBefore) {
assert(Storage && "no storage passed to dbg.declare");
- assert(D.getNode() && "empty DIVariable passed to dbg.declare");
+ assert(D.Verify() && "empty DIVariable passed to dbg.declare");
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
- D.getNode() };
+ D };
return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
}
@@ -1117,12 +1143,12 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
BasicBlock *InsertAtEnd) {
assert(Storage && "no storage passed to dbg.declare");
- assert(D.getNode() && "empty DIVariable passed to dbg.declare");
+ assert(D.Verify() && "invalid DIVariable passed to dbg.declare");
if (!DeclareFn)
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1),
- D.getNode() };
+ D };
// If this block already has a terminator then insert this intrinsic
// before the terminator.
@@ -1136,13 +1162,13 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
DIVariable D,
Instruction *InsertBefore) {
assert(V && "no value passed to dbg.value");
- assert(D.getNode() && "empty DIVariable passed to dbg.value");
+ assert(D.Verify() && "invalid DIVariable passed to dbg.value");
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
- D.getNode() };
+ D };
return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
}
@@ -1151,13 +1177,13 @@ Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset,
DIVariable D,
BasicBlock *InsertAtEnd) {
assert(V && "no value passed to dbg.value");
- assert(D.getNode() && "empty DIVariable passed to dbg.value");
+ assert(D.Verify() && "invalid DIVariable passed to dbg.value");
if (!ValueFn)
ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
Value *Args[] = { MDNode::get(V->getContext(), &V, 1),
ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
- D.getNode() };
+ D };
return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
}
@@ -1182,11 +1208,11 @@ void DebugInfoFinder::processModule(Module &M) {
DIDescriptor Scope(Loc.getScope(Ctx));
if (Scope.isCompileUnit())
- addCompileUnit(DICompileUnit(Scope.getNode()));
+ addCompileUnit(DICompileUnit(Scope));
else if (Scope.isSubprogram())
- processSubprogram(DISubprogram(Scope.getNode()));
+ processSubprogram(DISubprogram(Scope));
else if (Scope.isLexicalBlock())
- processLexicalBlock(DILexicalBlock(Scope.getNode()));
+ processLexicalBlock(DILexicalBlock(Scope));
if (MDNode *IA = Loc.getInlinedAt(Ctx))
processLocation(DILocation(IA));
@@ -1208,13 +1234,13 @@ void DebugInfoFinder::processModule(Module &M) {
/// processLocation - Process DILocation.
void DebugInfoFinder::processLocation(DILocation Loc) {
if (!Loc.Verify()) return;
- DIDescriptor S(Loc.getScope().getNode());
+ DIDescriptor S(Loc.getScope());
if (S.isCompileUnit())
- addCompileUnit(DICompileUnit(S.getNode()));
+ addCompileUnit(DICompileUnit(S));
else if (S.isSubprogram())
- processSubprogram(DISubprogram(S.getNode()));
+ processSubprogram(DISubprogram(S));
else if (S.isLexicalBlock())
- processLexicalBlock(DILexicalBlock(S.getNode()));
+ processLexicalBlock(DILexicalBlock(S));
processLocation(Loc.getOrigLocation());
}
@@ -1225,18 +1251,18 @@ void DebugInfoFinder::processType(DIType DT) {
addCompileUnit(DT.getCompileUnit());
if (DT.isCompositeType()) {
- DICompositeType DCT(DT.getNode());
+ DICompositeType DCT(DT);
processType(DCT.getTypeDerivedFrom());
DIArray DA = DCT.getTypeArray();
for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
DIDescriptor D = DA.getElement(i);
if (D.isType())
- processType(DIType(D.getNode()));
+ processType(DIType(D));
else if (D.isSubprogram())
- processSubprogram(DISubprogram(D.getNode()));
+ processSubprogram(DISubprogram(D));
}
} else if (DT.isDerivedType()) {
- DIDerivedType DDT(DT.getNode());
+ DIDerivedType DDT(DT);
processType(DDT.getTypeDerivedFrom());
}
}
@@ -1245,9 +1271,9 @@ void DebugInfoFinder::processType(DIType DT) {
void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
DIScope Context = LB.getContext();
if (Context.isLexicalBlock())
- return processLexicalBlock(DILexicalBlock(Context.getNode()));
+ return processLexicalBlock(DILexicalBlock(Context));
else
- return processSubprogram(DISubprogram(Context.getNode()));
+ return processSubprogram(DISubprogram(Context));
}
/// processSubprogram - Process DISubprogram.
@@ -1267,7 +1293,7 @@ void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
if (!DV.isVariable())
return;
- if (!NodesSeen.insert(DV.getNode()))
+ if (!NodesSeen.insert(DV))
return;
addCompileUnit(DIVariable(N).getCompileUnit());
@@ -1279,10 +1305,10 @@ bool DebugInfoFinder::addType(DIType DT) {
if (!DT.isValid())
return false;
- if (!NodesSeen.insert(DT.getNode()))
+ if (!NodesSeen.insert(DT))
return false;
- TYs.push_back(DT.getNode());
+ TYs.push_back(DT);
return true;
}
@@ -1291,34 +1317,34 @@ bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
if (!CU.Verify())
return false;
- if (!NodesSeen.insert(CU.getNode()))
+ if (!NodesSeen.insert(CU))
return false;
- CUs.push_back(CU.getNode());
+ CUs.push_back(CU);
return true;
}
/// addGlobalVariable - Add global variable into GVs.
bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
- if (!DIDescriptor(DIG.getNode()).isGlobalVariable())
+ if (!DIDescriptor(DIG).isGlobalVariable())
return false;
- if (!NodesSeen.insert(DIG.getNode()))
+ if (!NodesSeen.insert(DIG))
return false;
- GVs.push_back(DIG.getNode());
+ GVs.push_back(DIG);
return true;
}
// addSubprogram - Add subprogram into SPs.
bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
- if (!DIDescriptor(SP.getNode()).isSubprogram())
+ if (!DIDescriptor(SP).isSubprogram())
return false;
- if (!NodesSeen.insert(SP.getNode()))
+ if (!NodesSeen.insert(SP))
return false;
- SPs.push_back(SP.getNode());
+ SPs.push_back(SP);
return true;
}
@@ -1333,8 +1359,8 @@ static Value *findDbgGlobalDeclare(GlobalVariable *V) {
DIDescriptor DIG(cast_or_null<MDNode>(NMD->getOperand(i)));
if (!DIG.isGlobalVariable())
continue;
- if (DIGlobalVariable(DIG.getNode()).getGlobal() == V)
- return DIG.getNode();
+ if (DIGlobalVariable(DIG).getGlobal() == V)
+ return DIG;
}
return 0;
}
@@ -1406,13 +1432,13 @@ bool llvm::getLocationInfo(const Value *V, std::string &DisplayName,
}
/// getDISubprogram - Find subprogram that is enclosing this scope.
-DISubprogram llvm::getDISubprogram(MDNode *Scope) {
+DISubprogram llvm::getDISubprogram(const MDNode *Scope) {
DIDescriptor D(Scope);
if (D.isSubprogram())
return DISubprogram(Scope);
if (D.isLexicalBlock())
- return getDISubprogram(DILexicalBlock(Scope).getContext().getNode());
+ return getDISubprogram(DILexicalBlock(Scope).getContext());
return DISubprogram();
}
@@ -1420,10 +1446,10 @@ DISubprogram llvm::getDISubprogram(MDNode *Scope) {
/// getDICompositeType - Find underlying composite type.
DICompositeType llvm::getDICompositeType(DIType T) {
if (T.isCompositeType())
- return DICompositeType(T.getNode());
+ return DICompositeType(T);
if (T.isDerivedType())
- return getDICompositeType(DIDerivedType(T.getNode()).getTypeDerivedFrom());
+ return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
return DICompositeType();
}
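The wholesale removal of .getNode() calls throughout this file depends on a matching header change that is not visible here, since the diff is limited to 'lib'. A minimal sketch of what include/llvm/Analysis/DebugInfo.h presumably gained so that the rewritten call sites type-check (an assumption, not part of this commit's text):

    // Sketch only: DIDescriptor wraps a const MDNode and converts implicitly,
    // so descriptors can be passed directly wherever an MDNode* is expected.
    class DIDescriptor {
    protected:
      const MDNode *DbgNode;
    public:
      explicit DIDescriptor(const MDNode *N = 0) : DbgNode(N) {}
      // Replaces the old getNode() accessor at the call sites above.
      operator MDNode *() const { return const_cast<MDNode *>(DbgNode); }
    };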
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 6271371..98dbb69 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -175,7 +175,11 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
// Each argument to a call takes on average one instruction to set up.
NumInsts += CS.arg_size();
- ++NumCalls;
+
+ // We don't want inline asm to count as a call - that would prevent loop
+ // unrolling. The argument setup cost is still real, though.
+ if (!isa<InlineAsm>(CS.getCalledValue()))
+ ++NumCalls;
}
}
@@ -455,6 +459,11 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
else
CallerMetrics.NumInsts = 0;
- // We are not updating the argumentweights. We have already determined that
+ // We are not updating the argument weights. We have already determined that
// Caller is a fairly large function, so we accept the loss of precision.
}
+
+/// clear - empty the cache of inline costs
+void InlineCostAnalyzer::clear() {
+ CachedFunctionInfo.clear();
+}
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index 25d4f95..a031cbc 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -179,6 +179,7 @@ bool Lint::runOnFunction(Function &F) {
TD = getAnalysisIfAvailable<TargetData>();
visit(F);
dbgs() << MessagesStr.str();
+ Messages.clear();
return false;
}
@@ -193,7 +194,6 @@ void Lint::visitCallSite(CallSite CS) {
Instruction &I = *CS.getInstruction();
Value *Callee = CS.getCalledValue();
- // TODO: Check function alignment?
visitMemoryReference(I, Callee, 0, 0, MemRef::Callee);
if (Function *F = dyn_cast<Function>(Callee->stripPointerCasts())) {
@@ -219,7 +219,15 @@ void Lint::visitCallSite(CallSite CS) {
// TODO: Check sret attribute.
}
- // TODO: Check the "tail" keyword constraints.
+ if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())
+ for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
+ AI != AE; ++AI) {
+ Value *Obj = (*AI)->getUnderlyingObject();
+ Assert1(!isa<AllocaInst>(Obj) && !isa<VAArgInst>(Obj),
+ "Undefined behavior: Call with \"tail\" keyword references "
+ "alloca or va_arg", &I);
+ }
+
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
switch (II->getIntrinsicID()) {
@@ -280,8 +288,11 @@ void Lint::visitCallSite(CallSite CS) {
break;
case Intrinsic::stackrestore:
+ // Stackrestore doesn't read or write memory, but it sets the
+ // stack pointer, which the compiler may read from or write to
+ // at any time, so check it for both readability and writeability.
visitMemoryReference(I, CS.getArgument(0), 0, 0,
- MemRef::Read);
+ MemRef::Read | MemRef::Write);
break;
}
}
@@ -482,14 +493,10 @@ void llvm::lintFunction(const Function &f) {
}
/// lintModule - Check a module for errors, printing messages on stderr.
-/// Return true if the module is corrupt.
///
-void llvm::lintModule(const Module &M, std::string *ErrorInfo) {
+void llvm::lintModule(const Module &M) {
PassManager PM;
Lint *V = new Lint();
PM.add(V);
PM.run(const_cast<Module&>(M));
-
- if (ErrorInfo)
- *ErrorInfo = V->MessagesStr.str();
}
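The new tail-call check above catches a real source of undefined behavior: a call marked "tail" is allowed to reuse or deallocate the caller's stack frame, so its arguments must not point into that frame. An illustrative C++ analogue of the pattern the check rejects (not taken from the commit):

    // If the compiler honors a tail-call marking here, buf -- an alloca in
    // the caller's frame -- may already be dead when callee() reads it.
    void callee(char *p);
    void caller() {
      char buf[16];   // lowered to an alloca in LLVM IR
      callee(buf);    // marking this call "tail" would be undefined behavior
    }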
diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp
new file mode 100644
index 0000000..556d4c8
--- /dev/null
+++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -0,0 +1,86 @@
+//===-- ModuleDebugInfoPrinter.cpp - Prints module debug info metadata ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass decodes the debug info metadata in a module and prints it in a
+// (sufficiently-prepared-) human-readable form.
+//
+// For example, run this pass from opt along with the -analyze option, and
+// it'll print to standard output.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/Pass.h"
+#include "llvm/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/Statistic.h"
+using namespace llvm;
+
+namespace {
+ class ModuleDebugInfoPrinter : public ModulePass {
+ DebugInfoFinder Finder;
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ModuleDebugInfoPrinter() : ModulePass(&ID) {}
+
+ virtual bool runOnModule(Module &M);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+ virtual void print(raw_ostream &O, const Module *M) const;
+ };
+}
+
+char ModuleDebugInfoPrinter::ID = 0;
+static RegisterPass<ModuleDebugInfoPrinter>
+X("module-debuginfo",
+ "Decodes module-level debug info", false, true);
+
+ModulePass *llvm::createModuleDebugInfoPrinterPass() {
+ return new ModuleDebugInfoPrinter();
+}
+
+bool ModuleDebugInfoPrinter::runOnModule(Module &M) {
+ Finder.processModule(M);
+ return false;
+}
+
+void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
+ for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(),
+ E = Finder.compile_unit_end(); I != E; ++I) {
+ O << "Compile Unit: ";
+ DICompileUnit(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.subprogram_begin(),
+ E = Finder.subprogram_end(); I != E; ++I) {
+ O << "Subprogram: ";
+ DISubprogram(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.global_variable_begin(),
+ E = Finder.global_variable_end(); I != E; ++I) {
+ O << "GlobalVariable: ";
+ DIGlobalVariable(*I).print(O);
+ O << '\n';
+ }
+
+ for (DebugInfoFinder::iterator I = Finder.type_begin(),
+ E = Finder.type_end(); I != E; ++I) {
+ O << "Type: ";
+ DIType(*I).print(O);
+ O << '\n';
+ }
+}
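Per its header comment, the new pass is registered as "module-debuginfo" and is meant to be run through opt's -analyze option. A rough sketch of driving it programmatically instead; the explicit print() call stands in for what -analyze does, and the factory declaration in llvm/Analysis/Passes.h is assumed to accompany this file (it is not shown in this lib/-only diff):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Analysis/Passes.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    void dumpModuleDebugInfo(Module &M) {
      PassManager PM;
      ModulePass *P = createModuleDebugInfoPrinterPass();
      PM.add(P);              // the manager takes ownership of the pass
      PM.run(M);              // runOnModule populates the DebugInfoFinder
      P->print(outs(), &M);   // what opt -analyze would invoke
    }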
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 46f3cbc..9b4370f 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -537,6 +537,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(coldcc);
KEYWORD(x86_stdcallcc);
KEYWORD(x86_fastcallcc);
+ KEYWORD(x86_thiscallcc);
KEYWORD(arm_apcscc);
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 3b08ca1..226d8d3 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1074,6 +1074,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
/// ::= 'coldcc'
/// ::= 'x86_stdcallcc'
/// ::= 'x86_fastcallcc'
+/// ::= 'x86_thiscallcc'
/// ::= 'arm_apcscc'
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
@@ -1088,6 +1089,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
case lltok::kw_coldcc: CC = CallingConv::Cold; break;
case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+ case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
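The x86_thiscallcc keyword added to the lexer and parser exposes Microsoft's __thiscall convention at the IR level: on 32-bit x86 the 'this' pointer is passed in ECX and the callee pops the remaining stack arguments. An illustrative C++ pattern that MSVC-compatible front ends lower to this convention (an example, not from the commit):

    // Under MSVC on 32-bit x86, a non-variadic member function defaults to
    // __thiscall: 'this' in ECX, other arguments on the stack, callee cleanup.
    // A call such as w.scale(2) is what x86_thiscallcc models in the IR.
    struct Widget {
      int factor;
      int scale(int x) { return x * factor; }
    };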
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 3ac9169..5eed170 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -68,7 +68,7 @@ namespace lltok {
kw_c,
kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
- kw_x86_stdcallcc, kw_x86_fastcallcc,
+ kw_x86_stdcallcc, kw_x86_fastcallcc, kw_x86_thiscallcc,
kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
kw_msp430_intrcc,
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index ded4b3f..5a0c27b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -205,16 +205,10 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const {
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
// .weak_definition _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition);
- } else if (const char *LinkOnce = MAI->getLinkOnceDirective()) {
+ } else if (MAI->getLinkOnceDirective() != 0) {
// .globl _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global);
- // FIXME: linkonce should be a section attribute, handled by COFF Section
- // assignment.
- // http://sourceware.org/binutils/docs-2.20/as/Linkonce.html#Linkonce
- // .linkonce discard
- // FIXME: It would be nice to use .linkonce samesize for non-common
- // globals.
- OutStreamer.EmitRawText(StringRef(LinkOnce));
+ //NOTE: linkonce is handled by the section the symbol was assigned to.
} else {
// .weak _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak);
@@ -247,6 +241,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (EmitSpecialLLVMGlobal(GV))
return;
+ if (isVerbose()) {
+ WriteAsOperand(OutStreamer.GetCommentOS(), GV,
+ /*PrintType=*/false, GV->getParent());
+ OutStreamer.GetCommentOS() << '\n';
+ }
+
MCSymbol *GVSym = Mang->getSymbol(GV);
EmitVisibility(GVSym, GV->getVisibility());
@@ -316,17 +316,57 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog);
return;
}
+
+ // Handle thread local data for mach-o which requires us to output an
+ // additional structure of data and mangle the original symbol so that we
+ // can reference it later.
+ if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
+ // Emit the .tbss symbol
+ MCSymbol *MangSym =
+ OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+
+ if (GVKind.isThreadBSS())
+ OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+ else if (GVKind.isThreadData()) {
+ OutStreamer.SwitchSection(TheSection);
+
+ EmitLinkage(GV->getLinkage(), MangSym);
+ EmitAlignment(AlignLog, GV);
+ OutStreamer.EmitLabel(MangSym);
+
+ EmitGlobalConstant(GV->getInitializer());
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // Emit the variable struct for the runtime.
+ const MCSection *TLVSect
+ = getObjFileLowering().getTLSExtraDataSection();
+
+ OutStreamer.SwitchSection(TLVSect);
+ // Emit the linkage here.
+ EmitLinkage(GV->getLinkage(), GVSym);
+ OutStreamer.EmitLabel(GVSym);
+
+ // Three pointers in size:
+ // - __tlv_bootstrap - used to make sure support exists
+ // - spare pointer, used when mapped by the runtime
+ // - pointer to mangled symbol above with initializer
+ unsigned PtrSize = TD->getPointerSizeInBits()/8;
+ OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__tlv_bootstrap"),
+ PtrSize, 0);
+ OutStreamer.EmitIntValue(0, PtrSize, 0);
+ OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0);
+
+ OutStreamer.AddBlankLine();
+ return;
+ }
OutStreamer.SwitchSection(TheSection);
EmitLinkage(GV->getLinkage(), GVSym);
EmitAlignment(AlignLog, GV);
- if (isVerbose()) {
- WriteAsOperand(OutStreamer.GetCommentOS(), GV,
- /*PrintType=*/false, GV->getParent());
- OutStreamer.GetCommentOS() << '\n';
- }
OutStreamer.EmitLabel(GVSym);
EmitGlobalConstant(GV->getInitializer());
@@ -408,7 +448,13 @@ void AsmPrinter::EmitFunctionHeader() {
/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
/// function. This can be overridden by targets as required to do custom stuff.
void AsmPrinter::EmitFunctionEntryLabel() {
- OutStreamer.EmitLabel(CurrentFnSym);
+ // The function label could have already been emitted if two symbols end up
+ // conflicting due to asm renaming. Detect this and emit an error.
+ if (CurrentFnSym->isUndefined())
+ return OutStreamer.EmitLabel(CurrentFnSym);
+
+ report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
+ "' label emitted multiple times to assembly file");
}
@@ -503,7 +549,7 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
// cast away const; DIetc do not take const operands for some reason.
DIVariable V(const_cast<MDNode*>(MI->getOperand(2).getMetadata()));
if (V.getContext().isSubprogram())
- OS << DISubprogram(V.getContext().getNode()).getDisplayName() << ":";
+ OS << DISubprogram(V.getContext()).getDisplayName() << ":";
OS << V.getName() << " <- ";
// Register or immediate value. Register 0 means undef.
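The Mach-O thread-local path added above emits, for each thread-local variable, a three-pointer descriptor that the Darwin runtime consumes. Restated as a C-style record assembled from the comments in the diff (field names are assumptions; the layout is what the EmitSymbolValue/EmitIntValue sequence writes):

    // One such record is emitted at the variable's public symbol in the
    // TLS extra-data section, one per thread-local variable:
    struct TLVDescriptor {
      void *(*thunk)(void *); // initially __tlv_bootstrap ("support exists")
      void *spare;            // spare slot, filled by the runtime when mapped
      void *init;             // the <name>$tlv$init initializer emitted above
    };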
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 37d10e5..ba6fed2 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -53,6 +53,17 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
}
SourceMgr SrcMgr;
+
+ // Ensure the buffer is newline terminated.
+ char *TmpString = 0;
+ if (Str.back() != '\n') {
+ TmpString = new char[Str.size() + 2];
+ memcpy(TmpString, Str.data(), Str.size());
+ TmpString[Str.size()] = '\n';
+ TmpString[Str.size() + 1] = 0;
+ isNullTerminated = true;
+ Str = TmpString;
+ }
// If the current LLVMContext has an inline asm handler, set it in SourceMgr.
LLVMContext &LLVMCtx = MMI->getModule()->getContext();
@@ -84,6 +95,9 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
/*NoFinalize*/ true);
if (Res && !HasDiagHandler)
report_fatal_error("Error parsing inline asm\n");
+
+ if (TmpString)
+ delete[] TmpString;
}
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 9cb8314..d56c094 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -315,6 +315,10 @@ namespace llvm {
///
virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+ /// getValue - Get MCSymbol.
+ ///
+ const MCSymbol *getValue() const { return Label; }
+
/// SizeOf - Determine size of label value in bytes.
///
virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index e9e9ba5..890507c 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -47,6 +47,10 @@ static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
cl::desc("Disable debug info printing"));
+static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
+ cl::desc("Make an absense of debug location information explicit."),
+ cl::init(false));
+
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
@@ -78,12 +82,12 @@ class CompileUnit {
/// GVToDieMap - Tracks the mapping of unit level debug information
/// variables to debug information entries.
/// FIXME : Rename GVToDieMap -> NodeToDieMap
- DenseMap<MDNode *, DIE *> GVToDieMap;
+ DenseMap<const MDNode *, DIE *> GVToDieMap;
/// GVToDIEEntryMap - Tracks the mapping of unit level debug information
/// descriptors to debug information entries using a DIEEntry proxy.
/// FIXME : Rename
- DenseMap<MDNode *, DIEEntry *> GVToDIEEntryMap;
+ DenseMap<const MDNode *, DIEEntry *> GVToDIEEntryMap;
/// Globals - A map of globally visible named entities for this unit.
///
@@ -119,24 +123,24 @@ public:
/// getDIE - Returns the debug information entry map slot for the
/// specified debug variable.
- DIE *getDIE(MDNode *N) { return GVToDieMap.lookup(N); }
+ DIE *getDIE(const MDNode *N) { return GVToDieMap.lookup(N); }
/// insertDIE - Insert DIE into the map.
- void insertDIE(MDNode *N, DIE *D) {
+ void insertDIE(const MDNode *N, DIE *D) {
GVToDieMap.insert(std::make_pair(N, D));
}
/// getDIEEntry - Returns the debug information entry for the specified
/// debug variable.
- DIEEntry *getDIEEntry(MDNode *N) {
- DenseMap<MDNode *, DIEEntry *>::iterator I = GVToDIEEntryMap.find(N);
+ DIEEntry *getDIEEntry(const MDNode *N) {
+ DenseMap<const MDNode *, DIEEntry *>::iterator I = GVToDIEEntryMap.find(N);
if (I == GVToDIEEntryMap.end())
return NULL;
return I->second;
}
/// insertDIEEntry - Insert debug information entry into the map.
- void insertDIEEntry(MDNode *N, DIEEntry *E) {
+ void insertDIEEntry(const MDNode *N, DIEEntry *E) {
GVToDIEEntryMap.insert(std::make_pair(N, E));
}
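These accessors are a thin get/insert veneer over DenseMap; the substance of the hunk is just the const-qualified key type. The same idiom with standard containers, as a sketch (DIE and MDNode stay opaque; this is not the DenseMap API):

    #include <unordered_map>

    struct DIE;    // opaque debug information entry
    struct MDNode; // opaque metadata node

    class NodeDieMap {
      std::unordered_map<const MDNode *, DIE *> Map;
    public:
      // lookup() semantics: return the mapped DIE or null, never inserting.
      DIE *getDIE(const MDNode *N) const {
        auto I = Map.find(N);
        return I == Map.end() ? nullptr : I->second;
      }
      // insert() semantics: the first insertion wins and later ones are
      // ignored, matching DenseMap::insert for an existing key.
      void insertDIE(const MDNode *N, DIE *D) { Map.emplace(N, D); }
    };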
@@ -164,31 +168,18 @@ public:
///
class DbgVariable {
DIVariable Var; // Variable Descriptor.
- unsigned FrameIndex; // Variable frame index.
- const MachineInstr *DbgValueMInsn; // DBG_VALUE
- // DbgValueLabel - DBG_VALUE is effective from this label.
- MCSymbol *DbgValueLabel;
- DbgVariable *const AbstractVar; // Abstract variable for this variable.
- DIE *TheDIE;
+ DIE *TheDIE; // Variable DIE.
+ unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries.
public:
// AbsVar may be NULL.
- DbgVariable(DIVariable V, unsigned I, DbgVariable *AbsVar)
- : Var(V), FrameIndex(I), DbgValueMInsn(0),
- DbgValueLabel(0), AbstractVar(AbsVar), TheDIE(0) {}
- DbgVariable(DIVariable V, const MachineInstr *MI, DbgVariable *AbsVar)
- : Var(V), FrameIndex(0), DbgValueMInsn(MI), DbgValueLabel(0),
- AbstractVar(AbsVar), TheDIE(0)
- {}
+ DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {}
// Accessors.
DIVariable getVariable() const { return Var; }
- unsigned getFrameIndex() const { return FrameIndex; }
- const MachineInstr *getDbgValue() const { return DbgValueMInsn; }
- MCSymbol *getDbgValueLabel() const { return DbgValueLabel; }
- void setDbgValueLabel(MCSymbol *L) { DbgValueLabel = L; }
- DbgVariable *getAbstractVariable() const { return AbstractVar; }
void setDIE(DIE *D) { TheDIE = D; }
DIE *getDIE() const { return TheDIE; }
+ void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; }
+ unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; }
};
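Note that DotDebugLocOffset doubles as a validity flag: ~0U means "no .debug_loc entry for this variable", anything else is an index into DotDebugLocEntries. The sentinel idiom in isolation, as a sketch:

    // Sentinel-valued offset: validity and value share a single field.
    class LocOffset {
      unsigned Offset = ~0U; // ~0U encodes "no location-list entry"
    public:
      void set(unsigned O) { Offset = O; }
      bool isValid() const { return Offset != ~0U; }
      unsigned get() const { return Offset; } // meaningful only if isValid()
    };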
//===----------------------------------------------------------------------===//
@@ -204,7 +195,7 @@ class DbgScope {
DbgScope *Parent; // Parent to this scope.
DIDescriptor Desc; // Debug info descriptor for scope.
// Location at which this scope is inlined.
- AssertingVH<MDNode> InlinedAtLocation;
+ AssertingVH<const MDNode> InlinedAtLocation;
bool AbstractScope; // Abstract Scope
const MachineInstr *LastInsn; // Last instruction of this scope.
const MachineInstr *FirstInsn; // First instruction of this scope.
@@ -217,7 +208,7 @@ class DbgScope {
// Private state for dump()
mutable unsigned IndentLevel;
public:
- DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
+ DbgScope(DbgScope *P, DIDescriptor D, const MDNode *I = 0)
: Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
LastInsn(0), FirstInsn(0),
DFSIn(0), DFSOut(0), IndentLevel(0) {}
@@ -227,8 +218,8 @@ public:
DbgScope *getParent() const { return Parent; }
void setParent(DbgScope *P) { Parent = P; }
DIDescriptor getDesc() const { return Desc; }
- MDNode *getInlinedAt() const { return InlinedAtLocation; }
- MDNode *getScopeNode() const { return Desc.getNode(); }
+ const MDNode *getInlinedAt() const { return InlinedAtLocation; }
+ const MDNode *getScopeNode() const { return Desc; }
const SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
const SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
const SmallVector<DbgRange, 4> &getRanges() { return Ranges; }
@@ -300,7 +291,7 @@ public:
void DbgScope::dump() const {
raw_ostream &err = dbgs();
err.indent(IndentLevel);
- MDNode *N = Desc.getNode();
+ const MDNode *N = Desc;
N->dump();
if (AbstractScope)
err << "Abstract Scope\n";
@@ -322,15 +313,15 @@ DbgScope::~DbgScope() {
}
DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
- : Asm(A), MMI(Asm->MMI), ModuleCU(0),
+ : Asm(A), MMI(Asm->MMI), FirstCU(0),
AbbreviationsSet(InitAbbreviationsSetSize),
CurrentFnDbgScope(0), PrevLabel(NULL) {
NextStringPoolNumber = 0;
DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0;
DwarfStrSectionSym = TextSectionSym = 0;
- DwarfDebugRangeSectionSym = 0;
- FunctionBeginSym = 0;
+ DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0;
+ FunctionBeginSym = FunctionEndSym = 0;
if (TimePassesIsEnabled) {
NamedRegionTimer T(DbgTimerName, DWARFGroupName);
beginModule(M);
@@ -444,8 +435,8 @@ void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
/// addSourceLine - Add location information to specified debug information
/// entry.
void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
- // If there is no compile unit specified, don't add a line #.
- if (!V->getCompileUnit().Verify())
+ // Verify variable.
+ if (!V->Verify())
return;
unsigned Line = V->getLineNumber();
@@ -458,9 +449,9 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
/// addSourceLine - Add location information to specified debug information
/// entry.
-void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) {
- // If there is no compile unit specified, don't add a line #.
- if (!G->getCompileUnit().Verify())
+void DwarfDebug::addSourceLine(DIE *Die, const DIGlobalVariable *G) {
+ // Verify global variable.
+ if (!G->Verify())
return;
unsigned Line = G->getLineNumber();
@@ -474,8 +465,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) {
/// addSourceLine - Add location information to specified debug information
/// entry.
void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
- // If there is no compile unit specified, don't add a line #.
- if (!SP->getCompileUnit().Verify())
+ // Verify subprogram.
+ if (!SP->Verify())
return;
// If the line number is 0, don't add it.
if (SP->getLineNumber() == 0)
@@ -494,9 +485,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
/// addSourceLine - Add location information to specified debug information
/// entry.
void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
- // If there is no compile unit specified, don't add a line #.
- DICompileUnit CU = Ty->getCompileUnit();
- if (!CU.Verify())
+ // Verify type.
+ if (!Ty->Verify())
return;
unsigned Line = Ty->getLineNumber();
@@ -512,8 +502,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
/// addSourceLine - Add location information to specified debug information
/// entry.
void DwarfDebug::addSourceLine(DIE *Die, const DINameSpace *NS) {
- // If there is no compile unit specified, don't add a line #.
- if (!NS->getCompileUnit().Verify())
+ // Verify namespace.
+ if (!NS->Verify())
return;
unsigned Line = NS->getLineNumber();
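All of the addSourceLine overloads above now share one shape: verify the descriptor itself rather than its compile unit, then bail out early on a zero line number. A self-contained sketch of that guard pattern (Descriptor is a stand-in for the DIVariable/DIType/DISubprogram wrappers):

    #include <cstdio>

    // Stand-in for the descriptor wrappers: Verify() reports whether the
    // underlying metadata is well formed.
    struct Descriptor {
      bool Valid;
      unsigned Line;
      bool Verify() const { return Valid; }
      unsigned getLineNumber() const { return Line; }
    };

    // Common shape of the rewritten addSourceLine overloads.
    void addSourceLine(const Descriptor &D) {
      if (!D.Verify())            // was: !D.getCompileUnit().Verify()
        return;
      if (D.getLineNumber() == 0) // line 0 carries no useful position
        return;
      std::printf("attach DW_AT_decl_line = %u\n", D.getLineNumber());
    }

    int main() {
      addSourceLine({true, 42}); // attaches line info
      addSourceLine({false, 7}); // malformed descriptor: skipped
    }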
@@ -560,16 +550,16 @@ DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) {
unsigned tag = Ty.getTag();
if (tag == dwarf::DW_TAG_pointer_type) {
- DIDerivedType DTy = DIDerivedType(Ty.getNode());
+ DIDerivedType DTy = DIDerivedType(Ty);
subType = DTy.getTypeDerivedFrom();
}
- DICompositeType blockStruct = DICompositeType(subType.getNode());
+ DICompositeType blockStruct = DICompositeType(subType);
DIArray Elements = blockStruct.getTypeArray();
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
- DIDerivedType DT = DIDerivedType(Element.getNode());
+ DIDerivedType DT = DIDerivedType(Element);
if (Name == DT.getName())
return (DT.getTypeDerivedFrom());
}
@@ -700,12 +690,12 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
StringRef varName = VD.getName();
if (Tag == dwarf::DW_TAG_pointer_type) {
- DIDerivedType DTy = DIDerivedType(Ty.getNode());
+ DIDerivedType DTy = DIDerivedType(Ty);
TmpTy = DTy.getTypeDerivedFrom();
isPointer = true;
}
- DICompositeType blockStruct = DICompositeType(TmpTy.getNode());
+ DICompositeType blockStruct = DICompositeType(TmpTy);
// Find the __forwarding field and the variable field in the __Block_byref
// struct.
@@ -715,7 +705,7 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) {
DIDescriptor Element = Fields.getElement(i);
- DIDerivedType DT = DIDerivedType(Element.getNode());
+ DIDerivedType DT = DIDerivedType(Element);
StringRef fieldName = DT.getName();
if (fieldName == "__forwarding")
forwardingField = Element;
@@ -725,9 +715,9 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
// Get the offsets for the forwarding field and the variable field.
unsigned forwardingFieldOffset =
- DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3;
+ DIDerivedType(forwardingField).getOffsetInBits() >> 3;
unsigned varFieldOffset =
- DIDerivedType(varField.getNode()).getOffsetInBits() >> 3;
+ DIDerivedType(varField).getOffsetInBits() >> 3;
// Decode the original location, and use that as the start of the byref
// variable's location.
@@ -813,7 +803,7 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
}
/// addRegisterAddress - Add register location entry in variable DIE.
-bool DwarfDebug::addRegisterAddress(DIE *Die, DbgVariable *DV,
+bool DwarfDebug::addRegisterAddress(DIE *Die, const MCSymbol *VS,
const MachineOperand &MO) {
assert (MO.isReg() && "Invalid machine operand!");
if (!MO.getReg())
@@ -821,26 +811,26 @@ bool DwarfDebug::addRegisterAddress(DIE *Die, DbgVariable *DV,
MachineLocation Location;
Location.set(MO.getReg());
addAddress(Die, dwarf::DW_AT_location, Location);
- if (MCSymbol *VS = DV->getDbgValueLabel())
+ if (VS)
addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
return true;
}
/// addConstantValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantValue(DIE *Die, DbgVariable *DV,
+bool DwarfDebug::addConstantValue(DIE *Die, const MCSymbol *VS,
const MachineOperand &MO) {
assert (MO.isImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
unsigned Imm = MO.getImm();
addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
- if (MCSymbol *VS = DV->getDbgValueLabel())
+ if (VS)
addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
return true;
}
/// addConstantFPValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV,
+bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
const MachineOperand &MO) {
assert (MO.isFPImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
@@ -862,10 +852,8 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV,
(unsigned char)0xFF & FltPtr[Start]);
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
-
- if (MCSymbol *VS = DV->getDbgValueLabel())
- addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
- VS);
+ if (VS)
+ addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
return true;
}
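The common thread of the last three hunks: the helpers now take the optional start-scope label directly instead of a DbgVariable to dig it out of, decoupling DIE construction from the variable bookkeeping. A sketch of the shape (MCSymbolStub is a stand-in, not the MC type; emission is reduced to printf):

    #include <cstdio>

    struct MCSymbolStub { const char *Name; }; // stand-in for MCSymbol

    // The label is an ordinary optional parameter now; DW_AT_start_scope is
    // added only when the caller actually has one.
    void addRegisterAddress(unsigned Reg, const MCSymbolStub *VS) {
      std::printf("DW_AT_location <- reg %u\n", Reg);
      if (VS)
        std::printf("DW_AT_start_scope <- %s\n", VS->Name);
    }

    int main() {
      MCSymbolStub L = {"Ltmp3"};
      addRegisterAddress(5, &L);      // with a start-scope label
      addRegisterAddress(5, nullptr); // without one
    }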
@@ -873,34 +861,35 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, DbgVariable *DV,
/// addToContextOwner - Add Die into the list of its context owner's children.
void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
if (Context.isType()) {
- DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context.getNode()));
+ DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context));
ContextDIE->addChild(Die);
} else if (Context.isNameSpace()) {
- DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context.getNode()));
+ DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
ContextDIE->addChild(Die);
- } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode()))
+ } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
ContextDIE->addChild(Die);
else
- ModuleCU->addDie(Die);
+ getCompileUnit(Context)->addDie(Die);
}
/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
/// given DIType.
DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) {
- DIE *TyDIE = ModuleCU->getDIE(Ty.getNode());
+ CompileUnit *TypeCU = getCompileUnit(Ty);
+ DIE *TyDIE = TypeCU->getDIE(Ty);
if (TyDIE)
return TyDIE;
// Create new type.
TyDIE = new DIE(dwarf::DW_TAG_base_type);
- ModuleCU->insertDIE(Ty.getNode(), TyDIE);
+ TypeCU->insertDIE(Ty, TyDIE);
if (Ty.isBasicType())
- constructTypeDIE(*TyDIE, DIBasicType(Ty.getNode()));
+ constructTypeDIE(*TyDIE, DIBasicType(Ty));
else if (Ty.isCompositeType())
- constructTypeDIE(*TyDIE, DICompositeType(Ty.getNode()));
+ constructTypeDIE(*TyDIE, DICompositeType(Ty));
else {
assert(Ty.isDerivedType() && "Unknown kind of DIType");
- constructTypeDIE(*TyDIE, DIDerivedType(Ty.getNode()));
+ constructTypeDIE(*TyDIE, DIDerivedType(Ty));
}
addToContextOwner(TyDIE, Ty.getContext());
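getOrCreateTypeDIE is the usual get-or-create idiom with one subtlety: the fresh DIE is registered in the owning unit's map before constructTypeDIE fills it in, so a self-referential type terminates instead of recursing forever. A sketch with a standard map (the key is left opaque; illustrative only):

    #include <unordered_map>

    struct DIE { int Tag; };

    DIE *getOrCreate(std::unordered_map<const void *, DIE *> &UnitMap,
                     const void *Key) {
      auto I = UnitMap.find(Key);
      if (I != UnitMap.end())
        return I->second; // reuse the existing entry
      DIE *Fresh = new DIE{0};
      UnitMap.emplace(Key, Fresh); // register before populating, so a
                                   // recursive reference finds this entry
      // ... populate Fresh from the type descriptor ...
      return Fresh;
    }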
@@ -909,11 +898,12 @@ DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) {
/// addType - Add a new type attribute to the specified entity.
void DwarfDebug::addType(DIE *Entity, DIType Ty) {
- if (!Ty.isValid())
+ if (!Ty.Verify())
return;
// Check for pre-existence.
- DIEEntry *Entry = ModuleCU->getDIEEntry(Ty.getNode());
+ CompileUnit *TypeCU = getCompileUnit(Ty);
+ DIEEntry *Entry = TypeCU->getDIEEntry(Ty);
// If it exists then use the existing value.
if (Entry) {
Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
@@ -925,7 +915,7 @@ void DwarfDebug::addType(DIE *Entity, DIType Ty) {
// Set up proxy.
Entry = createDIEEntry(Buffer);
- ModuleCU->insertDIEEntry(Ty.getNode(), Entry);
+ TypeCU->insertDIEEntry(Ty, Entry);
Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
}
@@ -994,9 +984,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add enumerators to enumeration type.
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIE *ElemDie = NULL;
- DIDescriptor Enum(Elements.getElement(i).getNode());
+ DIDescriptor Enum(Elements.getElement(i));
if (Enum.isEnumerator()) {
- ElemDie = constructEnumTypeDIE(DIEnumerator(Enum.getNode()));
+ ElemDie = constructEnumTypeDIE(DIEnumerator(Enum));
Buffer.addChild(ElemDie);
}
}
@@ -1006,7 +996,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Add return type.
DIArray Elements = CTy.getTypeArray();
DIDescriptor RTy = Elements.getElement(0);
- addType(&Buffer, DIType(RTy.getNode()));
+ addType(&Buffer, DIType(RTy));
// Add prototype flag.
addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
@@ -1015,7 +1005,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
DIDescriptor Ty = Elements.getElement(i);
- addType(Arg, DIType(Ty.getNode()));
+ addType(Arg, DIType(Ty));
Buffer.addChild(Arg);
}
}
@@ -1036,9 +1026,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIDescriptor Element = Elements.getElement(i);
DIE *ElemDie = NULL;
if (Element.isSubprogram())
- ElemDie = createSubprogramDIE(DISubprogram(Element.getNode()));
+ ElemDie = createSubprogramDIE(DISubprogram(Element));
else if (Element.isVariable()) {
- DIVariable DV(Element.getNode());
+ DIVariable DV(Element);
ElemDie = new DIE(dwarf::DW_TAG_variable);
addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
DV.getName());
@@ -1047,7 +1037,7 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
addSourceLine(ElemDie, &DV);
} else if (Element.isDerivedType())
- ElemDie = createMemberDIE(DIDerivedType(Element.getNode()));
+ ElemDie = createMemberDIE(DIDerivedType(Element));
else
continue;
Buffer.addChild(ElemDie);
@@ -1062,9 +1052,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
dwarf::DW_FORM_data1, RLang);
DICompositeType ContainingType = CTy.getContainingType();
- if (DIDescriptor(ContainingType.getNode()).isCompositeType())
+ if (DIDescriptor(ContainingType).isCompositeType())
addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4,
- getOrCreateTypeDIE(DIType(ContainingType.getNode())));
+ getOrCreateTypeDIE(DIType(ContainingType)));
break;
}
default:
@@ -1120,22 +1110,23 @@ void DwarfDebug::constructArrayTypeDIE(DIE &Buffer,
DIArray Elements = CTy->getTypeArray();
// Get an anonymous type for index type.
- DIE *IdxTy = ModuleCU->getIndexTyDie();
+ CompileUnit *TheCU = getCompileUnit(*CTy);
+ DIE *IdxTy = TheCU->getIndexTyDie();
if (!IdxTy) {
// Construct an anonymous type for index type.
IdxTy = new DIE(dwarf::DW_TAG_base_type);
addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_signed);
- ModuleCU->addDie(IdxTy);
- ModuleCU->setIndexTyDie(IdxTy);
+ TheCU->addDie(IdxTy);
+ TheCU->setIndexTyDie(IdxTy);
}
// Add subranges to array type.
for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
if (Element.getTag() == dwarf::DW_TAG_subrange_type)
- constructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IdxTy);
+ constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy);
}
}
@@ -1262,7 +1253,8 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
/// createSubprogramDIE - Create new DIE using SP.
DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
- DIE *SPDie = ModuleCU->getDIE(SP.getNode());
+ CompileUnit *SPCU = getCompileUnit(SP);
+ DIE *SPDie = SPCU->getDIE(SP);
if (SPDie)
return SPDie;
@@ -1292,7 +1284,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type)
addType(SPDie, SPTy);
else
- addType(SPDie, DIType(Args.getElement(0).getNode()));
+ addType(SPDie, DIType(Args.getElement(0)));
unsigned VK = SP.getVirtuality();
if (VK) {
@@ -1302,7 +1294,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
ContainingTypeMap.insert(std::make_pair(SPDie,
- SP.getContainingType().getNode()));
+ SP.getContainingType()));
}
if (MakeDecl || !SP.isDefinition()) {
@@ -1317,7 +1309,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
if (SPTag == dwarf::DW_TAG_subroutine_type)
for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
+ DIType ATy = DIType(DIType(Args.getElement(i)));
addType(Arg, ATy);
if (ATy.isArtificial())
addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
@@ -1335,12 +1327,12 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1);
// DW_TAG_inlined_subroutine may refer to this DIE.
- ModuleCU->insertDIE(SP.getNode(), SPDie);
+ SPCU->insertDIE(SP, SPDie);
return SPDie;
}
-DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
+DbgScope *DwarfDebug::getOrCreateAbstractScope(const MDNode *N) {
assert(N && "Invalid Scope encoding!");
DbgScope *AScope = AbstractScopes.lookup(N);
@@ -1353,7 +1345,7 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
if (Scope.isLexicalBlock()) {
DILexicalBlock DB(N);
DIDescriptor ParentDesc = DB.getContext();
- Parent = getOrCreateAbstractScope(ParentDesc.getNode());
+ Parent = getOrCreateAbstractScope(ParentDesc);
}
AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
@@ -1369,14 +1361,14 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
/// isSubprogramContext - Return true if Context is either a subprogram
/// or another context nested inside a subprogram.
-static bool isSubprogramContext(MDNode *Context) {
+static bool isSubprogramContext(const MDNode *Context) {
if (!Context)
return false;
DIDescriptor D(Context);
if (D.isSubprogram())
return true;
if (D.isType())
- return isSubprogramContext(DIType(Context).getContext().getNode());
+ return isSubprogramContext(DIType(Context).getContext());
return false;
}
@@ -1384,8 +1376,9 @@ static bool isSubprogramContext(MDNode *Context) {
/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
/// If there are global variables in this scope then create and insert
/// DIEs for these variables.
-DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
- DIE *SPDie = ModuleCU->getDIE(SPNode);
+DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) {
+ CompileUnit *SPCU = getCompileUnit(SPNode);
+ DIE *SPDie = SPCU->getDIE(SPNode);
assert(SPDie && "Unable to find subprogram DIE!");
DISubprogram SP(SPNode);
@@ -1396,7 +1389,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
// specification DIE for a function defined inside a function.
if (SP.isDefinition() && !SP.getContext().isCompileUnit() &&
!SP.getContext().isFile() &&
- !isSubprogramContext(SP.getContext().getNode())) {
+ !isSubprogramContext(SP.getContext())) {
addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add arguments.
@@ -1406,7 +1399,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
if (SPTag == dwarf::DW_TAG_subroutine_type)
for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- DIType ATy = DIType(DIType(Args.getElement(i).getNode()));
+ DIType ATy = DIType(DIType(Args.getElement(i)));
addType(Arg, ATy);
if (ATy.isArtificial())
addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
@@ -1416,7 +1409,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
SPDie = new DIE(dwarf::DW_TAG_subprogram);
addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
SPDeclDie);
- ModuleCU->addDie(SPDie);
+ SPCU->addDie(SPDie);
}
addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
@@ -1451,18 +1444,18 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize());
for (SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin(),
RE = Ranges.end(); RI != RE; ++RI) {
- DebugRangeSymbols.push_back(LabelsBeforeInsn.lookup(RI->first));
- DebugRangeSymbols.push_back(LabelsAfterInsn.lookup(RI->second));
+ DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first));
+ DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second));
}
DebugRangeSymbols.push_back(NULL);
DebugRangeSymbols.push_back(NULL);
return ScopeDIE;
}
- MCSymbol *Start = LabelsBeforeInsn.lookup(RI->first);
- MCSymbol *End = LabelsAfterInsn.lookup(RI->second);
+ const MCSymbol *Start = getLabelBeforeInsn(RI->first);
+ const MCSymbol *End = getLabelAfterInsn(RI->second);
- if (Start == 0 || End == 0) return 0;
+ if (End == 0) return 0;
assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
assert(End->isDefined() && "Invalid end label for an inlined scope!");
@@ -1487,10 +1480,10 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
// For now, use first instruction range and emit low_pc/high_pc pair and
// corresponding .debug_inlined section entry for this pair.
SmallVector<DbgRange, 4>::const_iterator RI = Ranges.begin();
- MCSymbol *StartLabel = LabelsBeforeInsn.lookup(RI->first);
- MCSymbol *EndLabel = LabelsAfterInsn.lookup(RI->second);
+ const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first);
+ const MCSymbol *EndLabel = getLabelAfterInsn(RI->second);
- if (StartLabel == 0 || EndLabel == 0) {
+ if (StartLabel == FunctionBeginSym || EndLabel == 0) {
    assert (0 && "Unexpected Start and End labels for an inlined scope!");
return 0;
}
@@ -1504,8 +1497,9 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
DIScope DS(Scope->getScopeNode());
DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
- DISubprogram InlinedSP = getDISubprogram(DS.getNode());
- DIE *OriginDIE = ModuleCU->getDIE(InlinedSP.getNode());
+ DISubprogram InlinedSP = getDISubprogram(DS);
+ CompileUnit *TheCU = getCompileUnit(InlinedSP);
+ DIE *OriginDIE = TheCU->getDIE(InlinedSP);
assert(OriginDIE && "Unable to find Origin DIE!");
addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, OriginDIE);
@@ -1516,18 +1510,18 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
InlinedSubprogramDIEs.insert(OriginDIE);
// Track the start label for this inlined function.
- DenseMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
- I = InlineInfo.find(InlinedSP.getNode());
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ I = InlineInfo.find(InlinedSP);
if (I == InlineInfo.end()) {
- InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartLabel,
+ InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel,
ScopeDIE));
- InlinedSPNodes.push_back(InlinedSP.getNode());
+ InlinedSPNodes.push_back(InlinedSP);
} else
I->second.push_back(std::make_pair(StartLabel, ScopeDIE));
DILocation DL(Scope->getInlinedAt());
- addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
+ addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID());
addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
return ScopeDIE;
@@ -1560,22 +1554,15 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
// Define variable debug information entry.
DIE *VariableDie = new DIE(Tag);
-
DIE *AbsDIE = NULL;
- if (DbgVariable *AV = DV->getAbstractVariable())
- AbsDIE = AV->getDIE();
-
- if (AbsDIE) {
- DIScope DS(Scope->getScopeNode());
- DISubprogram InlinedSP = getDISubprogram(DS.getNode());
- DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP.getNode());
- (void) OriginSPDIE;
- assert(OriginSPDIE && "Unable to find Origin DIE for the SP!");
- DIE *AbsDIE = DV->getAbstractVariable()->getDIE();
- assert(AbsDIE && "Unable to find Origin DIE for the Variable!");
+ DenseMap<const DbgVariable *, const DbgVariable *>::iterator
+ V2AVI = VarToAbstractVarMap.find(DV);
+ if (V2AVI != VarToAbstractVarMap.end())
+ AbsDIE = V2AVI->second->getDIE();
+
+ if (AbsDIE)
addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, AbsDIE);
- }
else {
addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
addSourceLine(VariableDie, &VD);
@@ -1589,55 +1576,76 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
addType(VariableDie, VD.getType());
}
+ if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
+ addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+
+ if (Scope->isAbstractScope()) {
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
+
// Add variable address.
- if (!Scope->isAbstractScope()) {
- // Check if variable is described by DBG_VALUE instruction.
- if (const MachineInstr *DVInsn = DV->getDbgValue()) {
- bool updated = false;
- // FIXME : Handle getNumOperands != 3
- if (DVInsn->getNumOperands() == 3) {
- if (DVInsn->getOperand(0).isReg())
- updated = addRegisterAddress(VariableDie, DV, DVInsn->getOperand(0));
- else if (DVInsn->getOperand(0).isImm())
- updated = addConstantValue(VariableDie, DV, DVInsn->getOperand(0));
- else if (DVInsn->getOperand(0).isFPImm())
- updated = addConstantFPValue(VariableDie, DV, DVInsn->getOperand(0));
- } else {
- MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
- if (Location.getReg()) {
- addAddress(VariableDie, dwarf::DW_AT_location, Location);
- if (MCSymbol *VS = DV->getDbgValueLabel())
- addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
- VS);
- updated = true;
- }
- }
- if (!updated) {
- // If variableDie is not updated then DBG_VALUE instruction does not
- // have valid variable info.
- delete VariableDie;
- return NULL;
- }
- }
- else {
- MachineLocation Location;
- unsigned FrameReg;
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
- int Offset = RI->getFrameIndexReference(*Asm->MF, DV->getFrameIndex(),
- FrameReg);
- Location.set(FrameReg, Offset);
-
- if (VD.hasComplexAddress())
- addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else if (VD.isBlockByrefVariable())
- addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else
+
+ unsigned Offset = DV->getDotDebugLocOffset();
+ if (Offset != ~0U) {
+ addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4,
+ Asm->GetTempSymbol("debug_loc", Offset));
+ DV->setDIE(VariableDie);
+ UseDotDebugLocEntry.insert(VariableDie);
+ return VariableDie;
+ }
+
+ // Check if variable is described by a DBG_VALUE instruction.
+ DenseMap<const DbgVariable *, const MachineInstr *>::iterator DVI =
+ DbgVariableToDbgInstMap.find(DV);
+ if (DVI != DbgVariableToDbgInstMap.end()) {
+ const MachineInstr *DVInsn = DVI->second;
+ const MCSymbol *DVLabel = findVariableLabel(DV);
+ bool updated = false;
+ // FIXME : Handle getNumOperands != 3
+ if (DVInsn->getNumOperands() == 3) {
+ if (DVInsn->getOperand(0).isReg())
+ updated = addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isImm())
+ updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isFPImm())
+ updated = addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+ } else {
+ MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
+ if (Location.getReg()) {
addAddress(VariableDie, dwarf::DW_AT_location, Location);
+ if (DVLabel)
+ addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
+ DVLabel);
+ updated = true;
+ }
}
- }
+ if (!updated) {
+ // If variableDie is not updated then DBG_VALUE instruction does not
+ // have valid variable info.
+ delete VariableDie;
+ return NULL;
+ }
+ DV->setDIE(VariableDie);
+ return VariableDie;
+ }
- if (Tag == dwarf::DW_TAG_formal_parameter && VD.getType().isArtificial())
- addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ // .. else use frame index, if available.
+ MachineLocation Location;
+ unsigned FrameReg;
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ int FI = 0;
+ if (findVariableFrameIndex(DV, &FI)) {
+ int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ Location.set(FrameReg, Offset);
+
+ if (VD.hasComplexAddress())
+ addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else if (VD.isBlockByrefVariable())
+ addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else
+ addAddress(VariableDie, dwarf::DW_AT_location, Location);
+ }
DV->setDIE(VariableDie);
return VariableDie;
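After this restructuring, constructVariableDIE resolves a variable's location by a fixed priority: a recorded .debug_loc offset wins, else a lone DBG_VALUE instruction, else a frame index. A compressed sketch of that decision ladder (all types are stand-ins and emission is reduced to printf):

    #include <cstdio>
    #include <optional>

    struct VarInfo {
      unsigned LocListOffset = ~0U;   // ~0U: no .debug_loc entry
      std::optional<int> DbgValueReg; // register from a single DBG_VALUE
      std::optional<int> FrameIndex;  // stack slot, if any
    };

    // Priority order mirrors the rewritten constructVariableDIE.
    void emitLocation(const VarInfo &V) {
      if (V.LocListOffset != ~0U) { // 1. location list
        std::printf("DW_AT_location -> Ldebug_loc%u\n", V.LocListOffset);
        return;
      }
      if (V.DbgValueReg) {          // 2. single DBG_VALUE
        std::printf("DW_AT_location -> reg %d\n", *V.DbgValueReg);
        return;
      }
      if (V.FrameIndex)             // 3. frame-index fallback
        std::printf("DW_AT_location -> fp + offset(FI %d)\n", *V.FrameIndex);
    }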
@@ -1651,14 +1659,15 @@ void DwarfDebug::addPubTypes(DISubprogram SP) {
DIArray Args = SPTy.getTypeArray();
for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) {
- DIType ATy(Args.getElement(i).getNode());
- if (!ATy.isValid())
+ DIType ATy(Args.getElement(i));
+ if (!ATy.Verify())
continue;
DICompositeType CATy = getDICompositeType(ATy);
- if (DIDescriptor(CATy.getNode()).Verify() && !CATy.getName().empty()
+ if (DIDescriptor(CATy).Verify() && !CATy.getName().empty()
&& !CATy.isForwardDecl()) {
- if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode()))
- ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry());
+ CompileUnit *TheCU = getCompileUnit(CATy);
+ if (DIEEntry *Entry = TheCU->getDIEEntry(CATy))
+ TheCU->addGlobalType(CATy.getName(), Entry->getEntry());
}
}
}
@@ -1674,9 +1683,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
ScopeDIE = constructInlinedScopeDIE(Scope);
else if (DS.isSubprogram()) {
if (Scope->isAbstractScope())
- ScopeDIE = ModuleCU->getDIE(DS.getNode());
+ ScopeDIE = getCompileUnit(DS)->getDIE(DS);
else
- ScopeDIE = updateSubprogramScopeDIE(DS.getNode());
+ ScopeDIE = updateSubprogramScopeDIE(DS);
}
else
ScopeDIE = constructLexicalScopeDIE(Scope);
@@ -1700,7 +1709,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
}
if (DS.isSubprogram())
- addPubTypes(DISubprogram(DS.getNode()));
+ addPubTypes(DISubprogram(DS));
return ScopeDIE;
}
@@ -1744,11 +1753,12 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
/// getOrCreateNameSpace - Create a DIE for DINameSpace.
DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
- DIE *NDie = ModuleCU->getDIE(NS.getNode());
+ CompileUnit *TheCU = getCompileUnit(NS);
+ DIE *NDie = TheCU->getDIE(NS);
if (NDie)
return NDie;
NDie = new DIE(dwarf::DW_TAG_namespace);
- ModuleCU->insertDIE(NS.getNode(), NDie);
+ TheCU->insertDIE(NS, NDie);
if (!NS.getName().empty())
addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName());
addSourceLine(NDie, &NS);
@@ -1756,12 +1766,10 @@ DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) {
return NDie;
}
-void DwarfDebug::constructCompileUnit(MDNode *N) {
+/// constructCompileUnit - Create new CompileUnit for the given
+/// metadata node with tag DW_TAG_compile_unit.
+void DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
- // Use first compile unit marked as isMain as the compile unit for this
- // module.
- if (ModuleCU || !DIUnit.isMain())
- return;
StringRef FN = DIUnit.getFilename();
StringRef Dir = DIUnit.getDirectory();
unsigned ID = GetOrCreateSourceID(Dir, FN);
@@ -1794,12 +1802,44 @@ void DwarfDebug::constructCompileUnit(MDNode *N) {
addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers,
dwarf::DW_FORM_data1, RVer);
- assert(!ModuleCU &&
- "ModuleCU assigned since the top of constructCompileUnit");
- ModuleCU = new CompileUnit(ID, Die);
+ CompileUnit *NewCU = new CompileUnit(ID, Die);
+ if (!FirstCU)
+ FirstCU = NewCU;
+ CUMap.insert(std::make_pair(N, NewCU));
+}
+
+/// getCompileUnit - Get the CompileUnit that owns the given debug info node.
+CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
+ assert (N && "Invalid DwarfDebug::getCompileUnit argument!");
+ DIDescriptor D(N);
+ const MDNode *CUNode = NULL;
+ if (D.isCompileUnit())
+ CUNode = N;
+ else if (D.isSubprogram())
+ CUNode = DISubprogram(N).getCompileUnit();
+ else if (D.isType())
+ CUNode = DIType(N).getCompileUnit();
+ else if (D.isGlobalVariable())
+ CUNode = DIGlobalVariable(N).getCompileUnit();
+ else if (D.isVariable())
+ CUNode = DIVariable(N).getCompileUnit();
+ else if (D.isNameSpace())
+ CUNode = DINameSpace(N).getCompileUnit();
+ else if (D.isFile())
+ CUNode = DIFile(N).getCompileUnit();
+ else
+ return FirstCU;
+
+ DenseMap<const MDNode *, CompileUnit *>::const_iterator I
+ = CUMap.find(CUNode);
+ if (I == CUMap.end())
+ return FirstCU;
+ return I->second;
}
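getCompileUnit replaces the single ModuleCU with per-node resolution: map the node to the node of its owning compile unit, look that up in CUMap, and fall back to the first unit for anything unrecognized (or not yet registered). The dispatch, distilled (Kind and Node are illustrative, not the DIDescriptor API):

    enum class Kind { CompileUnit, Subprogram, Type, Variable, Other };

    struct Node {
      Kind K;
      const Node *CU; // owning compile unit's node, when the kind has one
    };

    const Node *owningUnit(const Node *N, const Node *FirstCU) {
      if (N->K == Kind::CompileUnit)
        return N;     // the node is a unit itself
      if (N->CU)
        return N->CU; // subprogram/type/variable/... carry their unit
      return FirstCU; // fallback, as in the real code
    }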
-void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
+
+/// constructGlobalVariableDIE - Construct global variable DIE.
+void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
DIGlobalVariable DI_GV(N);
// If debug information is malformed then ignore it.
@@ -1807,7 +1847,8 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
return;
// Check for pre-existence.
- if (ModuleCU->getDIE(DI_GV.getNode()))
+ CompileUnit *TheCU = getCompileUnit(N);
+ if (TheCU->getDIE(DI_GV))
return;
DIE *VariableDie = createGlobalVariableDIE(DI_GV);
@@ -1815,7 +1856,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
return;
// Add to map.
- ModuleCU->insertDIE(N, VariableDie);
+ TheCU->insertDIE(N, VariableDie);
// Add to context owner.
DIDescriptor GVContext = DI_GV.getContext();
@@ -1823,7 +1864,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
// or a subprogram.
if (DI_GV.isDefinition() && !GVContext.isCompileUnit() &&
!GVContext.isFile() &&
- !isSubprogramContext(GVContext.getNode())) {
+ !isSubprogramContext(GVContext)) {
// Create specification DIE.
DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
@@ -1834,7 +1875,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
Asm->Mang->getSymbol(DI_GV.getGlobal()));
addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block);
addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
- ModuleCU->addDie(VariableSpecDIE);
+ TheCU->addDie(VariableSpecDIE);
} else {
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
@@ -1845,23 +1886,25 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
addToContextOwner(VariableDie, GVContext);
// Expose as global. FIXME - need to check external flag.
- ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
+ TheCU->addGlobal(DI_GV.getName(), VariableDie);
DIType GTy = DI_GV.getType();
if (GTy.isCompositeType() && !GTy.getName().empty()
&& !GTy.isForwardDecl()) {
- DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode());
+ DIEEntry *Entry = TheCU->getDIEEntry(GTy);
assert(Entry && "Missing global type!");
- ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry());
+ TheCU->addGlobalType(GTy.getName(), Entry->getEntry());
}
return;
}
-void DwarfDebug::constructSubprogramDIE(MDNode *N) {
+/// constructSubprogramDIE - Construct subprogram DIE.
+void DwarfDebug::constructSubprogramDIE(const MDNode *N) {
DISubprogram SP(N);
// Check for pre-existence.
- if (ModuleCU->getDIE(N))
+ CompileUnit *TheCU = getCompileUnit(N);
+ if (TheCU->getDIE(N))
return;
if (!SP.isDefinition())
@@ -1872,13 +1915,13 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) {
DIE *SubprogramDie = createSubprogramDIE(SP);
// Add to map.
- ModuleCU->insertDIE(N, SubprogramDie);
+ TheCU->insertDIE(N, SubprogramDie);
// Add to context owner.
addToContextOwner(SubprogramDie, SP.getContext());
// Expose as global.
- ModuleCU->addGlobal(SP.getName(), SubprogramDie);
+ TheCU->addGlobal(SP.getName(), SubprogramDie);
return;
}
@@ -1952,7 +1995,7 @@ void DwarfDebug::beginModule(Module *M) {
/// endModule - Emit all Dwarf sections that should come after the content.
///
void DwarfDebug::endModule() {
- if (!ModuleCU) return;
+ if (!FirstCU) return;
// Attach DW_AT_inline attribute with inlined subprogram DIEs.
for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
@@ -1961,12 +2004,12 @@ void DwarfDebug::endModule() {
addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
}
- for (DenseMap<DIE *, MDNode *>::iterator CI = ContainingTypeMap.begin(),
+ for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(),
CE = ContainingTypeMap.end(); CI != CE; ++CI) {
DIE *SPDie = CI->first;
- MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
+ const MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
if (!N) continue;
- DIE *NDie = ModuleCU->getDIE(N);
+ DIE *NDie = getCompileUnit(N)->getDIE(N);
if (!NDie) continue;
addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
}
@@ -2027,68 +2070,48 @@ void DwarfDebug::endModule() {
// Emit info into a debug str section.
emitDebugStr();
- delete ModuleCU;
- ModuleCU = NULL; // Reset for the next Module, if any.
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I)
+ delete I->second;
+ FirstCU = NULL; // Reset for the next Module, if any.
}
/// findAbstractVariable - Find abstract variable, if any, associated with Var.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
- unsigned FrameIdx,
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
DebugLoc ScopeLoc) {
- DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
+ DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var);
if (AbsDbgVariable)
return AbsDbgVariable;
- LLVMContext &Ctx = Var.getNode()->getContext();
+ LLVMContext &Ctx = Var->getContext();
DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
if (!Scope)
return NULL;
- AbsDbgVariable = new DbgVariable(Var, FrameIdx,
- NULL /* No more-abstract variable*/);
+ AbsDbgVariable = new DbgVariable(Var);
Scope->addVariable(AbsDbgVariable);
- AbstractVariables[Var.getNode()] = AbsDbgVariable;
+ AbstractVariables[Var] = AbsDbgVariable;
return AbsDbgVariable;
}
-/// findAbstractVariable - Find abstract variable, if any, associated with Var.
-/// FIXME : Refactor findAbstractVariable.
-DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
- const MachineInstr *MI,
- DebugLoc ScopeLoc) {
-
- DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
- if (AbsDbgVariable)
- return AbsDbgVariable;
-
- LLVMContext &Ctx = Var.getNode()->getContext();
- DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope(Ctx));
- if (!Scope)
- return NULL;
-
- AbsDbgVariable = new DbgVariable(Var, MI,
- NULL /* No more-abstract variable*/);
- Scope->addVariable(AbsDbgVariable);
- AbstractVariables[Var.getNode()] = AbsDbgVariable;
- DbgValueStartMap[MI] = AbsDbgVariable;
- return AbsDbgVariable;
-}
-
-/// collectVariableInfo - Populate DbgScope entries with variables' info.
-void DwarfDebug::collectVariableInfo() {
+/// collectVariableInfoFromMMITable - Collect variable information from
+/// side table maintained by MMI.
+void
+DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
+ SmallPtrSet<const MDNode *, 16> &Processed) {
const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
-
MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
VE = VMap.end(); VI != VE; ++VI) {
- MDNode *Var = VI->first;
+ const MDNode *Var = VI->first;
if (!Var) continue;
+ Processed.insert(Var);
DIVariable DV(Var);
const std::pair<unsigned, DebugLoc> &VP = VI->second;
DbgScope *Scope = 0;
- if (MDNode *IA = VP.second.getInlinedAt(Ctx))
+ if (const MDNode *IA = VP.second.getInlinedAt(Ctx))
Scope = ConcreteScopes.lookup(IA);
if (Scope == 0)
Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
@@ -2097,100 +2120,192 @@ void DwarfDebug::collectVariableInfo() {
if (Scope == 0)
continue;
- DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, VP.second);
- DbgVariable *RegVar = new DbgVariable(DV, VP.first, AbsDbgVariable);
+ DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second);
+ DbgVariable *RegVar = new DbgVariable(DV);
+ recordVariableFrameIndex(RegVar, VP.first);
Scope->addVariable(RegVar);
+ if (AbsDbgVariable) {
+ recordVariableFrameIndex(AbsDbgVariable, VP.first);
+ VarToAbstractVarMap[RegVar] = AbsDbgVariable;
+ }
}
+}
+/// isDbgValueInUndefinedReg - Return true if the debug value, encoded by a
+/// DBG_VALUE instruction, is in an undefined register.
+static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
+ assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ if (MI->getOperand(0).isReg() && !MI->getOperand(0).getReg())
+ return true;
+ return false;
+}
+
+/// isDbgValueInDefinedReg - Return true if the debug value, encoded by a
+/// DBG_VALUE instruction, is in a defined register.
+static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
+ assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
+ if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg())
+ return true;
+ return false;
+}
+
+/// collectVariableInfo - Populate DbgScope entries with variables' info.
+void DwarfDebug::collectVariableInfo(const MachineFunction *MF) {
+ SmallPtrSet<const MDNode *, 16> Processed;
+
+  // Collect variable information from the side table maintained by MMI.
+ collectVariableInfoFromMMITable(MF, Processed);
+
+ SmallVector<const MachineInstr *, 8> DbgValues;
// Collect variable information from DBG_VALUE machine instructions;
for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
- I != E; ++I) {
+ I != E; ++I)
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- if (!MInsn->isDebugValue())
+ if (!MInsn->isDebugValue() || isDbgValueInUndefinedReg(MInsn))
continue;
+ DbgValues.push_back(MInsn);
+ }
- // Ignore Undef values.
- if (MInsn->getOperand(0).isReg() && !MInsn->getOperand(0).getReg())
- continue;
+  // This is a collection of DBG_VALUE instructions describing the same variable.
+ SmallVector<const MachineInstr *, 4> MultipleValues;
+ for(SmallVector<const MachineInstr *, 8>::iterator I = DbgValues.begin(),
+ E = DbgValues.end(); I != E; ++I) {
+ const MachineInstr *MInsn = *I;
+ MultipleValues.clear();
+ if (isDbgValueInDefinedReg(MInsn))
+ MultipleValues.push_back(MInsn);
+ DIVariable DV(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata());
+ if (Processed.count(DV) != 0)
+ continue;
+
+ for (SmallVector<const MachineInstr *, 8>::iterator MI = I+1,
+ ME = DbgValues.end(); MI != ME; ++MI) {
+ const MDNode *Var =
+ (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
+ if (Var == DV && isDbgValueInDefinedReg(*MI))
+ MultipleValues.push_back(*MI);
+ }
+
+ DbgScope *Scope = findDbgScope(MInsn);
+ if (!Scope && DV.getTag() == dwarf::DW_TAG_arg_variable)
+ Scope = CurrentFnDbgScope;
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
- DIVariable DV(
- const_cast<MDNode *>(MInsn->getOperand(MInsn->getNumOperands() - 1)
- .getMetadata()));
- if (DV.getTag() == dwarf::DW_TAG_arg_variable) {
- // FIXME Handle inlined subroutine arguments.
- DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL);
- CurrentFnDbgScope->addVariable(ArgVar);
- DbgValueStartMap[MInsn] = ArgVar;
+ Processed.insert(DV);
+ DbgVariable *RegVar = new DbgVariable(DV);
+ Scope->addVariable(RegVar);
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
+ if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
+ DbgVariableToDbgInstMap[AbsVar] = MInsn;
+ VarToAbstractVarMap[RegVar] = AbsVar;
+ }
+ if (MultipleValues.size() <= 1) {
+ DbgVariableToDbgInstMap[RegVar] = MInsn;
+ continue;
+ }
+
+  // Handle multiple DBG_VALUE instructions describing one variable.
+ if (DotDebugLocEntries.empty())
+ RegVar->setDotDebugLocOffset(0);
+ else
+ RegVar->setDotDebugLocOffset(DotDebugLocEntries.size());
+ const MachineInstr *Begin = NULL;
+ const MachineInstr *End = NULL;
+ for (SmallVector<const MachineInstr *, 4>::iterator
+ MVI = MultipleValues.begin(), MVE = MultipleValues.end(); MVI != MVE; ++MVI) {
+ if (!Begin) {
+ Begin = *MVI;
continue;
+ }
+ End = *MVI;
+ MachineLocation MLoc;
+ MLoc.set(Begin->getOperand(0).getReg(), 0);
+ const MCSymbol *FLabel = getLabelBeforeInsn(Begin);
+ const MCSymbol *SLabel = getLabelBeforeInsn(End);
+ DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc));
+ Begin = End;
+ if (MVI + 1 == MVE) {
+        // If End is the last instruction then its value is valid
+        // until the end of the function.
+ MLoc.set(End->getOperand(0).getReg(), 0);
+ DotDebugLocEntries.
+ push_back(DotDebugLocEntry(SLabel, FunctionEndSym, MLoc));
}
+ }
+ DotDebugLocEntries.push_back(DotDebugLocEntry());
+ }
- DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown()) continue;
- DbgScope *Scope = 0;
- if (MDNode *IA = DL.getInlinedAt(Ctx))
- Scope = ConcreteScopes.lookup(IA);
- if (Scope == 0)
- Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
-
- // If variable scope is not found then skip this variable.
- if (Scope == 0)
+ // Collect info for variables that were optimized out.
+ if (NamedMDNode *NMD =
+ MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) {
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
+ DIVariable DV(cast_or_null<MDNode>(NMD->getOperand(i)));
+ if (!DV || !Processed.insert(DV))
continue;
-
- DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, DL);
- DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable);
- DbgValueStartMap[MInsn] = RegVar;
- Scope->addVariable(RegVar);
+ DbgScope *Scope = DbgScopeMap.lookup(DV.getContext());
+ if (Scope)
+ Scope->addVariable(new DbgVariable(DV));
}
}
}
+/// getLabelBeforeInsn - Return Label preceding the instruction.
+const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) {
+ DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+ LabelsBeforeInsn.find(MI);
+ if (I == LabelsBeforeInsn.end())
+    // FunctionBeginSym always precedes all the instructions in the current function.
+ return FunctionBeginSym;
+ return I->second;
+}
+
+/// getLabelAfterInsn - Return Label immediately following the instruction.
+const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
+ DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
+ LabelsAfterInsn.find(MI);
+ if (I == LabelsAfterInsn.end())
+ return NULL;
+ return I->second;
+}
+
/// beginScope - Process beginning of a scope.
void DwarfDebug::beginScope(const MachineInstr *MI) {
- // Check location.
- DebugLoc DL = MI->getDebugLoc();
- if (DL.isUnknown())
+ if (InsnNeedsLabel.count(MI) == 0) {
+ LabelsBeforeInsn[MI] = PrevLabel;
return;
+ }
- MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
-
- // FIXME: Should only verify each scope once!
- if (!DIScope(Scope).Verify())
+ // Check location.
+ DebugLoc DL = MI->getDebugLoc();
+ if (!DL.isUnknown()) {
+ const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext());
+ PrevLabel = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
+ PrevInstLoc = DL;
+ LabelsBeforeInsn[MI] = PrevLabel;
return;
+ }
- // DBG_VALUE instruction establishes new value.
+ // If location is unknown then use temp label for this DBG_VALUE
+ // instruction.
if (MI->isDebugValue()) {
- DenseMap<const MachineInstr *, DbgVariable *>::iterator DI
- = DbgValueStartMap.find(MI);
- if (DI != DbgValueStartMap.end()) {
- MCSymbol *Label = NULL;
- if (DL == PrevInstLoc)
- Label = PrevLabel;
- else {
- Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
- PrevInstLoc = DL;
- PrevLabel = Label;
- }
- DI->second->setDbgValueLabel(Label);
- }
+ PrevLabel = MMI->getContext().CreateTempSymbol();
+ Asm->OutStreamer.EmitLabel(PrevLabel);
+ LabelsBeforeInsn[MI] = PrevLabel;
return;
}
- // Emit a label to indicate location change. This is used for line
- // table even if this instruction does not start a new scope.
- MCSymbol *Label = NULL;
- if (DL == PrevInstLoc)
- Label = PrevLabel;
- else {
- Label = recordSourceLine(DL.getLine(), DL.getCol(), Scope);
- PrevInstLoc = DL;
- PrevLabel = Label;
+ if (UnknownLocations) {
+ PrevLabel = recordSourceLine(0, 0, 0);
+ LabelsBeforeInsn[MI] = PrevLabel;
+ return;
}
- // If this instruction begins a scope then note down corresponding label.
- if (InsnsBeginScopeSet.count(MI) != 0)
- LabelsBeforeInsn[MI] = Label;
+ assert (0 && "Instruction is not processed!");
}
/// endScope - Process end of a scope.
@@ -2204,7 +2319,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) {
}
/// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) {
+DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt) {
if (!InlinedAt) {
DbgScope *WScope = DbgScopeMap.lookup(Scope);
if (WScope)
@@ -2213,7 +2328,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) {
DbgScopeMap.insert(std::make_pair(Scope, WScope));
if (DIDescriptor(Scope).isLexicalBlock()) {
DbgScope *Parent =
- getOrCreateDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL);
+ getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
WScope->setParent(Parent);
Parent->addScope(WScope);
}
@@ -2235,7 +2350,7 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) {
DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
DILocation DL(InlinedAt);
DbgScope *Parent =
- getOrCreateDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode());
+ getOrCreateDbgScope(DL.getScope(), DL.getOrigLocation());
WScope->setParent(Parent);
Parent->addScope(WScope);
@@ -2249,13 +2364,13 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt) {
/// machine instruction encodes valid location info.
static bool hasValidLocation(LLVMContext &Ctx,
const MachineInstr *MInsn,
- MDNode *&Scope, MDNode *&InlinedAt) {
+ const MDNode *&Scope, const MDNode *&InlinedAt) {
if (MInsn->isDebugValue())
return false;
DebugLoc DL = MInsn->getDebugLoc();
if (DL.isUnknown()) return false;
- MDNode *S = DL.getScope(Ctx);
+ const MDNode *S = DL.getScope(Ctx);
// There is no need to create another DIE for compile unit. For all
// other scopes, create one DbgScope now. This will be translated
@@ -2307,8 +2422,8 @@ void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- MDNode *Scope = NULL;
- MDNode *InlinedAt = NULL;
+ const MDNode *Scope = NULL;
+ const MDNode *InlinedAt = NULL;
// Check if instruction has valid location information.
if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
@@ -2344,8 +2459,8 @@ bool DwarfDebug::extractScopeInformation() {
LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
SmallVector<DbgRange, 4> MIRanges;
DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
- MDNode *PrevScope = NULL;
- MDNode *PrevInlinedAt = NULL;
+ const MDNode *PrevScope = NULL;
+ const MDNode *PrevInlinedAt = NULL;
const MachineInstr *RangeBeginMI = NULL;
const MachineInstr *PrevMI = NULL;
for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
@@ -2353,8 +2468,8 @@ bool DwarfDebug::extractScopeInformation() {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- MDNode *Scope = NULL;
- MDNode *InlinedAt = NULL;
+ const MDNode *Scope = NULL;
+ const MDNode *InlinedAt = NULL;
// Check if instruction has valid location information.
if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
@@ -2476,8 +2591,6 @@ static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) {
void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo()) return;
if (!extractScopeInformation()) return;
-
- collectVariableInfo();
FunctionBeginSym = Asm->GetTempSymbol("func_begin",
Asm->getFunctionNumber());
@@ -2489,7 +2602,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
DebugLoc FDL = FindFirstDebugLoc(MF);
if (FDL.isUnknown()) return;
- MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
+ const MDNode *Scope = FDL.getScope(MF->getFunction()->getContext());
DISubprogram SP = getDISubprogram(Scope);
unsigned Line, Col;
@@ -2502,6 +2615,40 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
}
recordSourceLine(Line, Col, Scope);
+
+ DebugLoc PrevLoc;
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I)
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MI = II;
+ DebugLoc DL = MI->getDebugLoc();
+ if (MI->isDebugValue()) {
+        // DBG_VALUE needs a label if the variable is a local variable or
+        // an argument whose location is changing.
+ assert (MI->getNumOperands() > 1 && "Invalid machine instruction!");
+ DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
+ if (!DV.Verify()) continue;
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
+ InsnNeedsLabel.insert(MI);
+ else if (!ProcessedArgs.insert(DV))
+ InsnNeedsLabel.insert(MI);
+ } else {
+ // If location is unknown then instruction needs a location only if
+ // UnknownLocations flag is set.
+ if (DL.isUnknown()) {
+ if (UnknownLocations && !PrevLoc.isUnknown())
+ InsnNeedsLabel.insert(MI);
+ } else if (DL != PrevLoc)
+ // Otherwise, instruction needs a location only if it is new location.
+ InsnNeedsLabel.insert(MI);
+ }
+
+ if (!DL.isUnknown() || UnknownLocations)
+ PrevLoc = DL;
+ }
+
+ PrevLabel = FunctionBeginSym;
}
/// endFunction - Gather and emit post-function debug information.
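The pre-pass added to beginFunction decides, once per function, which instructions beginScope must label: DBG_VALUEs for locals always; DBG_VALUEs for arguments only once the argument has been described before; ordinary instructions only on a source-location change; unknown locations only under -use-unknown-locations. A distilled sketch of that decision, with DebugLoc reduced to an int and -1 meaning unknown:

    #include <set>
    #include <string>

    struct Insn {
      bool IsDbgValue;
      bool IsArgVariable; // meaningful only for DBG_VALUE
      std::string Var;    // variable identity, only for DBG_VALUE
      int Loc;            // -1 encodes an unknown DebugLoc
    };

    bool needsLabel(const Insn &I, int &PrevLoc,
                    std::set<std::string> &SeenArgs, bool UnknownLocations) {
      bool Needs;
      if (I.IsDbgValue)
        // Locals always; arguments only when described a second time.
        Needs = !I.IsArgVariable || !SeenArgs.insert(I.Var).second;
      else if (I.Loc == -1)
        // Unknown location: label only under -use-unknown-locations, and
        // only when it follows a known location.
        Needs = UnknownLocations && PrevLoc != -1;
      else
        Needs = I.Loc != PrevLoc; // a new source location needs a label
      if (I.Loc != -1 || UnknownLocations)
        PrevLoc = I.Loc; // same update rule as the real loop
      return Needs;
    }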
@@ -2510,10 +2657,15 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo() || DbgScopeMap.empty()) return;
if (CurrentFnDbgScope) {
+
// Define end label for subprogram.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("func_end",
- Asm->getFunctionNumber()));
+ FunctionEndSym = Asm->GetTempSymbol("func_end",
+ Asm->getFunctionNumber());
+    // Assumes we are in the correct section after emitting the function entry point.
+ Asm->OutStreamer.EmitLabel(FunctionEndSym);
+ collectVariableInfo(MF);
+
// Get function line info.
if (!Lines.empty()) {
// Get section line info.
@@ -2543,10 +2695,15 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
// Clear debug info
CurrentFnDbgScope = NULL;
+ InsnNeedsLabel.clear();
+ ProcessedArgs.clear();
+ DbgVariableToFrameIndexMap.clear();
+ VarToAbstractVarMap.clear();
+ DbgVariableToDbgInstMap.clear();
+ DbgVariableLabelsMap.clear();
DeleteContainerSeconds(DbgScopeMap);
InsnsBeginScopeSet.clear();
InsnsEndScopeSet.clear();
- DbgValueStartMap.clear();
ConcreteScopes.clear();
DeleteContainerSeconds(AbstractScopes);
AbstractScopesList.clear();
@@ -2557,30 +2714,82 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
PrevLabel = NULL;
}
+/// recordVariableFrameIndex - Record a variable's index.
+void DwarfDebug::recordVariableFrameIndex(const DbgVariable *V, int Index) {
+ assert (V && "Invalid DbgVariable!");
+ DbgVariableToFrameIndexMap[V] = Index;
+}
+
+/// findVariableFrameIndex - Return true if frame index for the variable
+/// is found. Update FI to hold value of the index.
+bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
+ assert (V && "Invalid DbgVariable!");
+ DenseMap<const DbgVariable *, int>::iterator I =
+ DbgVariableToFrameIndexMap.find(V);
+ if (I == DbgVariableToFrameIndexMap.end())
+ return false;
+ *FI = I->second;
+ return true;
+}
+
+/// findVariableLabel - Find MCSymbol for the variable.
+const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) {
+ DenseMap<const DbgVariable *, const MCSymbol *>::iterator I
+ = DbgVariableLabelsMap.find(V);
+ if (I == DbgVariableLabelsMap.end())
+ return NULL;
+ else return I->second;
+}
+
+/// findDbgScope - Find DbgScope for the debug loc attached with an
+/// instruction.
+DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
+ DbgScope *Scope = NULL;
+ LLVMContext &Ctx =
+ MInsn->getParent()->getParent()->getFunction()->getContext();
+ DebugLoc DL = MInsn->getDebugLoc();
+
+ if (DL.isUnknown())
+ return Scope;
+
+ if (const MDNode *IA = DL.getInlinedAt(Ctx))
+ Scope = ConcreteScopes.lookup(IA);
+ if (Scope == 0)
+ Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
+
+ return Scope;
+}
+
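
findDbgScope above is a two-level lookup: the inlined-at node selects a
concrete scope first, and only if that fails does the lexical scope map apply.
A standalone sketch using std::map stand-ins for the DenseMaps (names are
illustrative, not LLVM's):

  #include <map>

  struct MDNodeStub {};
  struct DbgScopeStub {};
  typedef std::map<const MDNodeStub *, DbgScopeStub *> ScopeMap;

  DbgScopeStub *lookupScope(const MDNodeStub *InlinedAt,
                            const MDNodeStub *ScopeNode,
                            const ScopeMap &Concrete, const ScopeMap &Scopes) {
    DbgScopeStub *S = 0;
    if (InlinedAt) {                     // inlined code: concrete scope wins
      ScopeMap::const_iterator I = Concrete.find(InlinedAt);
      if (I != Concrete.end())
        S = I->second;
    }
    if (!S) {                            // otherwise fall back to lexical scope
      ScopeMap::const_iterator I = Scopes.find(ScopeNode);
      if (I != Scopes.end())
        S = I->second;
    }
    return S;
  }
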
+
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
+MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) {
StringRef Dir;
StringRef Fn;
- DIDescriptor Scope(S);
- if (Scope.isCompileUnit()) {
- DICompileUnit CU(S);
- Dir = CU.getDirectory();
- Fn = CU.getFilename();
- } else if (Scope.isSubprogram()) {
- DISubprogram SP(S);
- Dir = SP.getDirectory();
- Fn = SP.getFilename();
- } else if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(S);
- Dir = DB.getDirectory();
- Fn = DB.getFilename();
- } else
- assert(0 && "Unexpected scope info");
+ unsigned Src = 1;
+ if (S) {
+ DIDescriptor Scope(S);
+
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Dir = CU.getDirectory();
+ Fn = CU.getFilename();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Dir = SP.getDirectory();
+ Fn = SP.getFilename();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Dir = DB.getDirectory();
+ Fn = DB.getFilename();
+ } else
+ assert(0 && "Unexpected scope info");
+
+ Src = GetOrCreateSourceID(Dir, Fn);
+ }
- unsigned Src = GetOrCreateSourceID(Dir, Fn);
MCSymbol *Label = MMI->getContext().CreateTempSymbol();
Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
@@ -2643,14 +2852,18 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
/// computeSizeAndOffsets - Compute the size and offset of all the DIEs.
///
void DwarfDebug::computeSizeAndOffsets() {
- // Compute size of compile unit header.
- static unsigned Offset =
- sizeof(int32_t) + // Length of Compilation Unit Info
- sizeof(int16_t) + // DWARF version number
- sizeof(int32_t) + // Offset Into Abbrev. Section
- sizeof(int8_t); // Pointer Size (in bytes)
-
- computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true);
+ unsigned PrevOffset = 0;
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ // Compute size of compile unit header.
+ unsigned Offset = PrevOffset +
+ sizeof(int32_t) + // Length of Compilation Unit Info
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t); // Pointer Size (in bytes)
+ computeSizeAndOffset(I->second->getCUDie(), Offset, true);
+ PrevOffset = Offset;
+ }
}
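
The header arithmetic above is fixed per unit: with 32-bit DWARF the compile
unit header is 11 bytes, and each unit's DIE offsets start after it. A quick
standalone check:

  #include <cstdint>
  #include <cstdio>

  int main() {
    unsigned Header = sizeof(int32_t)    // length of compilation unit info
                    + sizeof(int16_t)    // DWARF version number
                    + sizeof(int32_t)    // offset into abbrev. section
                    + sizeof(int8_t);    // pointer size in bytes
    std::printf("CU header = %u bytes\n", Header);   // prints 11
    return 0;
  }
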
/// EmitSectionSym - Switch to the specified MCSection and emit an assembler
@@ -2694,6 +2907,9 @@ void DwarfDebug::EmitSectionLabels() {
DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(),
"debug_range");
+ DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(),
+ "section_debug_loc");
+
TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin");
EmitSectionSym(Asm, TLOF.getDataSection());
}
@@ -2745,6 +2961,14 @@ void DwarfDebug::emitDIE(DIE *Die) {
4);
break;
}
+ case dwarf::DW_AT_location: {
+ if (UseDotDebugLocEntry.count(Die) != 0) {
+ DIELabel *L = cast<DIELabel>(Values[i]);
+ Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4);
+ } else
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
default:
// Emit an attribute using the defined form.
Values[i]->EmitValue(Asm, Form);
@@ -2771,37 +2995,41 @@ void DwarfDebug::emitDebugInfo() {
// Start debug info section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfInfoSection());
- DIE *Die = ModuleCU->getCUDie();
-
- // Emit the compile units header.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
- ModuleCU->getID()));
-
- // Emit size of content not including length itself
- unsigned ContentSize = Die->getSize() +
- sizeof(int16_t) + // DWARF version number
- sizeof(int32_t) + // Offset Into Abbrev. Section
- sizeof(int8_t) + // Pointer Size (in bytes)
- sizeof(int32_t); // FIXME - extra pad for gdb bug.
-
- Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
- Asm->EmitInt32(ContentSize);
- Asm->OutStreamer.AddComment("DWARF version number");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
- Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
- DwarfAbbrevSectionSym);
- Asm->OutStreamer.AddComment("Address Size (in bytes)");
- Asm->EmitInt8(Asm->getTargetData().getPointerSize());
-
- emitDIE(Die);
- // FIXME - extra padding for gdb bug.
- Asm->OutStreamer.AddComment("4 extra padding bytes for GDB");
- Asm->EmitInt8(0);
- Asm->EmitInt8(0);
- Asm->EmitInt8(0);
- Asm->EmitInt8(0);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", ModuleCU->getID()));
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ DIE *Die = TheCU->getCUDie();
+
+ // Emit the compile units header.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin",
+ TheCU->getID()));
+
+ // Emit size of content not including length itself
+ unsigned ContentSize = Die->getSize() +
+ sizeof(int16_t) + // DWARF version number
+ sizeof(int32_t) + // Offset Into Abbrev. Section
+ sizeof(int8_t) + // Pointer Size (in bytes)
+ sizeof(int32_t); // FIXME - extra pad for gdb bug.
+
+ Asm->OutStreamer.AddComment("Length of Compilation Unit Info");
+ Asm->EmitInt32(ContentSize);
+ Asm->OutStreamer.AddComment("DWARF version number");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+ Asm->OutStreamer.AddComment("Offset Into Abbrev. Section");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"),
+ DwarfAbbrevSectionSym);
+ Asm->OutStreamer.AddComment("Address Size (in bytes)");
+ Asm->EmitInt8(Asm->getTargetData().getPointerSize());
+
+ emitDIE(Die);
+ // FIXME - extra padding for gdb bug.
+ Asm->OutStreamer.AddComment("4 extra padding bytes for GDB");
+ Asm->EmitInt8(0);
+ Asm->EmitInt8(0);
+ Asm->EmitInt8(0);
+ Asm->EmitInt8(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID()));
+ }
}
/// emitAbbreviations - Emit the abbreviation section.
@@ -2967,8 +3195,6 @@ void DwarfDebug::emitDebugLines() {
MCSymbol *Label = LineInfo.getLabel();
if (!Label->isDefined()) continue; // Not emitted, in dead code.
- if (LineInfo.getLine() == 0) continue;
-
if (Asm->isVerbose()) {
std::pair<unsigned, unsigned> SrcID =
getSourceDirectoryAndFileIds(LineInfo.getSourceID());
@@ -3128,91 +3354,99 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) {
/// emitDebugPubNames - Emit visible names into a debug pubnames section.
///
void DwarfDebug::emitDebugPubNames() {
- // Start the dwarf pubnames section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfPubNamesSection());
-
- Asm->OutStreamer.AddComment("Length of Public Names Info");
- Asm->EmitLabelDifference(
- Asm->GetTempSymbol("pubnames_end", ModuleCU->getID()),
- Asm->GetTempSymbol("pubnames_begin", ModuleCU->getID()), 4);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
- ModuleCU->getID()));
-
- Asm->OutStreamer.AddComment("DWARF Version");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
-
- Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
- DwarfInfoSectionSym);
-
- Asm->OutStreamer.AddComment("Compilation Unit Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", ModuleCU->getID()),
- Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
- 4);
-
- const StringMap<DIE*> &Globals = ModuleCU->getGlobals();
- for (StringMap<DIE*>::const_iterator
- GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
- const char *Name = GI->getKeyData();
- DIE *Entity = GI->second;
-
- Asm->OutStreamer.AddComment("DIE offset");
- Asm->EmitInt32(Entity->getOffset());
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
- if (Asm->isVerbose())
- Asm->OutStreamer.AddComment("External Name");
- Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+ Asm->OutStreamer.AddComment("Length of Public Names Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubnames_end", TheCU->getID()),
+ Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin",
+ TheCU->getID()));
+
+ Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+ Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobals();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE *Entity = GI->second;
+
+ Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose())
+ Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
+ TheCU->getID()));
}
-
- Asm->OutStreamer.AddComment("End Mark");
- Asm->EmitInt32(0);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end",
- ModuleCU->getID()));
}
void DwarfDebug::emitDebugPubTypes() {
- // Start the dwarf pubnames section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfPubTypesSection());
- Asm->OutStreamer.AddComment("Length of Public Types Info");
- Asm->EmitLabelDifference(
- Asm->GetTempSymbol("pubtypes_end", ModuleCU->getID()),
- Asm->GetTempSymbol("pubtypes_begin", ModuleCU->getID()), 4);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
- ModuleCU->getID()));
-
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
-
- Asm->OutStreamer.AddComment("Offset of Compilation ModuleCU Info");
- Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
- DwarfInfoSectionSym);
-
- Asm->OutStreamer.AddComment("Compilation ModuleCU Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", ModuleCU->getID()),
- Asm->GetTempSymbol("info_begin", ModuleCU->getID()),
- 4);
-
- const StringMap<DIE*> &Globals = ModuleCU->getGlobalTypes();
- for (StringMap<DIE*>::const_iterator
- GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
- const char *Name = GI->getKeyData();
- DIE * Entity = GI->second;
-
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
- Asm->EmitInt32(Entity->getOffset());
+ for (DenseMap<const MDNode *, CompileUnit *>::iterator I = CUMap.begin(),
+ E = CUMap.end(); I != E; ++I) {
+ CompileUnit *TheCU = I->second;
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubTypesSection());
+ Asm->OutStreamer.AddComment("Length of Public Types Info");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("pubtypes_end", TheCU->getID()),
+ Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin",
+ TheCU->getID()));
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
- Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version");
+ Asm->EmitInt16(dwarf::DWARF_VERSION);
+
+ Asm->OutStreamer.AddComment("Offset of Compilation Unit Info");
+ Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ DwarfInfoSectionSym);
+
+ Asm->OutStreamer.AddComment("Compilation Unit Length");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()),
+ Asm->GetTempSymbol("info_begin", TheCU->getID()),
+ 4);
+
+ const StringMap<DIE*> &Globals = TheCU->getGlobalTypes();
+ for (StringMap<DIE*>::const_iterator
+ GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
+ const char *Name = GI->getKeyData();
+ DIE * Entity = GI->second;
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset");
+ Asm->EmitInt32(Entity->getOffset());
+
+ if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name");
+ Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0);
+ }
+
+ Asm->OutStreamer.AddComment("End Mark");
+ Asm->EmitInt32(0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
+ TheCU->getID()));
}
-
- Asm->OutStreamer.AddComment("End Mark");
- Asm->EmitInt32(0);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end",
- ModuleCU->getID()));
}
/// emitDebugStr - Emit visible names into a debug str section.
@@ -3248,9 +3482,39 @@ void DwarfDebug::emitDebugStr() {
/// emitDebugLoc - Emit visible names into a debug loc section.
///
void DwarfDebug::emitDebugLoc() {
+ if (DotDebugLocEntries.empty())
+ return;
+
// Start the dwarf loc section.
Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfLocSection());
+ Asm->getObjFileLowering().getDwarfLocSection());
+ unsigned char Size = Asm->getTargetData().getPointerSize();
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
+ unsigned index = 1;
+ for (SmallVector<DotDebugLocEntry, 4>::iterator I = DotDebugLocEntries.begin(),
+ E = DotDebugLocEntries.end(); I != E; ++I, ++index) {
+ DotDebugLocEntry Entry = *I;
+ if (Entry.isEmpty()) {
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index));
+ } else {
+ Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0);
+ Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0);
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false);
+ if (Reg < 32) {
+ Asm->OutStreamer.AddComment("Loc expr size");
+ Asm->EmitInt16(1);
+ Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg);
+ } else {
+ Asm->OutStreamer.AddComment("Loc expr size");
+ Asm->EmitInt16(1+MCAsmInfo::getULEB128Size(Reg));
+ Asm->EmitInt8(dwarf::DW_OP_regx);
+ Asm->EmitULEB128(Reg);
+ }
+ }
+ }
}
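
The register branch above follows the DWARF rule that registers 0-31 get a
one-byte DW_OP_reg0+N opcode while larger numbers take DW_OP_regx plus a
ULEB128 operand, which is exactly what the emitted "Loc expr size" counts. A
standalone sketch of that encoding (0x50 and 0x90 are the standard DWARF
opcode values):

  #include <cstdint>
  #include <vector>

  std::vector<uint8_t> regLocExpr(unsigned DwarfReg) {
    std::vector<uint8_t> Expr;
    if (DwarfReg < 32) {
      Expr.push_back(0x50 + DwarfReg);   // DW_OP_reg0 + Reg, one byte
    } else {
      Expr.push_back(0x90);              // DW_OP_regx
      uint64_t V = DwarfReg;             // followed by ULEB128(Reg)
      do {
        uint8_t Byte = V & 0x7f;
        V >>= 7;
        if (V)
          Byte |= 0x80;                  // more bytes follow
        Expr.push_back(Byte);
      } while (V);
    }
    return Expr;                         // Expr.size() == "Loc expr size"
  }
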
/// EmitDebugARanges - Emit visible names into a debug aranges section.
@@ -3310,7 +3574,7 @@ void DwarfDebug::emitDebugInlineInfo() {
if (!Asm->MAI->doesDwarfUsesInlineInfoSection())
return;
- if (!ModuleCU)
+ if (!FirstCU)
return;
Asm->OutStreamer.SwitchSection(
@@ -3327,11 +3591,11 @@ void DwarfDebug::emitDebugInlineInfo() {
Asm->OutStreamer.AddComment("Address Size (in bytes)");
Asm->EmitInt8(Asm->getTargetData().getPointerSize());
- for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ for (SmallVector<const MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
E = InlinedSPNodes.end(); I != E; ++I) {
- MDNode *Node = *I;
- DenseMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
+ const MDNode *Node = *I;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II
= InlineInfo.find(Node);
SmallVector<InlineInfoLabels, 4> &Labels = II->second;
DISubprogram SP(Node);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index b964b23..0d6116f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -15,6 +15,7 @@
#define CODEGEN_ASMPRINTER_DWARFDEBUG_H__
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineLocation.h"
#include "DIE.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
@@ -30,7 +31,6 @@ class DbgConcreteScope;
class DbgScope;
class DbgVariable;
class MachineFrameInfo;
-class MachineLocation;
class MachineModuleInfo;
class MachineOperand;
class MCAsmInfo;
@@ -82,8 +82,8 @@ class DwarfDebug {
// Attributes used to construct specific Dwarf sections.
//
- /// ModuleCU - All DIEs are inserted in ModuleCU.
- CompileUnit *ModuleCU;
+ CompileUnit *FirstCU;
+ DenseMap <const MDNode *, CompileUnit *> CUMap;
/// AbbreviationsSet - Used to uniquely define abbreviations.
///
@@ -146,15 +146,15 @@ class DwarfDebug {
/// DbgScopeMap - Tracks the scopes in the current function. Owns the
/// contained DbgScope*s.
///
- DenseMap<MDNode *, DbgScope *> DbgScopeMap;
+ DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
/// ConcreteScopes - Tracks the concrete scopes in the current function.
/// These scopes are also included in DbgScopeMap.
- DenseMap<MDNode *, DbgScope *> ConcreteScopes;
+ DenseMap<const MDNode *, DbgScope *> ConcreteScopes;
/// AbstractScopes - Tracks the abstract scopes in a module. These scopes are
/// not included in DbgScopeMap. AbstractScopes owns its DbgScope*s.
- DenseMap<MDNode *, DbgScope *> AbstractScopes;
+ DenseMap<const MDNode *, DbgScope *> AbstractScopes;
/// AbstractScopesList - Tracks abstract scopes constructed while processing
/// a function. This list is cleared during endFunction().
@@ -162,13 +162,43 @@ class DwarfDebug {
/// AbstractVariables - Collection of abstract variables. Owned by the
/// DbgScopes in AbstractScopes.
- DenseMap<MDNode *, DbgVariable *> AbstractVariables;
-
- /// DbgValueStartMap - Tracks starting scope of variable DIEs.
- /// If the scope of an object begins sometime after the low pc value for the
- /// scope most closely enclosing the object, the object entry may have a
- /// DW_AT_start_scope attribute.
- DenseMap<const MachineInstr *, DbgVariable *> DbgValueStartMap;
+ DenseMap<const MDNode *, DbgVariable *> AbstractVariables;
+
+ /// DbgVariableToFrameIndexMap - Tracks frame index used to find
+ /// variable's value.
+ DenseMap<const DbgVariable *, int> DbgVariableToFrameIndexMap;
+
+ /// DbgVariableToDbgInstMap - Maps DbgVariable to corresponding DBG_VALUE
+ /// machine instruction.
+ DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
+
+ /// DbgVariableLabelsMap - Maps DbgVariable to corresponding MCSymbol.
+ DenseMap<const DbgVariable *, const MCSymbol *> DbgVariableLabelsMap;
+
+ /// DotDebugLocEntry - This struct describes location entries emitted in
+ /// .debug_loc section.
+ typedef struct DotDebugLocEntry {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ MachineLocation Loc;
+ DotDebugLocEntry() : Begin(0), End(0) {}
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
+ MachineLocation &L) : Begin(B), End(E), Loc(L) {}
+ /// Empty entries are also used as a trigger to emit a temp label. Such
+ /// labels are referenced to find the debug_loc offset for a given DIE.
+ bool isEmpty() { return Begin == 0 && End == 0; }
+ } DotDebugLocEntry;
+
+ /// DotDebugLocEntries - Collection of DotDebugLocEntry.
+ SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
+
+ /// UseDotDebugLocEntry - The DW_AT_location attribute of each DIE in this
+ /// set identifies a corresponding .debug_loc entry offset.
+ SmallPtrSet<const DIE *, 4> UseDotDebugLocEntry;
+
+ /// VarToAbstractVarMap - Maps a DbgVariable to its corresponding abstract
+ /// DbgVariable, if any.
+ DenseMap<const DbgVariable *, const DbgVariable *> VarToAbstractVarMap;
/// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
/// (at the end of the module) as DW_AT_inline.
@@ -177,7 +207,7 @@ class DwarfDebug {
/// ContainingTypeMap - This map is used to keep track of subprogram DIEs that
/// need DW_AT_containing_type attribute. This attribute points to a DIE that
/// corresponds to the MDNode mapped with the subprogram DIE.
- DenseMap<DIE *, MDNode *> ContainingTypeMap;
+ DenseMap<DIE *, const MDNode *> ContainingTypeMap;
typedef SmallVector<DbgScope *, 2> ScopeVector;
SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet;
@@ -185,9 +215,9 @@ class DwarfDebug {
/// InlineInfo - Keep track of inlined functions and their location. This
/// information is used to populate debug_inlined section.
- typedef std::pair<MCSymbol*, DIE *> InlineInfoLabels;
- DenseMap<MDNode*, SmallVector<InlineInfoLabels, 4> > InlineInfo;
- SmallVector<MDNode *, 4> InlinedSPNodes;
+ typedef std::pair<const MCSymbol *, DIE *> InlineInfoLabels;
+ DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+ SmallVector<const MDNode *, 4> InlinedSPNodes;
/// LabelsBeforeInsn - Maps instruction with label emitted before
/// instruction.
@@ -197,6 +227,13 @@ class DwarfDebug {
/// instruction.
DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn;
+ /// InsnNeedsLabel - Collection of instructions that need a label to mark
+ /// a debugging information entity.
+ SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel;
+
+ /// ProcessedArgs - Collection of arguments already processed.
+ SmallPtrSet<const MDNode *, 8> ProcessedArgs;
+
SmallVector<const MCSymbol *, 8> DebugRangeSymbols;
/// Previous instruction's location information. This is used to determine
@@ -219,8 +256,8 @@ class DwarfDebug {
// section offsets and are created by EmitSectionLabels.
MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
-
- MCSymbol *FunctionBeginSym;
+ MCSymbol *DwarfDebugLocSectionSym;
+ MCSymbol *FunctionBeginSym, *FunctionEndSym;
private:
/// getSourceDirectoryAndFileIds - Return the directory and file ids that
@@ -295,7 +332,7 @@ private:
/// addSourceLine - Add location information to specified debug information
/// entry.
void addSourceLine(DIE *Die, const DIVariable *V);
- void addSourceLine(DIE *Die, const DIGlobal *G);
+ void addSourceLine(DIE *Die, const DIGlobalVariable *G);
void addSourceLine(DIE *Die, const DISubprogram *SP);
void addSourceLine(DIE *Die, const DIType *Ty);
void addSourceLine(DIE *Die, const DINameSpace *NS);
@@ -306,13 +343,13 @@ private:
const MachineLocation &Location);
/// addRegisterAddress - Add register location entry in variable DIE.
- bool addRegisterAddress(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+ bool addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
/// addConstantValue - Add constant value entry in variable DIE.
- bool addConstantValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+ bool addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
/// addConstantFPValue - Add constant value entry in variable DIE.
- bool addConstantFPValue(DIE *Die, DbgVariable *DV, const MachineOperand &MO);
+ bool addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
/// addComplexAddress - Start with the address based on the location provided,
/// and generate the DWARF information necessary to find the actual variable
@@ -380,21 +417,18 @@ private:
DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);
/// getOrCreateDbgScope - Create DbgScope for the scope.
- DbgScope *getOrCreateDbgScope(MDNode *Scope, MDNode *InlinedAt);
+ DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
- DbgScope *getOrCreateAbstractScope(MDNode *N);
+ DbgScope *getOrCreateAbstractScope(const MDNode *N);
/// findAbstractVariable - Find abstract variable associated with Var.
- DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
- DebugLoc Loc);
- DbgVariable *findAbstractVariable(DIVariable &Var, const MachineInstr *MI,
- DebugLoc Loc);
+ DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc);
/// updateSubprogramScopeDIE - Find DIE for the given subprogram and
/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes.
/// If there are global variables in this scope then create and insert
/// DIEs for these variables.
- DIE *updateSubprogramScopeDIE(MDNode *SPNode);
+ DIE *updateSubprogramScopeDIE(const MDNode *SPNode);
/// constructLexicalScope - Construct new DW_TAG_lexical_block
/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels.
@@ -506,11 +540,18 @@ private:
/// maps as well.
unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
- void constructCompileUnit(MDNode *N);
+ /// constructCompileUnit - Create new CompileUnit for the given
+ /// metadata node with tag DW_TAG_compile_unit.
+ void constructCompileUnit(const MDNode *N);
+
+ /// getCompileUnit - Get the CompileUnit for the given metadata node.
+ CompileUnit *getCompileUnit(const MDNode *N) const;
- void constructGlobalVariableDIE(MDNode *N);
+ /// constructGlobalVariableDIE - Construct global variable DIE.
+ void constructGlobalVariableDIE(const MDNode *N);
- void constructSubprogramDIE(MDNode *N);
+ /// constructSubprogramDIE - Construct a subprogram DIE.
+ void constructSubprogramDIE(const MDNode *N);
// FIXME: This should go away in favor of complex addresses.
/// Find the type the programmer originally declared the variable to be
@@ -521,7 +562,7 @@ private:
/// recordSourceLine - Register a source line with debug info. Returns the
/// unique label that was emitted and which provides correspondence to
/// the source line list.
- MCSymbol *recordSourceLine(unsigned Line, unsigned Col, MDNode *Scope);
+ MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
/// getSourceLineCount - Return the number of source lines in the debug
/// info.
@@ -529,6 +570,20 @@ private:
return Lines.size();
}
+ /// recordVariableFrameIndex - Record a variable's index.
+ void recordVariableFrameIndex(const DbgVariable *V, int Index);
+
+ /// findVariableFrameIndex - Return true if frame index for the variable
+ /// is found. Update FI to hold value of the index.
+ bool findVariableFrameIndex(const DbgVariable *V, int *FI);
+
+ /// findVariableLabel - Find MCSymbol for the variable.
+ const MCSymbol *findVariableLabel(const DbgVariable *V);
+
+ /// findDbgScope - Find DbgScope for the debug loc attached with an
+ /// instruction.
+ DbgScope *findDbgScope(const MachineInstr *MI);
+
/// identifyScopeMarkers() - Identify instructions that mark the
/// beginning or end of a scope.
void identifyScopeMarkers();
@@ -538,8 +593,12 @@ private:
bool extractScopeInformation();
/// collectVariableInfo - Populate DbgScope entries with variables' info.
- void collectVariableInfo();
+ void collectVariableInfo(const MachineFunction *);
+ /// collectVariableInfoFromMMITable - Collect variable information from
+ /// side table maintained by MMI.
+ void collectVariableInfoFromMMITable(const MachineFunction * MF,
+ SmallPtrSet<const MDNode *, 16> &P);
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -563,6 +622,12 @@ public:
///
void endFunction(const MachineFunction *MF);
+ /// getLabelBeforeInsn - Return Label preceding the instruction.
+ const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI);
+
+ /// getLabelAfterInsn - Return Label immediately following the instruction.
+ const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
+
/// beginScope - Process beginning of a scope.
void beginScope(const MachineInstr *MI);
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 0ff1036..c872840 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -189,7 +189,7 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
// EH Frame, but some environments do not handle weak absolute symbols. If
// UnwindTablesMandatory is set we cannot do this optimization; the unwind
// info is to be available for non-EH uses.
- if (!EHFrameInfo.hasCalls && !UnwindTablesMandatory &&
+ if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory &&
(!TheFunc->isWeakForLinker() ||
!Asm->MAI->getWeakDefDirective() ||
TLOF.getSupportsWeakOmittedEHFrame())) {
@@ -949,11 +949,12 @@ void DwarfException::EndFunction() {
TLOF.isFunctionEHFrameSymbolPrivate());
// Save EH frame information
- EHFrames.push_back(FunctionEHFrameInfo(FunctionEHSym,
- Asm->getFunctionNumber(),
- MMI->getPersonalityIndex(),
- Asm->MF->getFrameInfo()->hasCalls(),
- !MMI->getLandingPads().empty(),
- MMI->getFrameMoves(),
- Asm->MF->getFunction()));
+ EHFrames.
+ push_back(FunctionEHFrameInfo(FunctionEHSym,
+ Asm->getFunctionNumber(),
+ MMI->getPersonalityIndex(),
+ Asm->MF->getFrameInfo()->adjustsStack(),
+ !MMI->getLandingPads().empty(),
+ MMI->getFrameMoves(),
+ Asm->MF->getFunction()));
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 5839f8c..bc311e6 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -45,7 +45,7 @@ class DwarfException {
MCSymbol *FunctionEHSym; // L_foo.eh
unsigned Number;
unsigned PersonalityIndex;
- bool hasCalls;
+ bool adjustsStack;
bool hasLandingPads;
std::vector<MachineMove> Moves;
const Function *function;
@@ -55,7 +55,7 @@ class DwarfException {
const std::vector<MachineMove> &M,
const Function *f):
FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
- hasCalls(hC), hasLandingPads(hL), Moves(M), function (f) { }
+ adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { }
};
std::vector<FunctionEHFrameInfo> EHFrames;
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index a8c3c7b..f92127f 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -104,6 +104,21 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection());
EmitCamlGlobal(getModule(), AP, "frametable");
+ int NumDescriptors = 0;
+ for (iterator I = begin(), IE = end(); I != IE; ++I) {
+ GCFunctionInfo &FI = **I;
+ for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) {
+ NumDescriptors++;
+ }
+ }
+
+ if (NumDescriptors >= 1<<16) {
+ // Very rude!
+ report_fatal_error("Too many descriptors for ocaml GC");
+ }
+ AP.EmitInt16(NumDescriptors);
+ AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+
for (iterator I = begin(), IE = end(); I != IE; ++I) {
GCFunctionInfo &FI = **I;
@@ -135,11 +150,13 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) {
for (GCFunctionInfo::live_iterator K = FI.live_begin(J),
KE = FI.live_end(J); K != KE; ++K) {
- assert(K->StackOffset < 1<<16 &&
- "GC root stack offset is outside of fixed stack frame and out "
- "of range for ocaml GC!");
-
- AP.EmitInt32(K->StackOffset);
+ if (K->StackOffset >= 1<<16) {
+ // Very rude!
+ report_fatal_error(
+ "GC root stack offset is outside of fixed stack frame and out "
+ "of range for ocaml GC!");
+ }
+ AP.EmitInt16(K->StackOffset);
}
AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
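
Both checks above follow one pattern: a value about to be emitted with
EmitInt16 must fit in 16 bits, so anything at or above 1<<16 aborts instead of
silently truncating. A standalone sketch of the guard (fatal() stands in for
report_fatal_error):

  #include <cstdio>
  #include <cstdlib>

  static void fatal(const char *Msg) {
    std::fprintf(stderr, "%s\n", Msg);
    std::abort();
  }

  void emitInt16Checked(unsigned V, const char *TooBigMsg) {
    if (V >= (1u << 16))
      fatal(TooBigMsg);                  // refuse to truncate
    // ... emit the low 16 bits of V ...
  }
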
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 759fbaa..fd957b1 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -26,7 +26,7 @@
using namespace llvm;
CriticalAntiDepBreaker::
-CriticalAntiDepBreaker(MachineFunction& MFi) :
+CriticalAntiDepBreaker(MachineFunction& MFi) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TRI(MF.getTarget().getRegisterInfo()),
@@ -172,7 +172,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
unsigned Reg = MO.getReg();
if (Reg == 0) continue;
const TargetRegisterClass *NewRC = 0;
-
+
if (i < MI->getDesc().getNumOperands())
NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
@@ -422,7 +422,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// breaking anti-dependence edges that aren't going to significantly
// impact the overall schedule. There are a limited number of registers
// and we want to save them for the important edges.
- //
+ //
// TODO: Instructions with multiple defs could have multiple
// anti-dependencies. The current code here only knows how to break one
// edge per instruction. Note that we'd have to be able to break all of
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index e1c52f7..63bb5f2 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -83,6 +83,12 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
return NewCI;
}
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp)
+#define setjmp_undefined_for_visual_studio
+#undef setjmp
+#endif
+
void IntrinsicLowering::AddPrototypes(Module &M) {
LLVMContext &Context = M.getContext();
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 331dc7d..b584704 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -65,6 +65,12 @@ static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden,
cl::desc("Print LLVM IR input to isel pass"));
static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden,
cl::desc("Dump garbage collector data"));
+static cl::opt<bool> ShowMCEncoding("show-mc-encoding", cl::Hidden,
+ cl::desc("Show encoding in .s output"));
+static cl::opt<bool> ShowMCInst("show-mc-inst", cl::Hidden,
+ cl::desc("Show instruction structure in .s output"));
+static cl::opt<bool> EnableMCLogging("enable-mc-api-logging", cl::Hidden,
+ cl::desc("Enable MC API logging"));
static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
@@ -131,21 +137,33 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_AssemblyFile: {
MCInstPrinter *InstPrinter =
getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
+
+ // Create a code emitter if asked to show the encoding.
+ //
+ // FIXME: These are currently leaked.
+ MCCodeEmitter *MCE = 0;
+ if (ShowMCEncoding)
+ MCE = getTarget().createCodeEmitter(*this, *Context);
+
AsmStreamer.reset(createAsmStreamer(*Context, Out,
getTargetData()->isLittleEndian(),
getVerboseAsm(), InstPrinter,
- /*codeemitter*/0));
+ MCE, ShowMCInst));
break;
}
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
+ //
+ // FIXME: These are currently leaked.
MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context);
TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
if (MCE == 0 || TAB == 0)
return true;
-
- AsmStreamer.reset(createMachOStreamer(*Context, *TAB, Out, MCE));
+
+ AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context,
+ *TAB, Out, MCE,
+ hasMCRelaxAll()));
break;
}
case CGFT_Null:
@@ -154,7 +172,10 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
AsmStreamer.reset(createNullStreamer(*Context));
break;
}
-
+
+ if (EnableMCLogging)
+ AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
+
// Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
if (Printer == 0)
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index f1bd573..03b4eab 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -68,7 +68,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
return OnlyAvailablePred;
}
-void LatencyPriorityQueue::push_impl(SUnit *SU) {
+void LatencyPriorityQueue::push(SUnit *SU) {
// Look at all of the successors of this node. Count the number of nodes that
// this node is the sole unscheduled node for.
unsigned NumNodesBlocking = 0;
@@ -79,7 +79,7 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
}
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
- Queue.push(SU);
+ Queue.push_back(SU);
}
@@ -114,3 +114,25 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
// NumNodesSolelyBlocking value.
push(OnlyAvailablePred);
}
+
+SUnit *LatencyPriorityQueue::pop() {
+ if (empty()) return NULL;
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+ Queue.pop_back();
+ return V;
+}
+
+void LatencyPriorityQueue::remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+}
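
pop() and remove() above share the usual unordered-erase idiom: swap the chosen
element with the back and pop_back, which is O(1) and fine here because the
picker recomputes the order anyway. A generic standalone sketch:

  #include <algorithm>
  #include <vector>

  template <class T>
  void unorderedErase(std::vector<T> &Q,
                      typename std::vector<T>::iterator I) {
    if (I != Q.end() - 1)
      std::swap(*I, Q.back());           // order is not preserved
    Q.pop_back();
  }
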
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index ca9921c..a6d38ad 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -263,7 +263,7 @@ static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
#endif
static
-bool MultipleDefsByMI(const MachineInstr &MI, unsigned MOIdx) {
+bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
unsigned Reg = MI.getOperand(MOIdx).getReg();
for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
@@ -279,6 +279,24 @@ bool MultipleDefsByMI(const MachineInstr &MI, unsigned MOIdx) {
return false;
}
+/// isPartialRedef - Return true if the specified def at the specific index is
+/// partially re-defining the specified live interval. A common case of this is
+/// a definition of the sub-register.
+bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
+ LiveInterval &interval) {
+ if (!MO.getSubReg() || MO.isEarlyClobber())
+ return false;
+
+ SlotIndex RedefIndex = MIIdx.getDefIndex();
+ const LiveRange *OldLR =
+ interval.getLiveRangeContaining(RedefIndex.getUseIndex());
+ if (OldLR->valno->isDefAccurate()) {
+ MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+ return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
+ }
+ return false;
+}
+
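
In shape, the predicate above fires on a def that carries a sub-register index,
is not an earlyclobber, and whose containing live range was defined by an
instruction that also defines the register. A simplified standalone model
(field names are stand-ins, not LLVM's):

  struct DefOp {
    unsigned SubReg;                     // 0 means a full-register def
    bool EarlyClobber;
  };

  bool partialRedef(const DefOp &MO, bool EarlierDefOfSameReg) {
    if (MO.SubReg == 0 || MO.EarlyClobber)
      return false;                      // full defs never qualify
    return EarlierDefOfSameReg;          // sub-reg def after a def: partial
  }
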
void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineBasicBlock::iterator mi,
SlotIndex MIIdx,
@@ -302,15 +320,20 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// of inputs.
if (MO.isEarlyClobber())
defIndex = MIIdx.getUseIndex();
- VNInfo *ValNo;
+
+ // Make sure the first definition is not a partial redefinition. Add an
+ // <imp-def> of the full register.
+ if (MO.getSubReg())
+ mi->addRegisterDefined(interval.reg);
+
MachineInstr *CopyMI = NULL;
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() ||
tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
CopyMI = mi;
- // Earlyclobbers move back one.
- ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
+ VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true,
+ VNInfoAllocator);
assert(ValNo->id == 0 && "First value in interval is not 0?");
// Loop over all of the blocks that the vreg is defined in. There are
@@ -389,9 +412,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
}
} else {
- if (MultipleDefsByMI(*mi, MOIdx))
- // Mutple defs of the same virtual register by the same instruction. e.g.
- // %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
+ if (MultipleDefsBySameMI(*mi, MOIdx))
+ // Multiple defs of the same virtual register by the same instruction.
+ // e.g. %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
// This is likely due to elimination of REG_SEQUENCE instructions. Return
// here since there is nothing to do.
return;
@@ -400,13 +423,21 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// must be due to phi elimination or two addr elimination. If this is
// the result of two address elimination, then the vreg is one of the
// def-and-use register operand.
- if (mi->isRegTiedToUseOperand(MOIdx)) {
+
+ // It may also be partial redef like this:
+ // 80 %reg1041:6<def> = VSHRNv4i16 %reg1034<kill>, 12, pred:14, pred:%reg0
+ // 120 %reg1041:5<def> = VSHRNv4i16 %reg1039<kill>, 12, pred:14, pred:%reg0
+ bool PartReDef = isPartialRedef(MIIdx, MO, interval);
+ if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) {
// If this is a two-address definition, then we have already processed
// the live range. The only problem is that we didn't realize there
// are actually two values in the live interval. Because of this we
// need to take the LiveRegion that defines this register and split it
// into two values.
- assert(interval.containsOneValue());
+ // Two-address vregs should always only be redefined once. This means
+ // that at this point, there should be exactly one value number in it.
+ assert((PartReDef || interval.containsOneValue()) &&
+ "Unexpected 2-addr liveint!");
SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex();
SlotIndex RedefIndex = MIIdx.getDefIndex();
if (MO.isEarlyClobber())
@@ -420,10 +451,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// because the 2-addr copy must be in the same MBB as the redef.
interval.removeRange(DefIndex, RedefIndex);
- // Two-address vregs should always only be redefined once. This means
- // that at this point, there should be exactly one value number in it.
- assert(interval.containsOneValue() && "Unexpected 2-addr liveint!");
-
// The new value number (#1) is defined by the instruction we claimed
// defined value #0.
VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(),
@@ -434,6 +461,12 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// Value#0 is now defined by the 2-addr instruction.
OldValNo->def = RedefIndex;
OldValNo->setCopy(0);
+
+ // A re-def may be a copy. e.g. %reg1030:6<def> = VMOVD %reg1026, ...
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (PartReDef &&
+ tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ OldValNo->setCopy(&*mi);
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
@@ -451,8 +484,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
dbgs() << " RESULT: ";
interval.print(dbgs(), tri_);
});
- } else {
- assert(lv_->isPHIJoin(interval.reg) && "Multiply defined register");
+ } else if (lv_->isPHIJoin(interval.reg)) {
// In the case of PHI elimination, each variable definition is only
// live until the end of the block. We've already taken care of the
// rest of the live range.
@@ -475,6 +507,8 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
ValNo->addKill(indexes_->getTerminatorGap(mbb));
ValNo->setHasPHIKill(true);
DEBUG(dbgs() << " phi-join +" << LR);
+ } else {
+ llvm_unreachable("Multiply defined register");
}
}
@@ -528,7 +562,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
end = baseIndex.getDefIndex();
goto exit;
} else {
- int DefIdx = mi->findRegisterDefOperandIdx(interval.reg, false, tri_);
+ int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_);
if (DefIdx != -1) {
if (mi->isRegTiedToUseOperand(DefIdx)) {
// Two-address instruction.
@@ -590,7 +624,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS)
// If MI also modifies the sub-register explicitly, avoid processing it
// more than once. Do not pass in TRI here so it checks for exact match.
- if (!MI->modifiesRegister(*AS))
+ if (!MI->definesRegister(*AS))
handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
getOrCreateInterval(*AS), 0);
}
@@ -631,7 +665,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
end = baseIndex.getDefIndex();
SeenDefUse = true;
break;
- } else if (mi->modifiesRegister(interval.reg, tri_)) {
+ } else if (mi->definesRegister(interval.reg, tri_)) {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that defines
// it. Hence its interval is:
@@ -1343,7 +1377,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
MI->eraseFromParent();
continue;
}
- assert(!O.isImplicit() && "Spilling register that's used as implicit use?");
+ assert(!(O.isImplicit() && O.isUse()) &&
+ "Spilling register that's used as implicit use?");
SlotIndex index = getInstructionIndex(MI);
if (index < start || index >= end)
continue;
@@ -1605,7 +1640,7 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
// overflow a float. This expression behaves like 10^d for small d, but is
// more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
// headroom before overflow.
- float lc = powf(1 + (100.0f / (loopDepth+10)), (float)loopDepth);
+ float lc = std::pow(1 + (100.0f / (loopDepth+10)), (float)loopDepth);
return (isDef + isUse) * lc;
}
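
The curve in getSpillWeight can be sanity-checked numerically; it behaves like
10^d for small loop depths but stays finite for deep nests. A standalone check
reproducing the comment's figures:

  #include <cmath>
  #include <cstdio>

  int main() {
    const unsigned Depths[] = {0, 1, 2, 200};
    for (unsigned i = 0; i != 4; ++i) {
      unsigned d = Depths[i];
      float lc = std::pow(1 + (100.0f / (d + 10)), (float)d);
      std::printf("depth %u -> %g\n", d, lc);  // depth 200 -> ~6.7e33
    }
    return 0;
  }
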
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index b4ef648..b0348a5 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -140,7 +140,8 @@ bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
// Insert copy
const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg);
const TargetRegisterClass *TRCD = TRI->getPhysicalRegisterRegClass(SrcReg);
- bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS);
+ bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS,
+ MI->getDebugLoc());
(void)Emitted;
assert(Emitted && "Subreg and Dst must be of compatible register class");
// Transfer the kill/dead flags, if needed.
@@ -193,7 +194,8 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
// Insert sub-register copy
const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
- bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
+ MI->getDebugLoc());
(void)Emitted;
assert(Emitted && "Subreg and Dst must be of compatible register class");
// Transfer the kill/dead flags, if needed.
@@ -262,7 +264,8 @@ bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
BuildMI(*MBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::KILL), DstSubReg);
else {
- bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1);
+ bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
+ MI->getDebugLoc());
(void)Emitted;
assert(Emitted && "Subreg and Dst must be of compatible register class");
}
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 84c3d71..6f4f7a8 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -30,6 +31,9 @@ using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
+static cl::opt<bool> CSEPhysDef("machine-cse-phys-defs",
+ cl::init(false), cl::Hidden);
+
namespace {
class MachineCSE : public MachineFunctionPass {
const TargetInstrInfo *TII;
@@ -39,7 +43,7 @@ namespace {
MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification
- MachineCSE() : MachineFunctionPass(&ID), CurrVN(0) {}
+ MachineCSE() : MachineFunctionPass(&ID), LookAheadLimit(5), CurrVN(0) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -52,6 +56,7 @@ namespace {
}
private:
+ const unsigned LookAheadLimit;
typedef ScopedHashTableScope<MachineInstr*, unsigned,
MachineInstrExpressionTrait> ScopeType;
DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
@@ -62,8 +67,12 @@ namespace {
bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
bool isPhysDefTriviallyDead(unsigned Reg,
MachineBasicBlock::const_iterator I,
- MachineBasicBlock::const_iterator E);
- bool hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB);
+ MachineBasicBlock::const_iterator E) const;
+ bool hasLivePhysRegDefUse(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ unsigned &PhysDef) const;
+ bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
+ unsigned PhysDef) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI);
@@ -112,6 +121,7 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
DEBUG(dbgs() << "Coalescing: " << *DefMI);
DEBUG(dbgs() << "*** to: " << *MI);
MO.setReg(SrcReg);
+ MRI->clearKillFlags(SrcReg);
if (NewRC != SRC)
MRI->setRegClass(SrcReg, NewRC);
DefMI->eraseFromParent();
@@ -123,10 +133,11 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
return Changed;
}
-bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
- MachineBasicBlock::const_iterator I,
- MachineBasicBlock::const_iterator E) {
- unsigned LookAheadLeft = 5;
+bool
+MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
+ MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator E) const {
+ unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) {
// Skip over dbg_value's.
while (I != E && I->isDebugValue())
@@ -144,6 +155,7 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
if (!TRI->regsOverlap(MO.getReg(), Reg))
continue;
if (MO.isUse())
+ // Found a use!
return false;
SeenDef = true;
}
@@ -159,41 +171,73 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
}
/// hasLivePhysRegDefUse - Return true if the specified instruction reads / writes
-/// physical registers (except for dead defs of physical registers).
-bool MachineCSE::hasLivePhysRegDefUse(MachineInstr *MI, MachineBasicBlock *MBB){
- unsigned PhysDef = 0;
+/// physical registers (except for dead defs of physical registers). It also
+/// returns the physical register def by reference if it's the only one.
+bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ unsigned &PhysDef) const {
+ PhysDef = 0;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (MO.isUse())
- // Can't touch anything to read a physical register.
- return true;
- if (MO.isDead())
- // If the def is dead, it's ok.
- continue;
- // Ok, this is a physical register def that's not marked "dead". That's
- // common since this pass is run before livevariables. We can scan
- // forward a few instructions and check if it is obviously dead.
- if (PhysDef)
- // Multiple physical register defs. These are rare, forget about it.
- return true;
- PhysDef = Reg;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isUse())
+ // Can't touch anything to read a physical register.
+ return true;
+ if (MO.isDead())
+ // If the def is dead, it's ok.
+ continue;
+ // Ok, this is a physical register def that's not marked "dead". That's
+ // common since this pass is run before livevariables. We can scan
+ // forward a few instructions and check if it is obviously dead.
+ if (PhysDef) {
+ // Multiple physical register defs. These are rare, forget about it.
+ PhysDef = 0;
+ return true;
}
+ PhysDef = Reg;
}
if (PhysDef) {
- MachineBasicBlock::iterator I = MI; I = llvm::next(I);
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
return true;
}
return false;
}
+bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
+ unsigned PhysDef) const {
+ // For now conservatively returns false if the common subexpression is
+ // not in the same basic block as the given instruction.
+ MachineBasicBlock *MBB = MI->getParent();
+ if (CSMI->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_iterator I = CSMI; I = llvm::next(I);
+ MachineBasicBlock::const_iterator E = MI;
+ unsigned LookAheadLeft = LookAheadLimit;
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I->isDebugValue())
+ ++I;
+
+ if (I == E)
+ return true;
+ if (I->modifiesRegister(PhysDef, TRI))
+ return false;
+
+ --LookAheadLeft;
+ ++I;
+ }
+
+ return false;
+}
+
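
PhysRegDefReaches above is a bounded forward scan: walk from the common
subexpression toward the candidate, skipping dbg_value instructions, and give
up after a fixed number of steps. Its core, as a standalone function over
simplified instructions:

  #include <vector>

  struct Instr {
    bool IsDbgValue;
    bool ClobbersPhysDef;                // modifies the register in question
  };

  bool defReaches(std::vector<Instr>::const_iterator I,
                  std::vector<Instr>::const_iterator E, unsigned Limit) {
    while (Limit--) {
      while (I != E && I->IsDbgValue)    // dbg_value doesn't count
        ++I;
      if (I == E)
        return true;                     // reached the use: def still live
      if (I->ClobbersPhysDef)
        return false;                    // clobbered in between
      ++I;
    }
    return false;                        // out of budget: be conservative
  }
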
static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
@@ -326,9 +370,20 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// If the instruction defines a physical register and the value *may* be
// used, then it's not safe to replace it with a common subexpression.
- if (FoundCSE && hasLivePhysRegDefUse(MI, MBB))
+ unsigned PhysDef = 0;
+ if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) {
FoundCSE = false;
+ // ... Unless the CS is local and it also defines the physical register
+ // which is not clobbered in between.
+ if (PhysDef && CSEPhysDef) {
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefReaches(CSMI, MI, PhysDef))
+ FoundCSE = true;
+ }
+ }
+
if (!FoundCSE) {
VNT.insert(MI, CurrVN++);
Exps.push_back(MI);
@@ -365,8 +420,10 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Actually perform the elimination.
if (DoCSE) {
- for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i)
+ for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
+ MRI->clearKillFlags(CSEPairs[i].second);
+ }
MI->eraseFromParent();
++NumCSEs;
} else {
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 3cf10b3..a38c881 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -398,8 +398,14 @@ void MachineFunction::viewCFGOnly() const
unsigned MachineFunction::addLiveIn(unsigned PReg,
const TargetRegisterClass *RC) {
assert(RC->contains(PReg) && "Not the correct regclass!");
- unsigned VReg = getRegInfo().createVirtualRegister(RC);
- getRegInfo().addLiveIn(PReg, VReg);
+ MachineRegisterInfo &MRI = getRegInfo();
+ unsigned VReg = MRI.getLiveInVirtReg(PReg);
+ if (VReg) {
+ assert(MRI.getRegClass(VReg) == RC && "Register class mismatch!");
+ return VReg;
+ }
+ VReg = MRI.createVirtualRegister(RC);
+ MRI.addLiveIn(PReg, VReg);
return VReg;
}
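
The change above makes addLiveIn idempotent: asking for the same physical
register twice returns the same virtual register instead of minting a new one.
A standalone model of that behavior, with std::map standing in for the register
info's live-in table:

  #include <map>

  unsigned addLiveIn(std::map<unsigned, unsigned> &LiveIns, unsigned PReg,
                     unsigned &NextVReg) {
    unsigned &VReg = LiveIns[PReg];      // default-constructed to 0
    if (!VReg)
      VReg = NextVReg++;                 // first request: create the vreg
    return VReg;                         // repeat requests: reuse it
  }
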
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 99b5beb..e54cd5c 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -219,8 +219,12 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << "%physreg" << getReg();
}
- if (getSubReg() != 0)
- OS << ':' << getSubReg();
+ if (getSubReg() != 0) {
+ if (TM)
+ OS << ':' << TM->getRegisterInfo()->getSubRegIndexName(getSubReg());
+ else
+ OS << ':' << getSubReg();
+ }
if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
isEarlyClobber()) {
@@ -781,25 +785,57 @@ int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
}
return -1;
}
-
+
+/// readsWritesVirtualRegister - Return a pair of bools (reads, writes)
+/// indicating if this instruction reads or writes Reg. This also considers
+/// partial defines.
+std::pair<bool,bool>
+MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+ SmallVectorImpl<unsigned> *Ops) const {
+ bool PartDef = false; // Partial redefine.
+ bool FullDef = false; // Full define.
+ bool Use = false;
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (!MO.isReg() || MO.getReg() != Reg)
+ continue;
+ if (Ops)
+ Ops->push_back(i);
+ if (MO.isUse())
+ Use |= !MO.isUndef();
+ else if (MO.getSubReg())
+ PartDef = true;
+ else
+ FullDef = true;
+ }
+ // A partial redefine uses Reg unless there is also a full define.
+ return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef);
+}
+
/// findRegisterDefOperandIdx() - Returns the operand index that is a def of
/// the specified register or -1 if it is not found. If isDead is true, defs
/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
/// also checks if there is a def of a super-register.
-int MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead,
- const TargetRegisterInfo *TRI) const {
+int
+MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+ const TargetRegisterInfo *TRI) const {
+ bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
unsigned MOReg = MO.getReg();
- if (MOReg == Reg ||
- (TRI &&
- TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- TargetRegisterInfo::isPhysicalRegister(Reg) &&
- TRI->isSubRegister(MOReg, Reg)))
- if (!isDead || MO.isDead())
- return i;
+ bool Found = (MOReg == Reg);
+ if (!Found && TRI && isPhys &&
+ TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ if (Overlap)
+ Found = TRI->regsOverlap(MOReg, Reg);
+ else
+ Found = TRI->isSubRegister(MOReg, Reg);
+ }
+ if (Found && (!isDead || MO.isDead()))
+ return i;
}
return -1;
}
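The (reads, writes) computation above reduces to three flags over the operand list: plain use, partial def (a def carrying a sub-register index), and full def, where a partial def counts as a read unless a full def is also present. A self-contained model of that rule:

#include <cassert>
#include <utility>
#include <vector>

struct Operand {
  unsigned Reg;
  bool IsUse;      // Otherwise it is a def.
  bool IsUndef;    // An <undef> use reads nothing.
  unsigned SubReg; // Nonzero on a def = partial redefine.
};

// Mirrors the logic of readsWritesVirtualRegister: a partial redefine
// implicitly reads the old value of the untouched lanes, unless some other
// operand fully defines the register.
std::pair<bool, bool> readsWrites(const std::vector<Operand> &Ops,
                                  unsigned Reg) {
  bool Use = false, PartDef = false, FullDef = false;
  for (const Operand &O : Ops) {
    if (O.Reg != Reg)
      continue;
    if (O.IsUse)
      Use |= !O.IsUndef;
    else if (O.SubReg)
      PartDef = true;
    else
      FullDef = true;
  }
  return {Use || (PartDef && !FullDef), PartDef || FullDef};
}

int main() {
  // A lone partial def both reads and writes the register.
  std::vector<Operand> Ops = {{7, /*IsUse=*/false, false, /*SubReg=*/1}};
  std::pair<bool, bool> RW = readsWrites(Ops, 7);
  assert(RW.first && RW.second);
}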
@@ -938,6 +974,16 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
return true;
}
+/// clearKillInfo - Clears kill flags on all operands.
+///
+void MachineInstr::clearKillInfo() {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.isUse())
+ MO.setIsKill(false);
+ }
+}
+
/// copyKillDeadInfo - Copies kill / dead operand properties from MI.
///
void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
@@ -1355,11 +1401,21 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
void MachineInstr::addRegisterDefined(unsigned IncomingReg,
const TargetRegisterInfo *RegInfo) {
- MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
- if (!MO || MO->getSubReg())
- addOperand(MachineOperand::CreateReg(IncomingReg,
- true /*IsDef*/,
- true /*IsImp*/));
+ if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) {
+ MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo);
+ if (MO)
+ return;
+ } else {
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() &&
+ MO.getSubReg() == 0)
+ return;
+ }
+ }
+ addOperand(MachineOperand::CreateReg(IncomingReg,
+ true /*IsDef*/,
+ true /*IsImp*/));
}
unsigned
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index b2e757d..6120617 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -738,8 +738,10 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
"Instructions with different phys regs are not identical!");
if (MO.isReg() && MO.isDef() &&
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+ RegInfo->clearKillFlags(Dup->getOperand(i).getReg());
+ }
}
MI->eraseFromParent();
++NumCSEed;
@@ -784,6 +786,15 @@ void MachineLICM::Hoist(MachineInstr *MI) {
// Otherwise, splice the instruction to the preheader.
CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI);
+ // Clear the kill flags of any register this instruction defines,
+ // since they may need to be live throughout the entire loop
+ // rather than just live for part of it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef() && !MO.isDead())
+ RegInfo->clearKillFlags(MO.getReg());
+ }
+
// Add to the CSE map.
if (CI != CSEMap.end())
CI->second.push_back(MI);
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index ea5ca0c..70bf7e5 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -133,6 +133,15 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
return ++UI == use_nodbg_end();
}
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+ for (use_iterator UI = use_begin(Reg), UE = use_end(); UI != UE; ++UI)
+ UI.getOperand().setIsKill(false);
+}
+
bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
if (I->first == Reg || I->second == Reg)
@@ -156,6 +165,15 @@ unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
return 0;
}
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in physical register.
+/// corresponding live-in virtual register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+ for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+ if (I->first == PReg)
+ return I->second;
+ return 0;
+}
+
static cl::opt<bool>
SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
cl::desc("Schedule copies of livein registers"),
@@ -218,7 +236,8 @@ static void EmitLiveInCopy(MachineBasicBlock *MBB,
--Pos;
}
- bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC);
+ bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC,
+ DebugLoc());
assert(Emitted && "Unable to issue a live-in copy instruction!\n");
(void) Emitted;
@@ -253,7 +272,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
if (LI->second) {
const TargetRegisterClass *RC = getRegClass(LI->second);
bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
- LI->second, LI->first, RC, RC);
+ LI->second, LI->first, RC, RC,
+ DebugLoc());
assert(Emitted && "Unable to issue a live-in copy instruction!\n");
(void) Emitted;
}
@@ -265,6 +285,15 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
EntryMBB->addLiveIn(I->first);
}
+void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
+ for (int i = UsedPhysRegs.find_first(); i >= 0;
+ i = UsedPhysRegs.find_next(i))
+ for (const unsigned *SS = TRI.getSubRegisters(i);
+ unsigned SubReg = *SS; ++SS)
+ if (SubReg > unsigned(i))
+ UsedPhysRegs.set(SubReg);
+}
+
#ifndef NDEBUG
void MachineRegisterInfo::dumpUses(unsigned Reg) const {
for (use_iterator I = use_begin(Reg), E = use_end(); I != E; ++I)
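closePhysRegsUsed closes the used-register set under the sub-register relation: whenever a register is marked used, its sub-registers must be marked too. With a plain bitset and a hypothetical sub-register table (in LLVM this data comes from TargetRegisterInfo::getSubRegisters) the closure reduces to:

#include <bitset>
#include <cassert>
#include <vector>

// Hypothetical table: SubRegs[R] lists the sub-registers of register R.
static const std::vector<std::vector<unsigned>> SubRegs = {
    {},      // reg 0 has no sub-registers
    {2, 3},  // reg 1 contains regs 2 and 3
    {},      // reg 2
    {},      // reg 3
};

void closeUsedRegs(std::bitset<4> &Used) {
  for (unsigned R = 0; R < Used.size(); ++R)
    if (Used[R])
      for (unsigned S : SubRegs[R])
        Used.set(S);  // Using R implies using every part of R.
}

int main() {
  std::bitset<4> Used;
  Used.set(1);                 // Mark the wide register used...
  closeUsedRegs(Used);
  assert(Used[2] && Used[3]);  // ...its halves are now used as well.
}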
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index b8996d4..84d6df2 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -26,39 +26,17 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
using namespace llvm;
-/// BBInfo - Per-basic block information used internally by MachineSSAUpdater.
-class MachineSSAUpdater::BBInfo {
-public:
- MachineBasicBlock *BB; // Back-pointer to the corresponding block.
- unsigned AvailableVal; // Value to use in this block.
- BBInfo *DefBB; // Block that defines the available value.
- int BlkNum; // Postorder number.
- BBInfo *IDom; // Immediate dominator.
- unsigned NumPreds; // Number of predecessor blocks.
- BBInfo **Preds; // Array[NumPreds] of predecessor blocks.
- MachineInstr *PHITag; // Marker for existing PHIs that match.
-
- BBInfo(MachineBasicBlock *ThisBB, unsigned V)
- : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0),
- NumPreds(0), Preds(0), PHITag(0) { }
-};
-
-typedef DenseMap<MachineBasicBlock*, MachineSSAUpdater::BBInfo*> BBMapTy;
-
typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
}
-static BBMapTy *getBBMap(void *BM) {
- return static_cast<BBMapTy*>(BM);
-}
-
MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
SmallVectorImpl<MachineInstr*> *NewPHI)
- : AV(0), BM(0), InsertedPHIs(NewPHI) {
+ : AV(0), InsertedPHIs(NewPHI) {
TII = MF.getTarget().getInstrInfo();
MRI = &MF.getRegInfo();
}
@@ -134,7 +112,8 @@ static
MachineInstr *InsertNewDef(unsigned Opcode,
MachineBasicBlock *BB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC,
- MachineRegisterInfo *MRI, const TargetInstrInfo *TII) {
+ MachineRegisterInfo *MRI,
+ const TargetInstrInfo *TII) {
unsigned NewVR = MRI->createVirtualRegister(RC);
return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
}
@@ -263,438 +242,131 @@ void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
I->second = NewReg;
}
-/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
-/// for the specified BB and if so, return it. If not, construct SSA form by
-/// first calculating the required placement of PHIs and then inserting new
-/// PHIs where needed.
-unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
- AvailableValsTy &AvailableVals = getAvailableVals(AV);
- if (unsigned V = AvailableVals[BB])
- return V;
+/// MachinePHIiter - Iterator for PHI operands. This is used for the
+/// PHI_iterator in the SSAUpdaterImpl template.
+namespace {
+ class MachinePHIiter {
+ private:
+ MachineInstr *PHI;
+ unsigned idx;
+
+ public:
+ explicit MachinePHIiter(MachineInstr *P) // begin iterator
+ : PHI(P), idx(1) {}
+ MachinePHIiter(MachineInstr *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumOperands()) {}
+
+ MachinePHIiter &operator++() { idx += 2; return *this; }
+ bool operator==(const MachinePHIiter& x) const { return idx == x.idx; }
+ bool operator!=(const MachinePHIiter& x) const { return !operator==(x); }
+ unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); }
+ MachineBasicBlock *getIncomingBlock() {
+ return PHI->getOperand(idx+1).getMBB();
+ }
+ };
+}
- // Pool allocation used internally by GetValueAtEndOfBlock.
- BumpPtrAllocator Allocator;
- BBMapTy BBMapObj;
- BM = &BBMapObj;
+/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl
+/// template, specialized for MachineSSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<MachineSSAUpdater> {
+public:
+ typedef MachineBasicBlock BlkT;
+ typedef unsigned ValT;
+ typedef MachineInstr PhiT;
+
+ typedef MachineBasicBlock::succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); }
+
+ typedef MachinePHIiter PHI_iterator;
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
+ }
- SmallVector<BBInfo*, 100> BlockList;
- BuildBlockList(BB, &BlockList, &Allocator);
+ /// FindPredecessorBlocks - Put the predecessors of BB into the Preds
+ /// vector.
+ static void FindPredecessorBlocks(MachineBasicBlock *BB,
+ SmallVectorImpl<MachineBasicBlock*> *Preds){
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI)
+ Preds->push_back(*PI);
+ }
- // Special case: bail out if BB is unreachable.
- if (BlockList.size() == 0) {
- BM = 0;
+ /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned GetUndefVal(MachineBasicBlock *BB,
+ MachineSSAUpdater *Updater) {
// Insert an implicit_def to represent an undef value.
MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
BB, BB->getFirstTerminator(),
- VRC, MRI, TII);
- unsigned V = NewDef->getOperand(0).getReg();
- AvailableVals[BB] = V;
- return V;
- }
-
- FindDominators(&BlockList);
- FindPHIPlacement(&BlockList);
- FindAvailableVals(&BlockList);
-
- BM = 0;
- return BBMapObj[BB]->DefBB->AvailableVal;
-}
-
-/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
-/// vector, set Info->NumPreds, and allocate space in Info->Preds.
-static void FindPredecessorBlocks(MachineSSAUpdater::BBInfo *Info,
- SmallVectorImpl<MachineBasicBlock*> *Preds,
- BumpPtrAllocator *Allocator) {
- MachineBasicBlock *BB = Info->BB;
- for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
- E = BB->pred_end(); PI != E; ++PI)
- Preds->push_back(*PI);
-
- Info->NumPreds = Preds->size();
- Info->Preds = static_cast<MachineSSAUpdater::BBInfo**>
- (Allocator->Allocate(Info->NumPreds * sizeof(MachineSSAUpdater::BBInfo*),
- AlignOf<MachineSSAUpdater::BBInfo*>::Alignment));
-}
-
-/// BuildBlockList - Starting from the specified basic block, traverse back
-/// through its predecessors until reaching blocks with known values. Create
-/// BBInfo structures for the blocks and append them to the block list.
-void MachineSSAUpdater::BuildBlockList(MachineBasicBlock *BB,
- BlockListTy *BlockList,
- BumpPtrAllocator *Allocator) {
- AvailableValsTy &AvailableVals = getAvailableVals(AV);
- BBMapTy *BBMap = getBBMap(BM);
- SmallVector<BBInfo*, 10> RootList;
- SmallVector<BBInfo*, 64> WorkList;
-
- BBInfo *Info = new (*Allocator) BBInfo(BB, 0);
- (*BBMap)[BB] = Info;
- WorkList.push_back(Info);
-
- // Search backward from BB, creating BBInfos along the way and stopping when
- // reaching blocks that define the value. Record those defining blocks on
- // the RootList.
- SmallVector<MachineBasicBlock*, 10> Preds;
- while (!WorkList.empty()) {
- Info = WorkList.pop_back_val();
- Preds.clear();
- FindPredecessorBlocks(Info, &Preds, Allocator);
-
- // Treat an unreachable predecessor as a definition with 'undef'.
- if (Info->NumPreds == 0) {
- // Insert an implicit_def to represent an undef value.
- MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
- Info->BB,
- Info->BB->getFirstTerminator(),
- VRC, MRI, TII);
- Info->AvailableVal = NewDef->getOperand(0).getReg();
- Info->DefBB = Info;
- RootList.push_back(Info);
- continue;
- }
-
- for (unsigned p = 0; p != Info->NumPreds; ++p) {
- MachineBasicBlock *Pred = Preds[p];
- // Check if BBMap already has a BBInfo for the predecessor block.
- BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred);
- if (BBMapBucket.second) {
- Info->Preds[p] = BBMapBucket.second;
- continue;
- }
-
- // Create a new BBInfo for the predecessor.
- unsigned PredVal = AvailableVals.lookup(Pred);
- BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal);
- BBMapBucket.second = PredInfo;
- Info->Preds[p] = PredInfo;
-
- if (PredInfo->AvailableVal) {
- RootList.push_back(PredInfo);
- continue;
- }
- WorkList.push_back(PredInfo);
- }
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return NewDef->getOperand(0).getReg();
}
- // Now that we know what blocks are backwards-reachable from the starting
- // block, do a forward depth-first traversal to assign postorder numbers
- // to those blocks.
- BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0);
- unsigned BlkNum = 1;
-
- // Initialize the worklist with the roots from the backward traversal.
- while (!RootList.empty()) {
- Info = RootList.pop_back_val();
- Info->IDom = PseudoEntry;
- Info->BlkNum = -1;
- WorkList.push_back(Info);
+ /// CreateEmptyPHI - Create a PHI instruction that defines a new register.
+ /// Add it into the specified block and return the register.
+ static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds,
+ MachineSSAUpdater *Updater) {
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc,
+ Updater->VRC, Updater->MRI,
+ Updater->TII);
+ return PHI->getOperand(0).getReg();
}
- while (!WorkList.empty()) {
- Info = WorkList.back();
-
- if (Info->BlkNum == -2) {
- // All the successors have been handled; assign the postorder number.
- Info->BlkNum = BlkNum++;
- // If not a root, put it on the BlockList.
- if (!Info->AvailableVal)
- BlockList->push_back(Info);
- WorkList.pop_back();
- continue;
- }
-
- // Leave this entry on the worklist, but set its BlkNum to mark that its
- // successors have been put on the worklist. When it returns to the top
- // the list, after handling its successors, it will be assigned a number.
- Info->BlkNum = -2;
-
- // Add unvisited successors to the work list.
- for (MachineBasicBlock::succ_iterator SI = Info->BB->succ_begin(),
- E = Info->BB->succ_end(); SI != E; ++SI) {
- BBInfo *SuccInfo = (*BBMap)[*SI];
- if (!SuccInfo || SuccInfo->BlkNum)
- continue;
- SuccInfo->BlkNum = -1;
- WorkList.push_back(SuccInfo);
- }
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(MachineInstr *PHI, unsigned Val,
+ MachineBasicBlock *Pred) {
+ PHI->addOperand(MachineOperand::CreateReg(Val, false));
+ PHI->addOperand(MachineOperand::CreateMBB(Pred));
}
- PseudoEntry->BlkNum = BlkNum;
-}
-/// IntersectDominators - This is the dataflow lattice "meet" operation for
-/// finding dominators. Given two basic blocks, it walks up the dominator
-/// tree until it finds a common dominator of both. It uses the postorder
-/// number of the blocks to determine how to do that.
-static MachineSSAUpdater::BBInfo *
-IntersectDominators(MachineSSAUpdater::BBInfo *Blk1,
- MachineSSAUpdater::BBInfo *Blk2) {
- while (Blk1 != Blk2) {
- while (Blk1->BlkNum < Blk2->BlkNum) {
- Blk1 = Blk1->IDom;
- if (!Blk1)
- return Blk2;
- }
- while (Blk2->BlkNum < Blk1->BlkNum) {
- Blk2 = Blk2->IDom;
- if (!Blk2)
- return Blk1;
- }
- }
- return Blk1;
-}
-
-/// FindDominators - Calculate the dominator tree for the subset of the CFG
-/// corresponding to the basic blocks on the BlockList. This uses the
-/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and
-/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10.
-/// Because the CFG subset does not include any edges leading into blocks that
-/// define the value, the results are not the usual dominator tree. The CFG
-/// subset has a single pseudo-entry node with edges to a set of root nodes
-/// for blocks that define the value. The dominators for this subset CFG are
-/// not the standard dominators but they are adequate for placing PHIs within
-/// the subset CFG.
-void MachineSSAUpdater::FindDominators(BlockListTy *BlockList) {
- bool Changed;
- do {
- Changed = false;
- // Iterate over the list in reverse order, i.e., forward on CFG edges.
- for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
- E = BlockList->rend(); I != E; ++I) {
- BBInfo *Info = *I;
-
- // Start with the first predecessor.
- assert(Info->NumPreds > 0 && "unreachable block");
- BBInfo *NewIDom = Info->Preds[0];
-
- // Iterate through the block's other predecessors.
- for (unsigned p = 1; p != Info->NumPreds; ++p) {
- BBInfo *Pred = Info->Preds[p];
- NewIDom = IntersectDominators(NewIDom, Pred);
- }
-
- // Check if the IDom value has changed.
- if (NewIDom != Info->IDom) {
- Info->IDom = NewIDom;
- Changed = true;
- }
- }
- } while (Changed);
-}
-
-/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for
-/// any blocks containing definitions of the value. If one is found, then the
-/// successor of Pred is in the dominance frontier for the definition, and
-/// this function returns true.
-static bool IsDefInDomFrontier(const MachineSSAUpdater::BBInfo *Pred,
- const MachineSSAUpdater::BBInfo *IDom) {
- for (; Pred != IDom; Pred = Pred->IDom) {
- if (Pred->DefBB == Pred)
- return true;
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static MachineInstr *InstrIsPHI(MachineInstr *I) {
+ if (I && I->isPHI())
+ return I;
+ return 0;
}
- return false;
-}
-
-/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of
-/// the known definitions. Iteratively add PHIs in the dom frontiers until
-/// nothing changes. Along the way, keep track of the nearest dominating
-/// definitions for non-PHI blocks.
-void MachineSSAUpdater::FindPHIPlacement(BlockListTy *BlockList) {
- bool Changed;
- do {
- Changed = false;
- // Iterate over the list in reverse order, i.e., forward on CFG edges.
- for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
- E = BlockList->rend(); I != E; ++I) {
- BBInfo *Info = *I;
-
- // If this block already needs a PHI, there is nothing to do here.
- if (Info->DefBB == Info)
- continue;
-
- // Default to use the same def as the immediate dominator.
- BBInfo *NewDefBB = Info->IDom->DefBB;
- for (unsigned p = 0; p != Info->NumPreds; ++p) {
- if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) {
- // Need a PHI here.
- NewDefBB = Info;
- break;
- }
- }
- // Check if anything changed.
- if (NewDefBB != Info->DefBB) {
- Info->DefBB = NewDefBB;
- Changed = true;
- }
- }
- } while (Changed);
-}
-
-/// FindAvailableVal - If this block requires a PHI, first check if an existing
-/// PHI matches the PHI placement and reaching definitions computed earlier,
-/// and if not, create a new PHI. Visit all the block's predecessors to
-/// calculate the available value for each one and fill in the incoming values
-/// for a new PHI.
-void MachineSSAUpdater::FindAvailableVals(BlockListTy *BlockList) {
- AvailableValsTy &AvailableVals = getAvailableVals(AV);
-
- // Go through the worklist in forward order (i.e., backward through the CFG)
- // and check if existing PHIs can be used. If not, create empty PHIs where
- // they are needed.
- for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
- I != E; ++I) {
- BBInfo *Info = *I;
- // Check if there needs to be a PHI in BB.
- if (Info->DefBB != Info)
- continue;
-
- // Look for an existing PHI.
- FindExistingPHI(Info->BB, BlockList);
- if (Info->AvailableVal)
- continue;
-
- MachineBasicBlock::iterator Loc =
- Info->BB->empty() ? Info->BB->end() : Info->BB->front();
- MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, Info->BB, Loc,
- VRC, MRI, TII);
- unsigned PHI = InsertedPHI->getOperand(0).getReg();
- Info->AvailableVal = PHI;
- AvailableVals[Info->BB] = PHI;
+ /// ValueIsPHI - Check if the instruction that defines the specified register
+ /// is a PHI instruction.
+ static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ return InstrIsPHI(Updater->MRI->getVRegDef(Val));
}
- // Now go back through the worklist in reverse order to fill in the arguments
- // for any new PHIs added in the forward traversal.
- for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
- E = BlockList->rend(); I != E; ++I) {
- BBInfo *Info = *I;
-
- if (Info->DefBB != Info) {
- // Record the available value at join nodes to speed up subsequent
- // uses of this SSAUpdater for the same value.
- if (Info->NumPreds > 1)
- AvailableVals[Info->BB] = Info->DefBB->AvailableVal;
- continue;
- }
-
- // Check if this block contains a newly added PHI.
- unsigned PHI = Info->AvailableVal;
- MachineInstr *InsertedPHI = MRI->getVRegDef(PHI);
- if (!InsertedPHI->isPHI() || InsertedPHI->getNumOperands() > 1)
- continue;
-
- // Iterate through the block's predecessors.
- MachineInstrBuilder MIB(InsertedPHI);
- for (unsigned p = 0; p != Info->NumPreds; ++p) {
- BBInfo *PredInfo = Info->Preds[p];
- MachineBasicBlock *Pred = PredInfo->BB;
- // Skip to the nearest preceding definition.
- if (PredInfo->DefBB != PredInfo)
- PredInfo = PredInfo->DefBB;
- MIB.addReg(PredInfo->AvailableVal).addMBB(Pred);
- }
-
- DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
-
- // If the client wants to know about all new instructions, tell it.
- if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) {
+ MachineInstr *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumOperands() <= 1)
+ return PHI;
+ return 0;
}
-}
-/// FindExistingPHI - Look through the PHI nodes in a block to see if any of
-/// them match what is needed.
-void MachineSSAUpdater::FindExistingPHI(MachineBasicBlock *BB,
- BlockListTy *BlockList) {
- for (MachineBasicBlock::iterator BBI = BB->begin(), BBE = BB->end();
- BBI != BBE && BBI->isPHI(); ++BBI) {
- if (CheckIfPHIMatches(BBI)) {
- RecordMatchingPHI(BBI);
- break;
- }
- // Match failed: clear all the PHITag values.
- for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
- I != E; ++I)
- (*I)->PHITag = 0;
+ /// GetPHIValue - For the specified PHI instruction, return the register
+ /// that it defines.
+ static unsigned GetPHIValue(MachineInstr *PHI) {
+ return PHI->getOperand(0).getReg();
}
-}
-
-/// CheckIfPHIMatches - Check if a PHI node matches the placement and values
-/// in the BBMap.
-bool MachineSSAUpdater::CheckIfPHIMatches(MachineInstr *PHI) {
- BBMapTy *BBMap = getBBMap(BM);
- SmallVector<MachineInstr*, 20> WorkList;
- WorkList.push_back(PHI);
-
- // Mark that the block containing this PHI has been visited.
- (*BBMap)[PHI->getParent()]->PHITag = PHI;
-
- while (!WorkList.empty()) {
- PHI = WorkList.pop_back_val();
-
- // Iterate through the PHI's incoming values.
- for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) {
- unsigned IncomingVal = PHI->getOperand(i).getReg();
- BBInfo *PredInfo = (*BBMap)[PHI->getOperand(i+1).getMBB()];
- // Skip to the nearest preceding definition.
- if (PredInfo->DefBB != PredInfo)
- PredInfo = PredInfo->DefBB;
-
- // Check if it matches the expected value.
- if (PredInfo->AvailableVal) {
- if (IncomingVal == PredInfo->AvailableVal)
- continue;
- return false;
- }
-
- // Check if the value is a PHI in the correct block.
- MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal);
- if (!IncomingPHIVal->isPHI() ||
- IncomingPHIVal->getParent() != PredInfo->BB)
- return false;
-
- // If this block has already been visited, check if this PHI matches.
- if (PredInfo->PHITag) {
- if (IncomingPHIVal == PredInfo->PHITag)
- continue;
- return false;
- }
- PredInfo->PHITag = IncomingPHIVal;
+};
- WorkList.push_back(IncomingPHIVal);
- }
- }
- return true;
-}
+} // End llvm namespace
-/// RecordMatchingPHI - For a PHI node that matches, record it and its input
-/// PHIs in both the BBMap and the AvailableVals mapping.
-void MachineSSAUpdater::RecordMatchingPHI(MachineInstr *PHI) {
- BBMapTy *BBMap = getBBMap(BM);
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
AvailableValsTy &AvailableVals = getAvailableVals(AV);
- SmallVector<MachineInstr*, 20> WorkList;
- WorkList.push_back(PHI);
-
- // Record this PHI.
- MachineBasicBlock *BB = PHI->getParent();
- AvailableVals[BB] = PHI->getOperand(0).getReg();
- (*BBMap)[BB]->AvailableVal = PHI->getOperand(0).getReg();
-
- while (!WorkList.empty()) {
- PHI = WorkList.pop_back_val();
-
- // Iterate through the PHI's incoming values.
- for (unsigned i = 1, e = PHI->getNumOperands(); i != e; i += 2) {
- unsigned IncomingVal = PHI->getOperand(i).getReg();
- MachineInstr *IncomingPHIVal = MRI->getVRegDef(IncomingVal);
- if (!IncomingPHIVal->isPHI()) continue;
- BB = IncomingPHIVal->getParent();
- BBInfo *Info = (*BBMap)[BB];
- if (!Info || Info->AvailableVal)
- continue;
-
- // Record the PHI and add it to the worklist.
- AvailableVals[BB] = IncomingVal;
- Info->AvailableVal = IncomingVal;
- WorkList.push_back(IncomingPHIVal);
- }
- }
+ if (unsigned V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
}
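The rewrite above replaces the hand-rolled dominance and PHI-placement engine with the shared SSAUpdaterImpl template; everything machine-specific is funneled through the SSAUpdaterTraits specialization. The load-bearing detail is the machine PHI operand layout: operand 0 is the def, and incoming (value, block) pairs start at index 1 with stride 2, which is exactly what MachinePHIiter walks. A compact standalone model of that iteration (names are illustrative, not LLVM's):

#include <cassert>
#include <vector>

// A machine PHI modeled as a flat operand list: [def, val1, bb1, val2, bb2...]
struct Phi {
  std::vector<unsigned> Ops;
};

// Analogue of MachinePHIiter: step through the (value, block) pairs.
struct PhiIter {
  const Phi *P;
  unsigned Idx;
  explicit PhiIter(const Phi *Ph) : P(Ph), Idx(1) {}           // begin
  PhiIter(const Phi *Ph, bool) : P(Ph), Idx(Ph->Ops.size()) {} // end
  PhiIter &operator++() { Idx += 2; return *this; }
  bool operator!=(const PhiIter &O) const { return Idx != O.Idx; }
  unsigned value() const { return P->Ops[Idx]; }
  unsigned block() const { return P->Ops[Idx + 1]; }
};

int main() {
  Phi P{{100, 7, 1, 8, 2}};  // def=100; value 7 from bb1, value 8 from bb2.
  unsigned Count = 0;
  for (PhiIter I(&P), E(&P, true); I != E; ++I) {
    assert(I.value() == (Count == 0 ? 7u : 8u));
    ++Count;
  }
  assert(Count == 2);  // Two incoming (value, block) pairs.
}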
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index ef489dc..1610e6c 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -314,5 +314,10 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Move the instruction.
SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
++MachineBasicBlock::iterator(MI));
+
+ // Conservatively, clear any kill flags, since it's possible that
+ // they are no longer correct.
+ MI->clearKillInfo();
+
return true;
}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 0b75c55..8baf01c 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -47,7 +47,7 @@ namespace {
MachineVerifier(Pass *pass, bool allowDoubleDefs) :
PASS(pass),
allowVirtDoubleDefs(allowDoubleDefs),
- allowPhysDoubleDefs(allowDoubleDefs),
+ allowPhysDoubleDefs(true),
OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
{}
@@ -552,19 +552,23 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
regsLiveInButUnused.erase(Reg);
bool isKill = false;
- if (MO->isKill()) {
- isKill = true;
- // Tied operands on two-address instuctions MUST NOT have a <kill> flag.
- if (MI->isRegTiedToDefOperand(MONum))
+ unsigned defIdx;
+ if (MI->isRegTiedToDefOperand(MONum, &defIdx)) {
+ // A two-addr use counts as a kill if use and def are the same.
+ unsigned DefReg = MI->getOperand(defIdx).getReg();
+ if (Reg == DefReg) {
+ isKill = true;
+        // And in that case an explicit kill flag is not allowed.
+ if (MO->isKill())
report("Illegal kill flag on two-address instruction operand",
MO, MONum);
- } else {
- // TwoAddress instr modifying a reg is treated as kill+def.
- unsigned defIdx;
- if (MI->isRegTiedToDefOperand(MONum, &defIdx) &&
- MI->getOperand(defIdx).getReg() == Reg)
- isKill = true;
- }
+ } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ report("Two-address instruction operands must be identical",
+ MO, MONum);
+ }
+ } else
+ isKill = MO->isKill();
+
if (isKill) {
addRegWithSubRegs(regsKilled, Reg);
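The verifier change encodes a precise rule: a tied (two-address) use is implicitly a kill exactly when it names the same register as its tied def, writing an explicit <kill> flag on it is then an error, and differing tied physical registers are themselves an error. As a boolean table the rule is small enough to test directly; this is a model of the check, not verifier code:

#include <cassert>

struct TiedUse {
  unsigned UseReg, DefReg;
  bool ExplicitKill;  // A <kill> flag written on the use operand.
};

struct Verdict { bool IsKill, Illegal; };

Verdict checkTiedUse(const TiedUse &U) {
  if (U.UseReg == U.DefReg)
    // A two-address modification counts as kill+def; the kill is implied
    // and must never be written out explicitly.
    return {true, U.ExplicitKill};
  // Differing registers: no implicit kill. (The verifier additionally
  // reports this case as an error when the register is physical.)
  return {false, false};
}

int main() {
  assert(checkTiedUse({5, 5, false}).IsKill);   // Implicit kill.
  assert(checkTiedUse({5, 5, true}).Illegal);   // Redundant <kill>: error.
  assert(!checkTiedUse({5, 6, false}).IsKill);  // Def differs: no kill.
}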
@@ -631,11 +635,14 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Virtual register.
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
if (SubIdx) {
- if (RC->subregclasses_begin()+SubIdx >= RC->subregclasses_end()) {
+ const TargetRegisterClass *SRC = RC->getSubRegisterRegClass(SubIdx);
+ if (!SRC) {
report("Invalid subregister index for virtual register", MO, MONum);
+ *OS << "Register class " << RC->getName()
+ << " does not support subreg index " << SubIdx << "\n";
return;
}
- RC = *(RC->subregclasses_begin()+SubIdx);
+ RC = SRC;
}
if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
if (RC != DRC && !RC->hasSuperClass(DRC)) {
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 1651719..edbc13f 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -27,10 +27,8 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include <algorithm>
#include <map>
using namespace llvm;
@@ -88,10 +86,6 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
ImpDefs.clear();
VRegPHIUseCount.clear();
- // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve
- // SSA form.
- Changed |= EliminateRegSequences(MF);
-
return Changed;
}
@@ -216,7 +210,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
} else {
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
}
- TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
+ TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC,
+ MPhi->getDebugLoc());
}
// Update live variable information if there is any.
@@ -298,7 +293,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Insert the copy.
if (!reusedIncoming && IncomingReg)
- TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
+ TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC,
+ MPhi->getDebugLoc());
// Now update live variable information if we have it. Otherwise we're done
if (!LV) continue;
@@ -449,58 +445,3 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
return NMBB;
}
-
-static void UpdateRegSequenceSrcs(unsigned SrcReg,
- unsigned DstReg, unsigned SrcIdx,
- MachineRegisterInfo *MRI) {
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- UE = MRI->reg_end(); RI != UE; ) {
- MachineOperand &MO = RI.getOperand();
- ++RI;
- MO.setReg(DstReg);
- MO.setSubReg(SrcIdx);
- }
-}
-
-/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as second part
-/// of de-ssa process. This replaces sources of REG_SEQUENCE as sub-register
-/// references of the register defined by REG_SEQUENCE. e.g.
-///
-/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ...
-/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
-/// =>
-/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
-bool PHIElimination::EliminateRegSequences(MachineFunction &MF) {
- bool Changed = false;
-
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
- for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
- BBI != BBE; ) {
- MachineInstr &MI = *BBI;
- ++BBI;
- if (MI.getOpcode() != TargetOpcode::REG_SEQUENCE)
- continue;
- unsigned DstReg = MI.getOperand(0).getReg();
- if (MI.getOperand(0).getSubReg() ||
- TargetRegisterInfo::isPhysicalRegister(DstReg) ||
- !(MI.getNumOperands() & 1)) {
- DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
- llvm_unreachable(0);
- }
- for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
- unsigned SrcReg = MI.getOperand(i).getReg();
- if (MI.getOperand(i).getSubReg() ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
- DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
- llvm_unreachable(0);
- }
- unsigned SrcIdx = MI.getOperand(i+1).getImm();
- UpdateRegSequenceSrcs(SrcReg, DstReg, SrcIdx, MRI);
- }
-
- MI.eraseFromParent();
- Changed = true;
- }
-
- return Changed;
-}
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index 3292aa2..7dedf03 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -94,8 +94,6 @@ namespace llvm {
return I;
}
- bool EliminateRegSequences(MachineFunction &MF);
-
typedef std::pair<unsigned, unsigned> BBVRegPair;
typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index d3e1295..9714ea6 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -114,7 +114,7 @@ namespace {
/// AvailableQueue - The priority queue to use for the available SUnits.
///
LatencyPriorityQueue AvailableQueue;
-
+
/// PendingQueue - This contains all of the instructions whose operands have
/// been issued, but their results are not ready yet (due to the latency of
/// the operation). Once the operands becomes available, the instruction is
@@ -158,7 +158,7 @@ namespace {
/// Schedule - Schedule the instruction range using list scheduling.
///
void Schedule();
-
+
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
///
@@ -179,7 +179,7 @@ namespace {
void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle);
void ListScheduleTopDown();
void StartBlockForKills(MachineBasicBlock *BB);
-
+
// ToggleKillFlag - Toggle a register operand kill flag. Other
// adjustments may be made to the instruction if necessary. Return
// true if the operand has been deleted, false if not.
@@ -197,13 +197,13 @@ static bool isSchedulingBoundary(const MachineInstr *MI,
if (MI->getDesc().isTerminator() || MI->isLabel())
return true;
- // Don't attempt to schedule around any instruction that modifies
+ // Don't attempt to schedule around any instruction that defines
// a stack-oriented pointer, as it's unlikely to be profitable. This
// saves compile time, because it doesn't require every single
// stack slot reference to depend on the instruction that does the
// modification.
const TargetLowering &TLI = *MF.getTarget().getTargetLowering();
- if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore()))
+ if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore()))
return true;
return false;
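The switch from modifiesRegister to definesRegister narrows the query: modifiesRegister also fires when the instruction writes a register that merely overlaps the one being asked about, while definesRegister asks about the register itself. For fencing scheduling around the stack pointer, the narrower test suffices. A toy model of roughly that distinction (the alias relation is hypothetical):

#include <cassert>
#include <vector>

struct Def { unsigned Reg; };

// Hypothetical alias relation: registers 1 and 2 overlap.
bool overlaps(unsigned A, unsigned B) {
  return A == B || (A == 1 && B == 2) || (A == 2 && B == 1);
}

bool definesReg(const std::vector<Def> &Defs, unsigned Reg) {
  for (const Def &D : Defs)
    if (D.Reg == Reg)  // The exact register only.
      return true;
  return false;
}

bool modifiesReg(const std::vector<Def> &Defs, unsigned Reg) {
  for (const Def &D : Defs)
    if (overlaps(D.Reg, Reg))  // Any overlapping write counts.
      return true;
  return false;
}

int main() {
  std::vector<Def> Defs = {{2}};  // Writes the alias, not the register itself.
  assert(!definesReg(Defs, 1));
  assert(modifiesReg(Defs, 1));
}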
@@ -227,9 +227,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Check for antidep breaking override...
if (EnableAntiDepBreaking.getPosition() > 0) {
- AntiDepMode = (EnableAntiDepBreaking == "all") ? TargetSubtarget::ANTIDEP_ALL :
- (EnableAntiDepBreaking == "critical") ? TargetSubtarget::ANTIDEP_CRITICAL :
- TargetSubtarget::ANTIDEP_NONE;
+ AntiDepMode = (EnableAntiDepBreaking == "all") ?
+ TargetSubtarget::ANTIDEP_ALL :
+ (EnableAntiDepBreaking == "critical")
+ ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
}
DEBUG(dbgs() << "PostRAScheduler\n");
@@ -240,10 +241,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ?
(ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) :
(ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
- AntiDepBreaker *ADB =
+ AntiDepBreaker *ADB =
((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
(AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
- ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
+ ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
(AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA);
@@ -265,17 +266,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Initialize register live-range state for scheduling in this block.
Scheduler.StartBlock(MBB);
- // FIXME: Temporary workaround for <rdar://problem/7759363>: The post-RA
- // scheduler has some sort of problem with DebugValue instructions that
- // causes an assertion in LeaksContext.h to fail occasionally. Just
- // remove all those instructions for now.
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ) {
- MachineInstr *MI = &*I++;
- if (MI->isDebugValue())
- MI->eraseFromParent();
- }
-
// Schedule each sequence of instructions not interrupted by a label
// or anything else that effectively needs to shut down scheduling.
MachineBasicBlock::iterator Current = MBB->end();
@@ -310,7 +300,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
return true;
}
-
+
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
///
@@ -331,10 +321,10 @@ void SchedulePostRATDList::Schedule() {
BuildSchedGraph(AA);
if (AntiDepBreak != NULL) {
- unsigned Broken =
+ unsigned Broken =
AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos,
InsertPosIndex);
-
+
if (Broken != 0) {
// We made changes. Update the dependency graph.
// Theoretically we could update the graph in place:
@@ -347,7 +337,7 @@ void SchedulePostRATDList::Schedule() {
EntrySU = SUnit();
ExitSU = SUnit();
BuildSchedGraph(AA);
-
+
NumFixedAnti += Broken;
}
}
@@ -425,7 +415,7 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI,
MO.setIsKill(true);
return false;
}
-
+
// If MO itself is live, clear the kill flag...
if (KillIndices[MO.getReg()] != ~0u) {
MO.setIsKill(false);
@@ -464,7 +454,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
BitVector ReservedRegs = TRI->getReservedRegs(MF);
StartBlockForKills(MBB);
-
+
// Examine block from end to start...
unsigned Count = MBB->size();
for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
@@ -484,9 +474,9 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
if (!MO.isDef()) continue;
// Ignore two-addr defs.
if (MI->isRegTiedToUseOperand(i)) continue;
-
+
KillIndices[Reg] = ~0u;
-
+
// Repeat for all subregs.
for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
@@ -521,17 +511,17 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
if (kill)
kill = (KillIndices[Reg] == ~0u);
}
-
+
if (MO.isKill() != kill) {
DEBUG(dbgs() << "Fixing " << MO << " in ");
// Warning: ToggleKillFlag may invalidate MO.
ToggleKillFlag(MI, MO);
DEBUG(MI->dump());
}
-
+
killedRegs.insert(Reg);
}
-
+
// Mark any used register (that is not using undef) and subregs as
// now live...
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -541,7 +531,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
KillIndices[Reg] = Count;
-
+
for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
*Subreg; ++Subreg) {
KillIndices[*Subreg] = Count;
@@ -573,7 +563,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) {
// available. This is the max of the start time of all predecessors plus
// their latencies.
SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency());
-
+
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
@@ -594,9 +584,9 @@ void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) {
void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
-
+
Sequence.push_back(SU);
- assert(CurCycle >= SU->getDepth() &&
+ assert(CurCycle >= SU->getDepth() &&
"Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
@@ -609,7 +599,7 @@ void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
/// schedulers.
void SchedulePostRATDList::ListScheduleTopDown() {
unsigned CurCycle = 0;
-
+
// We're scheduling top-down but we're visiting the regions in
// bottom-up order, so we don't know the hazards at the start of a
// region. So assume no hazards (this should usually be ok as most
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 2d49beb..96e7327 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -882,7 +882,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
!RefsInMBB.count(FoldPt))
--FoldPt;
- int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg, false);
+ int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg);
if (OpIdx == -1)
return 0;
@@ -1061,7 +1061,8 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
// Add spill.
SS = CreateSpillStackSlot(CurrLI->reg, RC);
- TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC);
+ TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC,
+ TRI);
SpillMI = prior(SpillPt);
SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
}
@@ -1097,7 +1098,8 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
}
// Add spill.
SS = CreateSpillStackSlot(CurrLI->reg, RC);
- TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC);
+ TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC,
+ TRI);
SpillMI = prior(SpillPt);
SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
}
@@ -1116,7 +1118,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
RestoreIndex = LIs->getInstructionIndex(RestorePt);
FoldedRestore = true;
} else {
- TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC);
+ TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC, TRI);
MachineInstr *LoadMI = prior(RestorePt);
RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI);
}
@@ -1152,7 +1154,7 @@ PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs,
// codegen is not modelling. Ignore these barriers for now.
if (!TII->isSafeToMoveRegClassDefs(*RC))
continue;
- std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC);
+ const std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC);
for (unsigned i = 0, e = VRs.size(); i != e; ++i) {
unsigned Reg = VRs[i];
if (!LIs->hasInterval(Reg))
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index d7179b3..62f525f 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -46,7 +46,7 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
const TargetInstrInfo *tii_) {
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg)
+ Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0)
return true;
if (OpIdx == 2 && MI->isSubregToReg())
@@ -89,6 +89,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MachineInstr *MI = &*I;
++I;
if (MI->isImplicitDef()) {
+ if (MI->getOperand(0).getSubReg())
+ continue;
unsigned Reg = MI->getOperand(0).getReg();
ImpDefRegs.insert(Reg);
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -218,7 +220,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
// Turn a copy use into an implicit_def.
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg) {
+ Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) {
RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
bool isKill = false;
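ProcessImplicitDefs now refuses to treat a sub-register copy as a plain move of an undef value: only a full-register copy (SrcSubReg == 0 and DstSubReg == 0) whose source is an implicit-def may itself be rewritten as an IMPLICIT_DEF. A miniature model of that propagation step, under the obvious simplifications:

#include <cassert>
#include <set>

struct Copy {
  unsigned Src, Dst;
  unsigned SrcSub, DstSub;  // Sub-register indices; 0 = whole register.
};

// If the copy source is undefined and the copy moves the whole register,
// the copy can itself become an implicit def of its destination.
bool canTurnIntoImplicitDef(const Copy &C, const std::set<unsigned> &Undef) {
  return Undef.count(C.Src) && C.SrcSub == 0 && C.DstSub == 0;
}

int main() {
  std::set<unsigned> Undef = {4};  // vreg 4 is defined by implicit_def.
  assert(canTurnIntoImplicitDef({4, 5, 0, 0}, Undef));   // Full copy: yes.
  assert(!canTurnIntoImplicitDef({4, 5, 1, 0}, Undef));  // Subreg read: no.
  assert(!canTurnIntoImplicitDef({6, 5, 0, 0}, Undef));  // Defined src: no.
}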
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index a454b62..e778024 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -58,8 +58,9 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
FrameConstantRegMap.clear();
- // Calculate the MaxCallFrameSize and HasCalls variables for the function's
- // frame information. Also eliminates call frame pseudo instructions.
+ // Calculate the MaxCallFrameSize and AdjustsStack variables for the
+ // function's frame information. Also eliminates call frame pseudo
+ // instructions.
calculateCallsInformation(Fn);
// Allow the target machine to make some adjustments to the function
@@ -91,8 +92,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// Add prolog and epilog code to the function. This function is required
// to align the stack frame as necessary for any stack variables or
- // called functions. Because of this, calculateCalleeSavedRegisters
- // must be called before this function in order to set the HasCalls
+ // called functions. Because of this, calculateCalleeSavedRegisters()
+ // must be called before this function in order to set the AdjustsStack
// and MaxCallFrameSize variables.
if (!F->hasFnAttr(Attribute::Naked))
insertPrologEpilogCode(Fn);
@@ -126,7 +127,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
}
#endif
-/// calculateCallsInformation - Calculate the MaxCallFrameSize and HasCalls
+/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack
/// variables for the function's frame information and eliminate call frame
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
@@ -134,7 +135,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
MachineFrameInfo *MFI = Fn.getFrameInfo();
unsigned MaxCallFrameSize = 0;
- bool HasCalls = MFI->hasCalls();
+ bool AdjustsStack = MFI->adjustsStack();
// Get the function call frame set-up and tear-down instruction opcode
int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
@@ -154,15 +155,15 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
" instructions should have a single immediate argument!");
unsigned Size = I->getOperand(0).getImm();
if (Size > MaxCallFrameSize) MaxCallFrameSize = Size;
- HasCalls = true;
+ AdjustsStack = true;
FrameSDOps.push_back(I);
} else if (I->isInlineAsm()) {
// An InlineAsm might be a call; assume it is to get the stack frame
// aligned correctly for calls.
- HasCalls = true;
+ AdjustsStack = true;
}
- MFI->setHasCalls(HasCalls);
+ MFI->setAdjustsStack(AdjustsStack);
MFI->setMaxCallFrameSize(MaxCallFrameSize);
for (std::vector<MachineBasicBlock::iterator>::iterator
@@ -289,12 +290,13 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
return;
const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
MachineBasicBlock::iterator I;
if (! ShrinkWrapThisFunction) {
// Spill using target interface.
I = EntryBlock->begin();
- if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI)) {
+ if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in.
// It's killed at the spill.
@@ -302,7 +304,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Insert the spill to the stack frame.
TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true,
- CSI[i].getFrameIdx(), CSI[i].getRegClass());
+ CSI[i].getFrameIdx(), CSI[i].getRegClass(),TRI);
}
}
@@ -324,11 +326,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Restore all registers immediately before the return and any
// terminators that preceed it.
- if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI)) {
+ if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(),
CSI[i].getFrameIdx(),
- CSI[i].getRegClass());
+ CSI[i].getRegClass(), TRI);
assert(I != MBB->begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert
@@ -375,7 +377,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(),
true,
blockCSI[i].getFrameIdx(),
- blockCSI[i].getRegClass());
+ blockCSI[i].getRegClass(), TRI);
}
}
@@ -423,7 +425,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) {
TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(),
blockCSI[i].getFrameIdx(),
- blockCSI[i].getRegClass());
+ blockCSI[i].getRegClass(), TRI);
assert(I != MBB->begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert
@@ -576,7 +578,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
// overall stack size.
- if (MFI->hasCalls() && RegInfo->hasReservedCallFrame(Fn))
+ if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn))
Offset += MFI->getMaxCallFrameSize();
// Round up the size to a multiple of the alignment. If the function has
@@ -585,13 +587,14 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// otherwise, for leaf functions, align to the TransientStackAlignment
// value.
unsigned StackAlign;
- if (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
(RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
StackAlign = TFI.getStackAlignment();
else
StackAlign = TFI.getTransientStackAlignment();
- // If the frame pointer is eliminated, all frame offsets will be relative
- // to SP not FP; align to MaxAlign so this works.
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
StackAlign = std::max(StackAlign, MaxAlign);
unsigned AlignMask = StackAlign - 1;
Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
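The rounding step relies on the usual power-of-two align-up identity: with AlignMask = StackAlign - 1, the expression (Offset + AlignMask) & ~AlignMask yields the smallest multiple of StackAlign that is greater than or equal to Offset. Checked in isolation:

#include <cassert>
#include <cstdint>

// Round Offset up to the next multiple of Align (Align must be a power of 2).
uint64_t alignUp(uint64_t Offset, uint64_t Align) {
  uint64_t AlignMask = Align - 1;
  return (Offset + AlignMask) & ~AlignMask;
}

int main() {
  assert(alignUp(0, 16) == 0);    // Already-aligned values are unchanged.
  assert(alignUp(1, 16) == 16);   // Anything else rounds upward...
  assert(alignUp(16, 16) == 16);
  assert(alignUp(17, 16) == 32);  // ...never downward.
}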
@@ -601,7 +604,6 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
MFI->setStackSize(Offset - LocalAreaOffset);
}
-
/// insertPrologEpilogCode - Scan the function for modified callee saved
/// registers, insert spill code for these callee saved registers, then add
/// prolog and epilog code to the function.
@@ -620,7 +622,6 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
}
}
-
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
///
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 2caf1df..b3b5760 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -38,6 +37,7 @@ using namespace llvm;
STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCopies, "Number of copies coalesced");
static RegisterRegAlloc
fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator);
@@ -46,77 +46,80 @@ namespace {
class RAFast : public MachineFunctionPass {
public:
static char ID;
- RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {}
+ RAFast() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1),
+ isBulkSpilling(false) {}
private:
const TargetMachine *TM;
MachineFunction *MF;
+ MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
+ // Basic block currently being allocated.
+ MachineBasicBlock *MBB;
+
// StackSlotForVirtReg - Maps virtual regs to the frame index where these
// values are spilled.
IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
- // Virt2PhysRegMap - This map contains entries for each virtual register
+ // Everything we know about a live virtual register.
+ struct LiveReg {
+ MachineInstr *LastUse; // Last instr to use reg.
+ unsigned PhysReg; // Currently held here.
+ unsigned short LastOpNum; // OpNum on LastUse.
+ bool Dirty; // Register needs spill.
+
+ LiveReg(unsigned p=0) : LastUse(0), PhysReg(p), LastOpNum(0),
+ Dirty(false) {}
+ };
+
+ typedef DenseMap<unsigned, LiveReg> LiveRegMap;
+ typedef LiveRegMap::value_type LiveRegEntry;
+
+ // LiveVirtRegs - This map contains entries for each virtual register
// that is currently available in a physical register.
- IndexedMap<unsigned, VirtReg2IndexFunctor> Virt2PhysRegMap;
+ LiveRegMap LiveVirtRegs;
- unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) {
- return Virt2PhysRegMap[VirtReg];
- }
+ // RegState - Track the state of a physical register.
+ enum RegState {
+ // A disabled register is not available for allocation, but an alias may
+ // be in use. A register can only be moved out of the disabled state if
+ // all aliases are disabled.
+ regDisabled,
- // PhysRegsUsed - This array is effectively a map, containing entries for
- // each physical register that currently has a value (ie, it is in
- // Virt2PhysRegMap). The value mapped to is the virtual register
- // corresponding to the physical register (the inverse of the
- // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned
- // because it is used by a future instruction, and to -2 if it is not
- // allocatable. If the entry for a physical register is -1, then the
- // physical register is "not in the map".
- //
- std::vector<int> PhysRegsUsed;
+ // A free register is not currently in use and can be allocated
+ // immediately without checking aliases.
+ regFree,
+
+    // A reserved register has been assigned explicitly (e.g., setting up a
+ // call parameter), and it remains reserved until it is used.
+ regReserved
+
+    // A register state may also be a virtual register number, indicating that
+ // the physical register is currently allocated to a virtual register. In
+ // that case, LiveVirtRegs contains the inverse mapping.
+ };
+
+ // PhysRegState - One of the RegState enums, or a virtreg.
+ std::vector<unsigned> PhysRegState;
// UsedInInstr - BitVector of physregs that are used in the current
// instruction, and so cannot be allocated.
BitVector UsedInInstr;
- // Virt2LastUseMap - This maps each virtual register to its last use
- // (MachineInstr*, operand index pair).
- IndexedMap<std::pair<MachineInstr*, unsigned>, VirtReg2IndexFunctor>
- Virt2LastUseMap;
-
- std::pair<MachineInstr*,unsigned>& getVirtRegLastUse(unsigned Reg) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
- return Virt2LastUseMap[Reg];
- }
-
- // VirtRegModified - This bitset contains information about which virtual
- // registers need to be spilled back to memory when their registers are
- // scavenged. If a virtual register has simply been rematerialized, there
- // is no reason to spill it to memory when we need the register back.
- //
- BitVector VirtRegModified;
-
- // UsedInMultipleBlocks - Tracks whether a particular register is used in
- // more than one block.
- BitVector UsedInMultipleBlocks;
-
- void markVirtRegModified(unsigned Reg, bool Val = true) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
- Reg -= TargetRegisterInfo::FirstVirtualRegister;
- if (Val)
- VirtRegModified.set(Reg);
- else
- VirtRegModified.reset(Reg);
- }
+ // Allocatable - vector of allocatable physical registers.
+ BitVector Allocatable;
- bool isVirtRegModified(unsigned Reg) const {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
- assert(Reg - TargetRegisterInfo::FirstVirtualRegister <
- VirtRegModified.size() && "Illegal virtual register!");
- return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
- }
+ // isBulkSpilling - This flag is set when LiveRegMap will be cleared
+ // completely after spilling all live registers. LiveRegMap entries should
+ // not be erased.
+ bool isBulkSpilling;
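+
+  // Spill cost weights used by calcSpillCost: evicting a dirty register
+  // (which requires a store) is far more expensive than evicting a clean
+  // one, and spillImpossible marks registers that cannot be evicted at all.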
+ enum {
+ spillClean = 1,
+ spillDirty = 100,
+ spillImpossible = ~0u
+ };
public:
virtual const char *getPassName() const {
return "Fast Register Allocator";
@@ -124,104 +127,34 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequired<LiveVariables>();
AU.addRequiredID(PHIEliminationID);
AU.addRequiredID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
private:
- /// runOnMachineFunction - Register allocate the whole function
bool runOnMachineFunction(MachineFunction &Fn);
-
- /// AllocateBasicBlock - Register allocate the specified basic block.
- void AllocateBasicBlock(MachineBasicBlock &MBB);
-
-
- /// areRegsEqual - This method returns true if the specified registers are
- /// related to each other. To do this, it checks to see if they are equal
- /// or if the first register is in the alias set of the second register.
- ///
- bool areRegsEqual(unsigned R1, unsigned R2) const {
- if (R1 == R2) return true;
- for (const unsigned *AliasSet = TRI->getAliasSet(R2);
- *AliasSet; ++AliasSet) {
- if (*AliasSet == R1) return true;
- }
- return false;
- }
-
- /// getStackSpaceFor - This returns the frame index of the specified virtual
- /// register on the stack, allocating space if necessary.
+ void AllocateBasicBlock();
int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC);
-
- /// removePhysReg - This method marks the specified physical register as no
- /// longer being in use.
- ///
- void removePhysReg(unsigned PhysReg);
-
- /// spillVirtReg - This method spills the value specified by PhysReg into
- /// the virtual register slot specified by VirtReg. It then updates the RA
- /// data structures to indicate the fact that PhysReg is now available.
- ///
- void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned VirtReg, unsigned PhysReg);
-
- /// spillPhysReg - This method spills the specified physical register into
- /// the virtual register slot associated with it. If OnlyVirtRegs is set to
- /// true, then the request is ignored if the physical register does not
- /// contain a virtual register.
- ///
- void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
- unsigned PhysReg, bool OnlyVirtRegs = false);
-
- /// assignVirtToPhysReg - This method updates local state so that we know
- /// that PhysReg is the proper container for VirtReg now. The physical
- /// register must not be used for anything else when this is called.
- ///
- void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg);
-
- /// isPhysRegAvailable - Return true if the specified physical register is
- /// free and available for use. This also includes checking to see if
- /// aliased registers are all free...
- ///
- bool isPhysRegAvailable(unsigned PhysReg) const;
-
- /// isPhysRegSpillable - Can PhysReg be freed by spilling?
- bool isPhysRegSpillable(unsigned PhysReg) const;
-
- /// getFreeReg - Look to see if there is a free register available in the
- /// specified register class. If not, return 0.
- ///
- unsigned getFreeReg(const TargetRegisterClass *RC);
-
- /// getReg - Find a physical register to hold the specified virtual
- /// register. If all compatible physical registers are used, this method
- /// spills the last used virtual register to the stack, and uses that
- /// register. If NoFree is true, that means the caller knows there isn't
- /// a free register, do not call getFreeReg().
- unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI,
- unsigned VirtReg, bool NoFree = false);
-
- /// reloadVirtReg - This method transforms the specified virtual
- /// register use to refer to a physical register. This method may do this
- /// in one of several ways: if the register is available in a physical
- /// register already, it uses that physical register. If the value is not
- /// in a physical register, and if there are physical registers available,
- /// it loads it into a register: PhysReg if that is an available physical
- /// register, otherwise any physical register of the right class.
- /// If register pressure is high, and it is possible, it tries to fold the
- /// load of the virtual register into the instruction itself. It avoids
- /// doing this if register pressure is low to improve the chance that
- /// subsequent instructions can use the reloaded value. This method
- /// returns the modified instruction.
- ///
- MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
- unsigned OpNum, SmallSet<unsigned, 4> &RRegs,
- unsigned PhysReg);
-
- void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I,
- unsigned PhysReg);
+ bool isLastUseOfLocalReg(MachineOperand&);
+
+ void addKillFlag(const LiveReg&);
+ void killVirtReg(LiveRegMap::iterator);
+ void killVirtReg(unsigned VirtReg);
+ void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator);
+ void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg);
+
+ void usePhysReg(MachineOperand&);
+ void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState);
+ unsigned calcSpillCost(unsigned PhysReg) const;
+ void assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg);
+ void allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint);
+ LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint);
+ void spillAll(MachineInstr *MI);
+ bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg);
};
char RAFast::ID = 0;
}
@@ -243,687 +176,668 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
return FrameIdx;
}
-
-/// removePhysReg - This method marks the specified physical register as no
-/// longer being in use.
-///
-void RAFast::removePhysReg(unsigned PhysReg) {
- PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used
-}
-
-
-/// spillVirtReg - This method spills the value specified by PhysReg into the
-/// virtual register slot specified by VirtReg. It then updates the RA data
-/// structures to indicate the fact that PhysReg is now available.
+/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to
+/// its virtual register, and it is guaranteed to be a block-local register.
///
-void RAFast::spillVirtReg(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned VirtReg, unsigned PhysReg) {
- assert(VirtReg && "Spilling a physical register is illegal!"
- " Must not have appropriate kill for the register or use exists beyond"
- " the intended one.");
- DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg)
- << " containing %reg" << VirtReg);
-
- if (!isVirtRegModified(VirtReg)) {
- DEBUG(dbgs() << " which has not been modified, so no store necessary!");
- std::pair<MachineInstr*, unsigned> &LastUse = getVirtRegLastUse(VirtReg);
- if (LastUse.first)
- LastUse.first->getOperand(LastUse.second).setIsKill();
- } else {
- // Otherwise, there is a virtual register corresponding to this physical
- // register. We only need to spill it into its stack slot if it has been
- // modified.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
- int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << " to stack slot #" << FrameIndex);
- // If the instruction reads the register that's spilled, (e.g. this can
- // happen if it is a move to a physical register), then the spill
- // instruction is not a kill.
- bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg));
- TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
- ++NumStores; // Update statistics
- }
+bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) {
+ // Check for non-debug uses or defs following MO.
+ // This is the most likely way to fail - fast path it.
+ MachineOperand *Next = &MO;
+ while ((Next = Next->getNextOperandForReg()))
+ if (!Next->isDebug())
+ return false;
- getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available
+ // If the register has ever been spilled or reloaded, we conservatively assume
+ // it is a global register used in multiple blocks.
+ if (StackSlotForVirtReg[MO.getReg()] != -1)
+ return false;
- DEBUG(dbgs() << '\n');
- removePhysReg(PhysReg);
+ // Check that the use/def chain has exactly one operand - MO.
+ return &MRI->reg_nodbg_begin(MO.getReg()).getOperand() == &MO;
}
-
-/// spillPhysReg - This method spills the specified physical register into the
-/// virtual register slot associated with it. If OnlyVirtRegs is set to true,
-/// then the request is ignored if the physical register does not contain a
-/// virtual register.
-///
-void RAFast::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
- unsigned PhysReg, bool OnlyVirtRegs) {
- if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used!
- assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
- if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
- spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
- return;
+/// addKillFlag - Set kill flags on last use of a virtual register.
+void RAFast::addKillFlag(const LiveReg &LR) {
+ if (!LR.LastUse) return;
+ MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum);
+ if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) {
+ if (MO.getReg() == LR.PhysReg)
+ MO.setIsKill();
+ else
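+      // setPhysReg rewrote this operand to a subregister of PhysReg, so
+      // flag the full register as implicitly killed instead.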
+ LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true);
}
+}
- // If the selected register aliases any other registers, we must make
- // sure that one of the aliases isn't alive.
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet) {
- if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register.
- PhysRegsUsed[*AliasSet] == -2) // If allocatable.
- continue;
-
- if (PhysRegsUsed[*AliasSet])
- spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
- }
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(LiveRegMap::iterator LRI) {
+ addKillFlag(LRI->second);
+ const LiveReg &LR = LRI->second;
+ assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+ PhysRegState[LR.PhysReg] = regFree;
+ // Erase from LiveVirtRegs unless we're spilling in bulk.
+ if (!isBulkSpilling)
+ LiveVirtRegs.erase(LRI);
}
+/// killVirtReg - Mark virtreg as no longer available.
+void RAFast::killVirtReg(unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "killVirtReg needs a virtual register");
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ if (LRI != LiveVirtRegs.end())
+ killVirtReg(LRI);
+}
-/// assignVirtToPhysReg - This method updates local state so that we know
-/// that PhysReg is the proper container for VirtReg now. The physical
-/// register must not be used for anything else when this is called.
-///
-void RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) {
- assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!");
- // Update information to note the fact that this register was just used, and
- // it holds VirtReg.
- PhysRegsUsed[PhysReg] = VirtReg;
- getVirt2PhysRegMapSlot(VirtReg) = PhysReg;
- UsedInInstr.set(PhysReg);
+/// spillVirtReg - This method spills the value specified by VirtReg into the
+/// corresponding stack slot if needed. The register is then killed, marking
+/// its physreg free.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Spilling a physical register is illegal!");
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(VirtReg);
+ assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register");
+ spillVirtReg(MI, LRI);
}
+/// spillVirtReg - Do the actual work of spilling.
+void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
+ LiveRegMap::iterator LRI) {
+ LiveReg &LR = LRI->second;
+ assert(PhysRegState[LR.PhysReg] == LRI->first && "Broken RegState mapping");
+
+ if (LR.Dirty) {
+ // If this physreg is used by the instruction, we want to kill it on the
+ // instruction, not on the spill.
+ bool SpillKill = LR.LastUse != MI;
+ LR.Dirty = false;
+ DEBUG(dbgs() << "Spilling %reg" << LRI->first
+ << " in " << TRI->getName(LR.PhysReg));
+ const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
+ int FI = getStackSpaceFor(LRI->first, RC);
+ DEBUG(dbgs() << " to stack slot #" << FI << "\n");
+ TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI);
+ ++NumStores; // Update statistics
-/// isPhysRegAvailable - Return true if the specified physical register is free
-/// and available for use. This also includes checking to see if aliased
-/// registers are all free...
-///
-bool RAFast::isPhysRegAvailable(unsigned PhysReg) const {
- if (PhysRegsUsed[PhysReg] != -1) return false;
-
- // If the selected register aliases any other allocated registers, it is
- // not free!
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet)
- if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use?
- return false; // Can't use this reg then.
- return true;
+ if (SpillKill)
+ LR.LastUse = 0; // Don't kill register again
+ }
+ killVirtReg(LRI);
}
-/// isPhysRegSpillable - Return true if the specified physical register can be
-/// spilled for use in the current instruction.
-///
-bool RAFast::isPhysRegSpillable(unsigned PhysReg) const {
- // Test that PhysReg and all aliases are either free or assigned to a VirtReg
- // that is not used in the instruction.
- if (PhysRegsUsed[PhysReg] != -1 &&
- (PhysRegsUsed[PhysReg] <= 0 || UsedInInstr.test(PhysReg)))
- return false;
-
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet)
- if (PhysRegsUsed[*AliasSet] != -1 &&
- (PhysRegsUsed[*AliasSet] <= 0 || UsedInInstr.test(*AliasSet)))
- return false;
- return true;
+/// spillAll - Spill all dirty virtregs without killing them.
+void RAFast::spillAll(MachineInstr *MI) {
+ if (LiveVirtRegs.empty()) return;
+ isBulkSpilling = true;
+ // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
+ // of spilling here is deterministic, if arbitrary.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end();
+ i != e; ++i)
+ spillVirtReg(MI, i);
+ LiveVirtRegs.clear();
+ isBulkSpilling = false;
}
+/// usePhysReg - Handle the direct use of a physical register.
+/// Check that the register is not used by a virtreg.
+/// Kill the physreg, marking it free.
+/// This may add implicit kills to MO->getParent() and invalidate MO.
+void RAFast::usePhysReg(MachineOperand &MO) {
+ unsigned PhysReg = MO.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
+ "Bad usePhysReg operand");
+
+ switch (PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ PhysRegState[PhysReg] = regFree;
+ // Fall through
+ case regFree:
+ UsedInInstr.set(PhysReg);
+ MO.setIsKill();
+ return;
+ default:
+    // The physreg was allocated to a virtual register. That means the value
+    // we wanted has been clobbered.
+ llvm_unreachable("Instruction uses an allocated register");
+ }
-/// getFreeReg - Look to see if there is a free register available in the
-/// specified register class. If not, return 0.
-///
-unsigned RAFast::getFreeReg(const TargetRegisterClass *RC) {
- // Get iterators defining the range of registers that are valid to allocate in
- // this class, which also specifies the preferred allocation order.
- TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
- TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
-
- for (; RI != RE; ++RI)
- if (isPhysRegAvailable(*RI)) { // Is reg unused?
- assert(*RI != 0 && "Cannot use register!");
- return *RI; // Found an unused register!
+ // Maybe a superregister is reserved?
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ switch (PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regReserved:
+ assert(TRI->isSuperRegister(PhysReg, Alias) &&
+ "Instruction is not using a subregister of a reserved register");
+ // Leave the superregister in the working set.
+ PhysRegState[Alias] = regFree;
+ UsedInInstr.set(Alias);
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ case regFree:
+ if (TRI->isSuperRegister(PhysReg, Alias)) {
+ // Leave the superregister in the working set.
+ UsedInInstr.set(Alias);
+ MO.getParent()->addRegisterKilled(Alias, TRI, true);
+ return;
+ }
+ // Some other alias was in the working set - clear it.
+ PhysRegState[Alias] = regDisabled;
+ break;
+ default:
+ llvm_unreachable("Instruction uses an alias of an allocated register");
}
- return 0;
+ }
+
+ // All aliases are disabled, bring register into working set.
+ PhysRegState[PhysReg] = regFree;
+ UsedInInstr.set(PhysReg);
+ MO.setIsKill();
}
+/// definePhysReg - Mark PhysReg as reserved or free after spilling any
+/// virtregs. This is very similar to defineVirtReg except the physreg is
+/// reserved instead of allocated.
+void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
+ RegState NewState) {
+ UsedInInstr.set(PhysReg);
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[PhysReg] = NewState;
+ return;
+ }
-/// getReg - Find a physical register to hold the specified virtual
-/// register. If all compatible physical registers are used, this method spills
-/// the last used virtual register to the stack, and uses that register.
-///
-unsigned RAFast::getReg(MachineBasicBlock &MBB, MachineInstr *I,
- unsigned VirtReg, bool NoFree) {
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
+ // This is a disabled register, disable all aliases.
+ PhysRegState[PhysReg] = NewState;
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ UsedInInstr.set(Alias);
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ default:
+ spillVirtReg(MI, VirtReg);
+ // Fall through.
+ case regFree:
+ case regReserved:
+ PhysRegState[Alias] = regDisabled;
+ if (TRI->isSuperRegister(PhysReg, Alias))
+ return;
+ break;
+ }
+ }
+}
- // First check to see if we have a free register of the requested type...
- unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
- if (PhysReg != 0) {
- // Assign the register.
- assignVirtToPhysReg(VirtReg, PhysReg);
- return PhysReg;
+// calcSpillCost - Return the cost of spilling to clear out PhysReg and its
+// aliases so the register is free for allocation.
+// Returns 0 when PhysReg is free or disabled with all aliases disabled - it
+// can be allocated directly.
+// Returns spillImpossible when PhysReg or an alias can't be spilled.
+unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
+ if (UsedInInstr.test(PhysReg))
+ return spillImpossible;
+ switch (unsigned VirtReg = PhysRegState[PhysReg]) {
+ case regDisabled:
+ break;
+ case regFree:
+ return 0;
+ case regReserved:
+ return spillImpossible;
+ default:
+ return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
}
- // If we didn't find an unused register, scavenge one now! Don't be fancy,
- // just grab the first possible register.
- TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF);
- TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF);
-
- for (; RI != RE; ++RI)
- if (isPhysRegSpillable(*RI)) {
- PhysReg = *RI;
+  // This is a disabled register, add up the cost of its aliases.
+ unsigned Cost = 0;
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS) {
+ if (UsedInInstr.test(Alias))
+ return spillImpossible;
+ switch (unsigned VirtReg = PhysRegState[Alias]) {
+ case regDisabled:
+ break;
+ case regFree:
+ ++Cost;
+ break;
+ case regReserved:
+ return spillImpossible;
+ default:
+ Cost += LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
break;
}
-
- assert(PhysReg && "Physical register not assigned!?!?");
- spillPhysReg(MBB, I, PhysReg);
- assignVirtToPhysReg(VirtReg, PhysReg);
- return PhysReg;
+ }
+ return Cost;
}
-/// reloadVirtReg - This method transforms the specified virtual
-/// register use to refer to a physical register. This method may do this in
-/// one of several ways: if the register is available in a physical register
-/// already, it uses that physical register. If the value is not in a physical
-/// register, and if there are physical registers available, it loads it into a
-/// register: PhysReg if that is an available physical register, otherwise any
-/// register. If register pressure is high, and it is possible, it tries to
-/// fold the load of the virtual register into the instruction itself. It
-/// avoids doing this if register pressure is low to improve the chance that
-/// subsequent instructions can use the reloaded value. This method returns
-/// the modified instruction.
+/// assignVirtToPhysReg - This method updates local state so that we know
+/// that PhysReg is the proper container for VirtReg now. The physical
+/// register must not be used for anything else when this is called.
///
-MachineInstr *RAFast::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
- unsigned OpNum,
- SmallSet<unsigned, 4> &ReloadedRegs,
- unsigned PhysReg) {
- unsigned VirtReg = MI->getOperand(OpNum).getReg();
-
- // If the virtual register is already available, just update the instruction
- // and return.
- if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
- MI->getOperand(OpNum).setReg(PR); // Assign the input register
- if (!MI->isDebugValue()) {
- // Do not do these for DBG_VALUE as they can affect codegen.
- UsedInInstr.set(PR);
- getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
+void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
+ DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to "
+ << TRI->getName(PhysReg) << "\n");
+ PhysRegState[PhysReg] = LRE.first;
+ assert(!LRE.second.PhysReg && "Already assigned a physreg");
+ LRE.second.PhysReg = PhysReg;
+}
+
+/// allocVirtReg - Allocate a physical register for VirtReg.
+void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
+ const unsigned VirtReg = LRE.first;
+
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Can only allocate virtual registers");
+
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+
+ // Ignore invalid hints.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || !Allocatable.test(Hint)))
+ Hint = 0;
+
+ // Take hint when possible.
+ if (Hint) {
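+    // calcSpillCost returns 0 when the hint is free right now and
+    // spillImpossible when it cannot be evicted; any other cost means the
+    // current occupant is spilled first (default falls through to case 0).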
+ switch(calcSpillCost(Hint)) {
+ default:
+ definePhysReg(MI, Hint, regFree);
+ // Fall through.
+ case 0:
+ return assignVirtToPhysReg(LRE, Hint);
+ case spillImpossible:
+ break;
}
- return MI;
}
- // Otherwise, we need to fold it into the current instruction, or reload it.
- // If we have registers available to hold the value, use them.
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
- // If we already have a PhysReg (this happens when the instruction is a
- // reg-to-reg copy with a PhysReg destination) use that.
- if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) ||
- !isPhysRegAvailable(PhysReg))
- PhysReg = getFreeReg(RC);
- int FrameIndex = getStackSpaceFor(VirtReg, RC);
-
- if (PhysReg) { // Register is available, allocate it!
- assignVirtToPhysReg(VirtReg, PhysReg);
- } else { // No registers available.
- // Force some poor hapless value out of the register file to
- // make room for the new register, and reload it.
- PhysReg = getReg(MBB, MI, VirtReg, true);
+ TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF);
+ TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF);
+
+ // First try to find a completely free register.
+ for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
+ unsigned PhysReg = *I;
+ if (PhysRegState[PhysReg] == regFree && !UsedInInstr.test(PhysReg))
+ return assignVirtToPhysReg(LRE, PhysReg);
}
- markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded
+ DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName()
+ << "\n");
+
+ unsigned BestReg = 0, BestCost = spillImpossible;
+ for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
+ unsigned Cost = calcSpillCost(*I);
+ // Cost is 0 when all aliases are already disabled.
+ if (Cost == 0)
+ return assignVirtToPhysReg(LRE, *I);
+    if (Cost < BestCost) {
+      BestReg = *I;
+      BestCost = Cost;
+    }
+ }
- DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into "
- << TRI->getName(PhysReg) << "\n");
+ if (BestReg) {
+ definePhysReg(MI, BestReg, regFree);
+ return assignVirtToPhysReg(LRE, BestReg);
+ }
- // Add move instruction(s)
- TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
- ++NumLoads; // Update statistics
-
- MF->getRegInfo().setPhysRegUsed(PhysReg);
- MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
- getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
-
- if (!ReloadedRegs.insert(PhysReg)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for invalid "
- << "constraints:\n";
- MI->print(Msg, TM);
- }
- report_fatal_error(Msg.str());
+ // Nothing we can do.
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
+ if (MI->isInlineAsm()) {
+ Msg << "\nPlease check your inline asm statement for "
+ << "invalid constraints:\n";
+ MI->print(Msg, TM);
}
- for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
- *SubRegs; ++SubRegs) {
- if (ReloadedRegs.insert(*SubRegs)) continue;
-
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for invalid "
- << "constraints:\n";
- MI->print(Msg, TM);
+ report_fatal_error(Msg.str());
+}
+
+/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
+RAFast::LiveRegMap::iterator
+RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
+ LiveReg &LR = LRI->second;
+ bool PartialRedef = MI->getOperand(OpNum).getSubReg();
+ if (New) {
+ // If there is no hint, peek at the only use of this register.
+ if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
+ MRI->hasOneNonDBGUse(VirtReg)) {
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ // It's a copy, use the destination register as a hint.
+ if (TII->isMoveInstr(*MRI->use_nodbg_begin(VirtReg),
+ SrcReg, DstReg, SrcSubReg, DstSubReg))
+ Hint = DstReg;
+ }
+ allocVirtReg(MI, *LRI, Hint);
+ // If this is only a partial redefinition, we must reload the other parts.
+ if (PartialRedef && MI->readsVirtualRegister(VirtReg)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ int FI = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << "Reloading for partial redef: %reg" << VirtReg << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FI, RC, TRI);
+ ++NumLoads;
}
- report_fatal_error(Msg.str());
+ } else if (LR.LastUse && !PartialRedef) {
+ // Redefining a live register - kill at the last use, unless it is this
+ // instruction defining VirtReg multiple times.
+ if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse())
+ addKillFlag(LR);
}
-
- return MI;
+ assert(LR.PhysReg && "Register not assigned");
+ LR.LastUse = MI;
+ LR.LastOpNum = OpNum;
+ LR.Dirty = true;
+ UsedInInstr.set(LR.PhysReg);
+ return LRI;
}
-/// isReadModWriteImplicitKill - True if this is an implicit kill for a
-/// read/mod/write register, i.e. update partial register.
-static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
- MO.isDef() && !MO.isDead())
- return true;
+/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it.
+RAFast::LiveRegMap::iterator
+RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
+ unsigned VirtReg, unsigned Hint) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register");
+ LiveRegMap::iterator LRI;
+ bool New;
+ tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg()));
+ LiveReg &LR = LRI->second;
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (New) {
+ allocVirtReg(MI, *LRI, Hint);
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ int FrameIndex = getStackSpaceFor(VirtReg, RC);
+ DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into "
+ << TRI->getName(LR.PhysReg) << "\n");
+ TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
+ ++NumLoads;
+ } else if (LR.Dirty) {
+ if (isLastUseOfLocalReg(MO)) {
+ DEBUG(dbgs() << "Killing last use: " << MO << "\n");
+ MO.setIsKill();
+ } else if (MO.isKill()) {
+ DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n");
+ MO.setIsKill(false);
+ }
+ } else if (MO.isKill()) {
+ // We must remove kill flags from uses of reloaded registers because the
+ // register would be killed immediately, and there might be a second use:
+ // %foo = OR %x<kill>, %x
+ // This would cause a second reload of %x into a different register.
+ DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n");
+ MO.setIsKill(false);
}
- return false;
+ assert(LR.PhysReg && "Register not assigned");
+ LR.LastUse = MI;
+ LR.LastOpNum = OpNum;
+ UsedInInstr.set(LR.PhysReg);
+ return LRI;
}
-/// isReadModWriteImplicitDef - True if this is an implicit def for a
-/// read/mod/write register, i.e. update partial register.
-static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
- !MO.isDef() && MO.isKill())
- return true;
+// setPhysReg - Change operand OpNum in MI to refer to PhysReg, considering
+// subregs. This may invalidate any operand pointers.
+// Return true if the operand kills its register.
+bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) {
+ MachineOperand &MO = MI->getOperand(OpNum);
+ if (!MO.getSubReg()) {
+ MO.setReg(PhysReg);
+ return MO.isKill() || MO.isDead();
}
- return false;
-}
-void RAFast::AllocateBasicBlock(MachineBasicBlock &MBB) {
- // loop over each instruction
- MachineBasicBlock::iterator MII = MBB.begin();
-
- DEBUG({
- const BasicBlock *LBB = MBB.getBasicBlock();
- if (LBB)
- dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName();
- });
-
- // Add live-in registers as active.
- for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(),
- E = MBB.livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- MF->getRegInfo().setPhysRegUsed(Reg);
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- }
+ // Handle subregister index.
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setSubReg(0);
+
+ // A kill flag implies killing the full register. Add corresponding super
+ // register kill.
+ if (MO.isKill()) {
+ MI->addRegisterKilled(PhysReg, TRI, true);
+ return true;
}
+ return MO.isDead();
+}
+
+void RAFast::AllocateBasicBlock() {
+ DEBUG(dbgs() << "\nAllocating " << *MBB);
+
+ PhysRegState.assign(TRI->getNumRegs(), regDisabled);
+  assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
+
+ MachineBasicBlock::iterator MII = MBB->begin();
+
+ // Add live-in registers as live.
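+  // They enter the block as regReserved: each incoming value must not be
+  // clobbered until an instruction reads it, at which point usePhysReg
+  // releases the register.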
+ for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
+ E = MBB->livein_end(); I != E; ++I)
+ definePhysReg(MII, *I, regReserved);
+
+ SmallVector<unsigned, 8> PhysECs, VirtDead;
+ SmallVector<MachineInstr*, 32> Coalesced;
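+  // PhysECs remembers early-clobber physregs so that later defs are kept
+  // out of them; Coalesced collects identity copies for removal once the
+  // whole block has been allocated.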
// Otherwise, sequentially allocate each instruction in the MBB.
- while (MII != MBB.end()) {
+ while (MII != MBB->end()) {
MachineInstr *MI = MII++;
const TargetInstrDesc &TID = MI->getDesc();
DEBUG({
- dbgs() << "\nStarting RegAlloc of: " << *MI;
- dbgs() << " Regs have values: ";
- for (unsigned i = 0; i != TRI->getNumRegs(); ++i)
- if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2)
- dbgs() << "[" << TRI->getName(i)
- << ",%reg" << PhysRegsUsed[i] << "] ";
+ dbgs() << "\n>> " << *MI << "Regs:";
+ for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
+ if (PhysRegState[Reg] == regDisabled) continue;
+ dbgs() << " " << TRI->getName(Reg);
+ switch(PhysRegState[Reg]) {
+ case regFree:
+ break;
+ case regReserved:
+ dbgs() << "*";
+ break;
+ default:
+ dbgs() << "=%reg" << PhysRegState[Reg];
+ if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
+ dbgs() << "*";
+ assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
+ "Bad inverse map");
+ break;
+ }
+ }
dbgs() << '\n';
+ // Check that LiveVirtRegs is the inverse.
+ for (LiveRegMap::iterator i = LiveVirtRegs.begin(),
+ e = LiveVirtRegs.end(); i != e; ++i) {
+ assert(TargetRegisterInfo::isVirtualRegister(i->first) &&
+ "Bad map key");
+ assert(TargetRegisterInfo::isPhysicalRegister(i->second.PhysReg) &&
+ "Bad map value");
+ assert(PhysRegState[i->second.PhysReg] == i->first &&
+ "Bad inverse map");
+ }
});
- // Track registers used by instruction.
- UsedInInstr.reset();
-
- // Determine whether this is a copy instruction. The cases where the
- // source or destination are phys regs are handled specially.
- unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
- unsigned SrcCopyPhysReg = 0U;
- bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
- SrcCopySubReg, DstCopySubReg);
- if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg))
- SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg);
-
- // Loop over the implicit uses, making sure they don't get reallocated.
- if (TID.ImplicitUses) {
- for (const unsigned *ImplicitUses = TID.ImplicitUses;
- *ImplicitUses; ++ImplicitUses)
- UsedInInstr.set(*ImplicitUses);
- }
-
- SmallVector<unsigned, 8> Kills;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isKill()) continue;
-
- if (!MO.isImplicit())
- Kills.push_back(MO.getReg());
- else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
- // These are extra physical register kills when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- Kills.push_back(MO.getReg());
- }
-
- // If any physical regs are earlyclobber, spill any value they might
- // have in them, then mark them unallocatable.
- // If any virtual regs are earlyclobber, allocate them now (before
- // freeing inputs that are killed).
- if (MI->isInlineAsm()) {
+ // Debug values are not allowed to change codegen in any way.
+ if (MI->isDebugValue()) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() ||
- !MO.getReg())
- continue;
-
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- unsigned DestVirtReg = MO.getReg();
- unsigned DestPhysReg;
-
- // If DestVirtReg already has a value, use it.
- if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
- DestPhysReg = getReg(MBB, MI, DestVirtReg);
- MF->getRegInfo().setPhysRegUsed(DestPhysReg);
- markVirtRegModified(DestVirtReg);
- getVirtRegLastUse(DestVirtReg) =
- std::make_pair((MachineInstr*)0, 0);
- DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
- << " to %reg" << DestVirtReg << "\n");
- MO.setReg(DestPhysReg); // Assign the earlyclobber register
- } else {
- unsigned Reg = MO.getReg();
- if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
- // These are extra physical register defs when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
-
- MF->getRegInfo().setPhysRegUsed(Reg);
- spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
-
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- }
- }
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
+ if (LRI != LiveVirtRegs.end())
+ setPhysReg(MI, i, LRI->second.PhysReg);
+ else
+ MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry!
}
+ // Next instruction.
+ continue;
}
- // If a DBG_VALUE says something is located in a spilled register,
- // change the DBG_VALUE to be undef, which prevents the register
- // from being reloaded here. Doing that would change the generated
- // code, unless another use immediately follows this instruction.
- if (MI->isDebugValue() &&
- MI->getNumOperands()==3 && MI->getOperand(0).isReg()) {
- unsigned VirtReg = MI->getOperand(0).getReg();
- if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- !getVirt2PhysRegMapSlot(VirtReg))
- MI->getOperand(0).setReg(0U);
- }
+ // If this is a copy, we may be able to coalesce.
+ unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub;
+ if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub))
+ CopySrc = CopyDst = 0;
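+
+    // As operands are allocated, CopySrc and CopyDst are narrowed from
+    // virtregs to the physregs they receive; if both end up as the same
+    // physreg, the copy is an identity move and is queued for removal.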
- // Get the used operands into registers. This has the potential to spill
- // incoming values if we are out of registers. Note that we completely
- // ignore physical register uses here. We assume that if an explicit
- // physical register is referenced by the instruction, that it is guaranteed
- // to be live-in, or the input is badly hosed.
- //
- SmallSet<unsigned, 4> ReloadedRegs;
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand &MO = MI->getOperand(i);
- // here we are looking for only used operands (never def&use)
- if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- MI = reloadVirtReg(MBB, MI, i, ReloadedRegs,
- isCopy ? DstCopyReg : 0);
- }
+ // Track registers used by instruction.
+ UsedInInstr.reset();
+ PhysECs.clear();
- // If this instruction is the last user of this register, kill the
- // value, freeing the register being used, so it doesn't need to be
- // spilled to memory.
- //
- for (unsigned i = 0, e = Kills.size(); i != e; ++i) {
- unsigned VirtReg = Kills[i];
- unsigned PhysReg = VirtReg;
- if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
- // If the virtual register was never materialized into a register, it
- // might not be in the map, but it won't hurt to zero it out anyway.
- unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
- PhysReg = PhysRegSlot;
- PhysRegSlot = 0;
- } else if (PhysRegsUsed[PhysReg] == -2) {
- // Unallocatable register dead, ignore.
+ // First scan.
+ // Mark physreg uses and early clobbers as used.
+    // Find the end of the virtreg operands.
+ unsigned VirtOpEnd = 0;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg) continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ VirtOpEnd = i+1;
continue;
- } else {
- assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) &&
- "Silently clearing a virtual register?");
}
-
- if (!PhysReg) continue;
-
- DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg)
- << "[%reg" << VirtReg <<"], removing it from live set\n");
- removePhysReg(PhysReg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] != -2) {
- DEBUG(dbgs() << " Last use of "
- << TRI->getName(*SubRegs) << "[%reg" << VirtReg
- <<"], removing it from live set\n");
- removePhysReg(*SubRegs);
- }
+ if (!Allocatable.test(Reg)) continue;
+ if (MO.isUse()) {
+ usePhysReg(MO);
+ } else if (MO.isEarlyClobber()) {
+ definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+ PhysECs.push_back(Reg);
}
}
- // Track registers defined by instruction.
- UsedInInstr.reset();
-
- // Loop over all of the operands of the instruction, spilling registers that
- // are defined, and marking explicit destinations in the PhysRegsUsed map.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ // Second scan.
+ // Allocate virtreg uses and early clobbers.
+    // Kill virtregs on their last use.
+ for (unsigned i = 0; i != VirtOpEnd; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() ||
- MO.isEarlyClobber() ||
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
- continue;
-
+ if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
- // These are extra physical register defs when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
-
- MF->getRegInfo().setPhysRegUsed(Reg);
- spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
-
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
-
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (MO.isUse()) {
+ LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
+ unsigned PhysReg = LRI->second.PhysReg;
+ CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0;
+ if (setPhysReg(MI, i, PhysReg))
+ killVirtReg(LRI);
+ } else if (MO.isEarlyClobber()) {
+ // Note: defineVirtReg may invalidate MO.
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0);
+ unsigned PhysReg = LRI->second.PhysReg;
+ setPhysReg(MI, i, PhysReg);
+ PhysECs.push_back(PhysReg);
}
}
- // Loop over the implicit defs, spilling them as well.
- if (TID.ImplicitDefs) {
- for (const unsigned *ImplicitDefs = TID.ImplicitDefs;
- *ImplicitDefs; ++ImplicitDefs) {
- unsigned Reg = *ImplicitDefs;
- if (PhysRegsUsed[Reg] != -2) {
- spillPhysReg(MBB, MI, Reg, true);
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- }
- MF->getRegInfo().setPhysRegUsed(Reg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] == -2) continue;
+ MRI->addPhysRegsUsed(UsedInInstr);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- }
- }
+ // Track registers defined by instruction - early clobbers at this point.
+ UsedInInstr.reset();
+ for (unsigned i = 0, e = PhysECs.size(); i != e; ++i) {
+ unsigned PhysReg = PhysECs[i];
+ UsedInInstr.set(PhysReg);
+ for (const unsigned *AS = TRI->getAliasSet(PhysReg);
+ unsigned Alias = *AS; ++AS)
+ UsedInInstr.set(Alias);
}
- SmallVector<unsigned, 8> DeadDefs;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDead())
- DeadDefs.push_back(MO.getReg());
+ unsigned DefOpEnd = MI->getNumOperands();
+ if (TID.isCall()) {
+ // Spill all virtregs before a call. This serves two purposes: 1. If an
+ // exception is thrown, the landing pad is going to expect to find registers
+ // in their spill slots, and 2. we don't have to wade through all the
+ // <imp-def> operands on the call instruction.
+ DefOpEnd = VirtOpEnd;
+ DEBUG(dbgs() << " Spilling remaining registers before call.\n");
+ spillAll(MI);
}
- // Okay, we have allocated all of the source operands and spilled any values
- // that would be destroyed by defs of this instruction. Loop over the
- // explicit defs and assign them to a register, spilling incoming values if
- // we need to scavenge a register.
- //
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ // Third scan.
+ // Allocate defs and collect dead defs.
+ for (unsigned i = 0; i != DefOpEnd; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() || !MO.getReg() ||
- MO.isEarlyClobber() ||
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
-
- unsigned DestVirtReg = MO.getReg();
- unsigned DestPhysReg;
-
- // If DestVirtReg already has a value, use it.
- if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
- // If this is a copy try to reuse the input as the output;
- // that will make the copy go away.
- // If this is a copy, the source reg is a phys reg, and
- // that reg is available, use that phys reg for DestPhysReg.
- // If this is a copy, the source reg is a virtual reg, and
- // the phys reg that was assigned to that virtual reg is now
- // available, use that phys reg for DestPhysReg. (If it's now
- // available that means this was the last use of the source.)
- if (isCopy &&
- TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
- isPhysRegAvailable(SrcCopyReg)) {
- DestPhysReg = SrcCopyReg;
- assignVirtToPhysReg(DestVirtReg, DestPhysReg);
- } else if (isCopy &&
- TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
- SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
- MF->getRegInfo().getRegClass(DestVirtReg)->
- contains(SrcCopyPhysReg)) {
- DestPhysReg = SrcCopyPhysReg;
- assignVirtToPhysReg(DestVirtReg, DestPhysReg);
- } else
- DestPhysReg = getReg(MBB, MI, DestVirtReg);
- }
- MF->getRegInfo().setPhysRegUsed(DestPhysReg);
- markVirtRegModified(DestVirtReg);
- getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
- DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
- << " to %reg" << DestVirtReg << "\n");
- MO.setReg(DestPhysReg); // Assign the output register
- UsedInInstr.set(DestPhysReg);
- }
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue;
+ unsigned Reg = MO.getReg();
- // If this instruction defines any registers that are immediately dead,
- // kill them now.
- //
- for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) {
- unsigned VirtReg = DeadDefs[i];
- unsigned PhysReg = VirtReg;
- if (TargetRegisterInfo::isVirtualRegister(VirtReg)) {
- unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg);
- PhysReg = PhysRegSlot;
- assert(PhysReg != 0);
- PhysRegSlot = 0;
- } else if (PhysRegsUsed[PhysReg] == -2) {
- // Unallocatable register dead, ignore.
- continue;
- } else if (!PhysReg)
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!Allocatable.test(Reg)) continue;
+ definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ?
+ regFree : regReserved);
continue;
-
- DEBUG(dbgs() << " Register " << TRI->getName(PhysReg)
- << " [%reg" << VirtReg
- << "] is never used, removing it from live set\n");
- removePhysReg(PhysReg);
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet) {
- if (PhysRegsUsed[*AliasSet] != -2) {
- DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet)
- << " [%reg" << *AliasSet
- << "] is never used, removing it from live set\n");
- removePhysReg(*AliasSet);
- }
}
+ LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc);
+ unsigned PhysReg = LRI->second.PhysReg;
+ if (setPhysReg(MI, i, PhysReg)) {
+ VirtDead.push_back(Reg);
+        CopyDst = 0; // Cancel coalescing.
+ } else
+ CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0;
}
- // Finally, if this is a noop copy instruction, zap it. (Except that if
- // the copy is dead, it must be kept to avoid messing up liveness info for
- // the register scavenger. See pr4100.)
- if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
- SrcCopySubReg, DstCopySubReg) &&
- SrcCopyReg == DstCopyReg && DeadDefs.empty())
- MBB.erase(MI);
+ // Kill dead defs after the scan to ensure that multiple defs of the same
+ // register are allocated identically. We didn't need to do this for uses
+    // because we are creating our own kill flags, and they are always at the
+ // last use.
+ for (unsigned i = 0, e = VirtDead.size(); i != e; ++i)
+ killVirtReg(VirtDead[i]);
+ VirtDead.clear();
+
+ MRI->addPhysRegsUsed(UsedInInstr);
+
+ if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) {
+ DEBUG(dbgs() << "-- coalescing: " << *MI);
+ Coalesced.push_back(MI);
+ } else {
+ DEBUG(dbgs() << "<< " << *MI);
+ }
}
- MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
-
// Spill all physical registers holding virtual registers now.
- for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i)
- if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) {
- if (unsigned VirtReg = PhysRegsUsed[i])
- spillVirtReg(MBB, MI, VirtReg, i);
- else
- removePhysReg(i);
- }
+ DEBUG(dbgs() << "Spilling live registers at end of block.\n");
+ spillAll(MBB->getFirstTerminator());
+
+ // Erase all the coalesced copies. We are delaying it until now because
+ // LiveVirtRegs might refer to the instrs.
+ for (unsigned i = 0, e = Coalesced.size(); i != e; ++i)
+ MBB->erase(Coalesced[i]);
+ NumCopies += Coalesced.size();
+
+ DEBUG(MBB->dump());
}
/// runOnMachineFunction - Register allocate the whole function
///
bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
- DEBUG(dbgs() << "Machine Function\n");
+ DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)Fn.getFunction())->getName() << '\n');
MF = &Fn;
+ MRI = &MF->getRegInfo();
TM = &Fn.getTarget();
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
- PhysRegsUsed.assign(TRI->getNumRegs(), -1);
UsedInInstr.resize(TRI->getNumRegs());
-
- // At various places we want to efficiently check to see whether a register
- // is allocatable. To handle this, we mark all unallocatable registers as
- // being pinned down, permanently.
- {
- BitVector Allocable = TRI->getAllocatableSet(Fn);
- for (unsigned i = 0, e = Allocable.size(); i != e; ++i)
- if (!Allocable[i])
- PhysRegsUsed[i] = -2; // Mark the reg unallocable.
- }
+ Allocatable = TRI->getAllocatableSet(*MF);
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
- unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
+ unsigned LastVirtReg = MRI->getLastVirtReg();
StackSlotForVirtReg.grow(LastVirtReg);
- Virt2PhysRegMap.grow(LastVirtReg);
- Virt2LastUseMap.grow(LastVirtReg);
- VirtRegModified.resize(LastVirtReg+1 -
- TargetRegisterInfo::FirstVirtualRegister);
- UsedInMultipleBlocks.resize(LastVirtReg+1 -
- TargetRegisterInfo::FirstVirtualRegister);
// Loop over all of the basic blocks, eliminating virtual register references
- for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
- MBB != MBBe; ++MBB)
- AllocateBasicBlock(*MBB);
+ for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
+ MBBi != MBBe; ++MBBi) {
+ MBB = &*MBBi;
+ AllocateBasicBlock();
+ }
+
+ // Make sure the set of used physregs is closed under subreg operations.
+ MRI->closePhysRegsUsed(*TRI);
StackSlotForVirtReg.clear();
- PhysRegsUsed.clear();
- VirtRegModified.clear();
- UsedInMultipleBlocks.clear();
- Virt2PhysRegMap.clear();
- Virt2LastUseMap.clear();
return true;
}
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 6c8fc0c..bc331f0 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -809,7 +809,7 @@ float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_,
MachineInstr *MI = &*I;
if (cur->liveAt(li_->getInstructionIndex(MI))) {
unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent());
- Conflicts += powf(10.0f, (float)loopDepth);
+ Conflicts += std::pow(10.0f, (float)loopDepth);
}
}
return Conflicts;
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 94456d1..321ae12 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -37,6 +37,7 @@ using namespace llvm;
STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads , "Number of loads added");
+STATISTIC(NumCopies, "Number of copies coalesced");
static RegisterRegAlloc
localRegAlloc("local", "local register allocator",
@@ -50,6 +51,7 @@ namespace {
private:
const TargetMachine *TM;
MachineFunction *MF;
+ MachineRegisterInfo *MRI;
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
@@ -297,8 +299,18 @@ void RALocal::storeVirtReg(MachineBasicBlock &MBB,
const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
DEBUG(dbgs() << " to stack slot #" << FrameIndex);
- TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC);
+ TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC, TRI);
++NumStores; // Update statistics
+
+ // Mark the spill instruction as last use if we're not killing the register.
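+  // Recording the spill as the last use lets a later spill of the still
+  // clean register place its kill flag on this store's use operand.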
+ if (!isKill) {
+ MachineInstr *Spill = llvm::prior(I);
+ int OpNum = Spill->findRegisterUseOperandIdx(PhysReg);
+ if (OpNum < 0)
+ getVirtRegLastUse(VirtReg) = std::make_pair((MachineInstr*)0, 0);
+ else
+ getVirtRegLastUse(VirtReg) = std::make_pair(Spill, OpNum);
+ }
}
/// spillVirtReg - This method spills the value specified by PhysReg into the
@@ -506,10 +518,15 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
SmallSet<unsigned, 4> &ReloadedRegs,
unsigned PhysReg) {
unsigned VirtReg = MI->getOperand(OpNum).getReg();
+ unsigned SubIdx = MI->getOperand(OpNum).getSubReg();
// If the virtual register is already available, just update the instruction
// and return.
if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) {
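+    // Rewrite a subregister use to the matching physical subregister and
+    // clear the subreg index; physical registers encode it directly.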
+ if (SubIdx) {
+ PR = TRI->getSubReg(PR, SubIdx);
+ MI->getOperand(OpNum).setSubReg(0);
+ }
MI->getOperand(OpNum).setReg(PR); // Assign the input register
if (!MI->isDebugValue()) {
// Do not do these for DBG_VALUE as they can affect codegen.
@@ -543,11 +560,16 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
<< TRI->getName(PhysReg) << "\n");
// Add move instruction(s)
- TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC);
+ TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI);
++NumLoads; // Update statistics
MF->getRegInfo().setPhysRegUsed(PhysReg);
- MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
+ // Assign the input register.
+ if (SubIdx) {
+ MI->getOperand(OpNum).setSubReg(0);
+ MI->getOperand(OpNum).setReg(TRI->getSubReg(PhysReg, SubIdx));
+ } else
+ MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register
getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum);
if (!ReloadedRegs.insert(PhysReg)) {
@@ -626,7 +648,6 @@ static bool precedes(MachineBasicBlock::iterator A,
/// ComputeLocalLiveness - Computes liveness of registers within a basic
/// block, setting the killed/dead flags as appropriate.
void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// Keep track of the most recently seen previous use or def of each reg,
// so that we can update them with dead/kill markers.
DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
@@ -672,18 +693,26 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
// - A def followed by a def is dead
// - A use followed by a def is a kill
if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue;
-
+
+ unsigned SubIdx = MO.getSubReg();
DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
last = LastUseDef.find(MO.getReg());
if (last != LastUseDef.end()) {
// Check if this is a two address instruction. If so, then
// the def does not kill the use.
- if (last->second.first == I &&
- I->isRegTiedToUseOperand(i))
+ if (last->second.first == I && I->isRegTiedToUseOperand(i))
continue;
MachineOperand &lastUD =
last->second.first->getOperand(last->second.second);
+ if (SubIdx && lastUD.getSubReg() != SubIdx)
+ // Partial re-def, the last def is not dead.
+ // %reg1024:5<def> =
+ // %reg1024:6<def> =
+ // or
+ // %reg1024:5<def> = op %reg1024, 5
+ continue;
+
if (lastUD.isDef())
lastUD.setIsDead(true);
else
@@ -732,8 +761,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
// it wouldn't have been otherwise. Nullify the DBG_VALUEs when that
// happens.
bool UsedByDebugValueOnly = false;
- for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
- UE = MRI.reg_end(); UI != UE; ++UI) {
+ for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()),
+ UE = MRI->reg_end(); UI != UE; ++UI) {
// Two cases:
// - used in another block
// - used in the same block before it is defined (loop)
@@ -755,8 +784,8 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
}
if (UsedByDebugValueOnly)
- for (MachineRegisterInfo::reg_iterator UI = MRI.reg_begin(MO.getReg()),
- UE = MRI.reg_end(); UI != UE; ++UI)
+ for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()),
+ UE = MRI->reg_end(); UI != UE; ++UI)
if (UI->isDebugValue() &&
(UI->getParent() != &MBB ||
(MO.isDef() && precedes(&*UI, MI))))
@@ -828,7 +857,8 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg;
unsigned SrcCopyPhysReg = 0U;
bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
- SrcCopySubReg, DstCopySubReg);
+ SrcCopySubReg, DstCopySubReg) &&
+ SrcCopySubReg == DstCopySubReg;
if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg))
SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg);
@@ -878,6 +908,10 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
std::make_pair((MachineInstr*)0, 0);
DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
<< " to %reg" << DestVirtReg << "\n");
+ if (unsigned DestSubIdx = MO.getSubReg()) {
+ MO.setSubReg(0);
+ DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx);
+ }
MO.setReg(DestPhysReg); // Assign the earlyclobber register
} else {
unsigned Reg = MO.getReg();
@@ -1073,6 +1107,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
<< " to %reg" << DestVirtReg << "\n");
+
+ if (unsigned DestSubIdx = MO.getSubReg()) {
+ MO.setSubReg(0);
+ DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx);
+ }
MO.setReg(DestPhysReg); // Assign the output register
}
@@ -1127,8 +1166,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
// the register scavenger. See pr4100.)
if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg,
SrcCopySubReg, DstCopySubReg) &&
- SrcCopyReg == DstCopyReg && DeadDefs.empty())
+ SrcCopyReg == DstCopyReg && SrcCopySubReg == DstCopySubReg &&
+ DeadDefs.empty()) {
+ ++NumCopies;
MBB.erase(MI);
+ }
}
MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
@@ -1165,6 +1207,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
DEBUG(dbgs() << "Machine Function\n");
MF = &Fn;
+ MRI = &Fn.getRegInfo();
TM = &Fn.getTarget();
TRI = TM->getRegisterInfo();
TII = TM->getInstrInfo();
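
The ComputeLocalLiveness change in this file stops marking a def dead when the next def only writes a different subregister. A minimal standalone model (hypothetical types, not LLVM's) of why a partial redefinition keeps the earlier def live:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Writing the :lo half and then the :hi half is not a full redefinition:
      // the first def still feeds the final 64-bit value, so it is not dead.
      struct { uint32_t lo, hi; } reg1024 = {0, 0};
      reg1024.lo = 0xAAAAAAAA;  // %reg1024:5<def> = ...
      reg1024.hi = 0x55555555;  // %reg1024:6<def> = ...  (partial re-def)
      std::printf("%08x%08x\n", reg1024.hi, reg1024.lo);  // both defs visible
      return 0;
    }
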
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 81cfd8f..4fafd28 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -489,7 +489,7 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
// did, but none of their definitions would prevent us from coalescing.
// We're good to go with the coalesce.
- float cBenefit = powf(10.0f, loopInfo->getLoopDepth(mbb)) / 5.0;
+ float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0;
coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 179984f..690e59f 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -343,12 +343,12 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// Spill the scavenged register before I.
assert(ScavengingFrameIndex >= 0 &&
"Cannot scavenge register without an emergency spill slot!");
- TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC);
+ TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI);
MachineBasicBlock::iterator II = prior(I);
TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
// Restore the scavenged register before its use (or first terminator).
- TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC);
+ TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI);
II = prior(UseMI);
TRI->eliminateFrameIndex(II, SPAdj, NULL, this);
}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 587f001..da20c12 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -27,7 +27,6 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
: TM(mf.getTarget()),
TII(TM.getInstrInfo()),
TRI(TM.getRegisterInfo()),
- TLI(TM.getTargetLowering()),
MF(mf), MRI(mf.getRegInfo()),
EntrySU(), ExitSU() {
}
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index 8e03420..ee08e1d 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -51,7 +51,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
}
}
bool Success = TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second,
- SU->CopyDstRC, SU->CopySrcRC);
+ SU->CopyDstRC, SU->CopySrcRC,
+ DebugLoc());
(void)Success;
assert(Success && "copyRegToReg failed!");
} else {
@@ -62,7 +63,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
isNew = isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
bool Success = TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(),
- SU->CopyDstRC, SU->CopySrcRC);
+ SU->CopyDstRC, SU->CopySrcRC,
+ DebugLoc());
(void)Success;
assert(Success && "copyRegToReg failed!");
}
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index ca235c3..09202f8 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -210,7 +210,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
if (MO.isDef() && DanglingDebugValue[Reg].first!=0) {
- SU->setDbgInstr(DanglingDebugValue[Reg].first);
+ SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first);
DbgValueVec[DanglingDebugValue[Reg].second] = 0;
DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0);
}
@@ -599,8 +599,8 @@ MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
}
BB->insert(InsertPos, SU->getInstr());
- if (SU->getDbgInstr())
- BB->insert(InsertPos, SU->getDbgInstr());
+ for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i)
+ BB->insert(InsertPos, SU->DbgInstrList[i]);
}
// Update the Begin iterator, as the first instruction in the block
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index d70608f..ad82db2 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -32,7 +32,7 @@ namespace llvm {
/// For example, loop induction variable increments should be
/// scheduled as soon as possible after the variable's last use.
///
- class VISIBILITY_HIDDEN LoopDependencies {
+ class LLVM_LIBRARY_VISIBILITY LoopDependencies {
const MachineLoopInfo &MLI;
const MachineDominatorTree &MDT;
@@ -94,7 +94,7 @@ namespace llvm {
/// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
/// MachineInstrs.
- class VISIBILITY_HIDDEN ScheduleDAGInstrs : public ScheduleDAG {
+ class LLVM_LIBRARY_VISIBILITY ScheduleDAGInstrs : public ScheduleDAG {
const MachineLoopInfo &MLI;
const MachineDominatorTree &MDT;
const MachineFrameInfo *MFI;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3639f80..6bddd78 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -760,12 +760,18 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
bool Replace1 = false;
SDValue N1 = Op.getOperand(1);
- SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
- if (NN1.getNode() == 0)
- return SDValue();
+ SDValue NN1;
+ if (N0 == N1)
+ NN1 = NN0;
+ else {
+ NN1 = PromoteOperand(N1, PVT, Replace1);
+ if (NN1.getNode() == 0)
+ return SDValue();
+ }
AddToWorkList(NN0.getNode());
- AddToWorkList(NN1.getNode());
+ if (NN1.getNode())
+ AddToWorkList(NN1.getNode());
if (Replace0)
ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
@@ -3425,8 +3431,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
if (NarrowLoad.getNode()) {
- if (NarrowLoad.getNode() != N0.getNode())
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -3564,7 +3574,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
- return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
}
}
@@ -3585,9 +3595,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
NegOne, DAG.getConstant(0, VT));
- }
-
-
+ }
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3615,8 +3623,12 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::TRUNCATE) {
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
if (NarrowLoad.getNode()) {
- if (NarrowLoad.getNode() != N0.getNode())
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
}
}
@@ -3726,8 +3738,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
}
}
- // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
if (N0.getOpcode() == ISD::SETCC) {
+ if (!LegalOperations && VT.isVector()) {
+      // zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
+ // Only do this before legalize for now.
+ EVT N0VT = N0.getOperand(0).getValueType();
+ EVT EltVT = VT.getVectorElementType();
+ SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
+ DAG.getConstant(1, EltVT));
+ if (VT.getSizeInBits() == N0VT.getSizeInBits()) {
+        // We know that the # elements of the result is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+        // we know that the element size of the zext'd result matches the
+ // element size of the compare operands.
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get()),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+ } else {
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
+ DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
+ &OneOps[0], OneOps.size()));
+ }
+ }
+
+ // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
SDValue SCC =
SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
DAG.getConstant(1, VT), DAG.getConstant(0, VT),
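
Per lane, the new fold relies on a vector compare producing an all-ones mask rather than the 0/1 a scalar setcc yields; AND-ing with a splat of 1 recovers the zero-extended value. A scalar model of one lane (illustrative only):

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t a = 7, b = 3;
      int32_t mask = (a > b) ? -1 : 0;  // what a vsetcc lane produces
      int32_t zext = mask & 1;          // and(vsetcc, splat(1)) -> 0 or 1
      std::printf("mask=%d zext=%d\n", mask, zext);
      return 0;
    }
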
@@ -3780,8 +3832,12 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::TRUNCATE) {
SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
if (NarrowLoad.getNode()) {
- if (NarrowLoad.getNode() != N0.getNode())
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
}
}
@@ -3883,8 +3939,39 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
- // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
if (N0.getOpcode() == ISD::SETCC) {
+ // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
+ // Only do this before legalize for now.
+ if (VT.isVector() && !LegalOperations) {
+ EVT N0VT = N0.getOperand(0).getValueType();
+      // We know that the # elements of the result is the same as the
+ // # elements of the compare (and the # elements of the compare result
+ // for that matter). Check to see that they are the same size. If so,
+ // we know that the element size of the sext'd result matches the
+ // element size of the compare operands.
+ if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ // If the desired elements are smaller or larger than the source
+ // elements we can use a matching integer vector type and then
+ // truncate/sign extend
+ else {
+ EVT MatchingElementType =
+ EVT::getIntegerVT(*DAG.getContext(),
+ N0VT.getScalarType().getSizeInBits());
+ EVT MatchingVectorType =
+ EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
+ N0VT.getVectorNumElements());
+ SDValue VsetCC =
+ DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
+ N0.getOperand(1),
+ cast<CondCodeSDNode>(N0.getOperand(2))->get());
+ return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
+ }
+ }
+
+ // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
SDValue SCC =
SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
DAG.getConstant(1, VT), DAG.getConstant(0, VT),
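
For any_extend the mask itself is already an acceptable result, and when element widths differ the fold goes through a vsetcc of the compare's width followed by a truncate or sign extend. Per lane, both directions preserve an all-ones mask, as this scalar sketch (illustrative only) shows:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int64_t wideMask = -1;                // vsetcc lane over i64 elements
      int32_t narrowed = (int32_t)wideMask; // truncate: still all ones
      int64_t widened = (int64_t)narrowed;  // sign extend: still all ones
      std::printf("%d %lld\n", narrowed, (long long)widened);  // -1 -1
      return 0;
    }
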
@@ -5278,10 +5365,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
SDValue Offset;
ISD::MemIndexedMode AM = ISD::UNINDEXED;
if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
- if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
- std::swap(BasePtr, Offset);
- if (Ptr != BasePtr)
- continue;
      // Don't create an indexed load / store with zero offset.
if (isa<ConstantSDNode>(Offset) &&
cast<ConstantSDNode>(Offset)->isNullValue())
@@ -5953,6 +6036,10 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
+ // If the inserted element is an UNDEF, just use the input vector.
+ if (InVal.getOpcode() == ISD::UNDEF)
+ return InVec;
+
// If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
// vector with the inserted element.
if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
@@ -6206,7 +6293,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// all scalar elements the same.
if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
SDNode *V = N0.getNode();
-
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
@@ -6338,13 +6424,21 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
break;
}
- Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
- EltType, LHSOp, RHSOp));
- AddToWorkList(Ops.back().getNode());
- assert((Ops.back().getOpcode() == ISD::UNDEF ||
- Ops.back().getOpcode() == ISD::Constant ||
- Ops.back().getOpcode() == ISD::ConstantFP) &&
- "Scalar binop didn't fold!");
+ // If the vector element type is not legal, the BUILD_VECTOR operands
+ // are promoted and implicitly truncated. Make that explicit here.
+ if (LHSOp.getValueType() != EltType)
+ LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp);
+ if (RHSOp.getValueType() != EltType)
+ RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp);
+
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType,
+ LHSOp, RHSOp);
+ if (FoldOp.getOpcode() != ISD::UNDEF &&
+ FoldOp.getOpcode() != ISD::Constant &&
+ FoldOp.getOpcode() != ISD::ConstantFP)
+ break;
+ Ops.push_back(FoldOp);
+ AddToWorkList(FoldOp.getNode());
}
if (Ops.size() == LHS.getNumOperands()) {
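
The replacement above makes the implicit truncation of promoted BUILD_VECTOR operands explicit before folding. A plain-C++ model of one i8 lane held in a promoted i32 (hypothetical values) shows why folding at the promoted width can give a different lane value than folding at the element type:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t lhsLane = 0x1FF;  // promoted i8 lane; upper bits are garbage
      uint8_t wide  = (uint8_t)(lhsLane / 2);           // 0x1FF/2 -> 255
      uint8_t exact = (uint8_t)((uint8_t)lhsLane / 2);  // 0xFF/2  -> 127
      std::printf("wide=%d exact=%d\n", wide, exact);   // 255 vs 127
      return 0;
    }
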
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index b4c3833..95f4d07 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -56,6 +56,27 @@
#include "FunctionLoweringInfo.h"
using namespace llvm;
+bool FastISel::hasTrivialKill(const Value *V) const {
+ // Don't consider constants or arguments to have trivial kills.
+ const Instruction *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ // No-op casts are trivially coalesced by fast-isel.
+ if (const CastInst *Cast = dyn_cast<CastInst>(I))
+ if (Cast->isNoopCast(TD.getIntPtrType(Cast->getContext())) &&
+ !hasTrivialKill(Cast->getOperand(0)))
+ return false;
+
+ // Only instructions with a single use in the same basic block are considered
+ // to have trivial kills.
+ return I->hasOneUse() &&
+ !(I->getOpcode() == Instruction::BitCast ||
+ I->getOpcode() == Instruction::PtrToInt ||
+ I->getOpcode() == Instruction::IntToPtr) &&
+ cast<Instruction>(I->use_begin())->getParent() == I->getParent();
+}
+
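
hasTrivialKill above approximates "this value dies at its sole use": the value must be an instruction with exactly one use, in the same block, and not one of the address-semantics casts. A toy model of the rule (hypothetical ToyInst type, not LLVM's IR classes):

    #include <cstdio>

    struct ToyInst {
      int Block;        // block the instruction lives in
      int NumUses;      // number of uses of its value
      int UserBlock;    // block of its (first) user
      bool IsNoopCast;  // bitcast/ptrtoint/inttoptr-style cast
    };

    static bool hasTrivialKill(const ToyInst &I) {
      return I.NumUses == 1 && !I.IsNoopCast && I.UserBlock == I.Block;
    }

    int main() {
      ToyInst sameBlock{0, 1, 0, false};   // killed at its one local use
      ToyInst twoUses{0, 2, 0, false};     // lives past the first use
      ToyInst crossBlock{0, 1, 1, false};  // use is in another block
      std::printf("%d %d %d\n", hasTrivialKill(sameBlock),
                  hasTrivialKill(twoUses), hasTrivialKill(crossBlock)); // 1 0 0
      return 0;
    }
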
unsigned FastISel::getRegForValue(const Value *V) {
EVT RealVT = TLI.getValueType(V->getType(), /*AllowUnknown=*/true);
// Don't handle non-simple values in FastISel.
@@ -78,12 +99,24 @@ unsigned FastISel::getRegForValue(const Value *V) {
// cache values defined by Instructions across blocks, and other values
// only locally. This is because Instructions already have the SSA
// def-dominates-use requirement enforced.
- if (ValueMap.count(V))
- return ValueMap[V];
+ DenseMap<const Value *, unsigned>::iterator I = ValueMap.find(V);
+ if (I != ValueMap.end())
+ return I->second;
unsigned Reg = LocalValueMap[V];
if (Reg != 0)
return Reg;
+ // In bottom-up mode, just create the virtual register which will be used
+ // to hold the value. It will be materialized later.
+ if (IsBottomUp) {
+ Reg = createResultReg(TLI.getRegClassFor(VT));
+ if (isa<Instruction>(V))
+ ValueMap[V] = Reg;
+ else
+ LocalValueMap[V] = Reg;
+ return Reg;
+ }
+
return materializeRegForValue(V, VT);
}
@@ -123,7 +156,8 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
unsigned IntegerReg =
getRegForValue(ConstantInt::get(V->getContext(), IntVal));
if (IntegerReg != 0)
- Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg);
+ Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP,
+ IntegerReg, /*Kill=*/false);
}
}
} else if (const Operator *Op = dyn_cast<Operator>(V)) {
@@ -174,25 +208,33 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
else if (Reg != AssignedReg) {
const TargetRegisterClass *RegClass = MRI.getRegClass(Reg);
TII.copyRegToReg(*MBB, MBB->end(), AssignedReg,
- Reg, RegClass, RegClass);
+ Reg, RegClass, RegClass, DL);
}
return AssignedReg;
}
-unsigned FastISel::getRegForGEPIndex(const Value *Idx) {
+std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) {
unsigned IdxN = getRegForValue(Idx);
if (IdxN == 0)
// Unhandled operand. Halt "fast" selection and bail.
- return 0;
+ return std::pair<unsigned, bool>(0, false);
+
+ bool IdxNIsKill = hasTrivialKill(Idx);
// If the index is smaller or larger than intptr_t, truncate or extend it.
MVT PtrVT = TLI.getPointerTy();
EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
- if (IdxVT.bitsLT(PtrVT))
- IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN);
- else if (IdxVT.bitsGT(PtrVT))
- IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN);
- return IdxN;
+ if (IdxVT.bitsLT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ else if (IdxVT.bitsGT(PtrVT)) {
+ IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE,
+ IdxN, IdxNIsKill);
+ IdxNIsKill = true;
+ }
+ return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
}
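
getRegForGEPIndex now also reports whether the index register is killed, and (as before) widens or narrows the index to pointer width first. The sign extension matters for negative indices on 64-bit targets; a plain-C++ analogue:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t idx = -2;       // i32 GEP index, narrower than intptr_t
      uint64_t base = 0x1000;
      // Sign-extend before scaling, as ISD::SIGN_EXTEND does above.
      uint64_t addr = base + (uint64_t)((int64_t)idx * 4);  // 4-byte elements
      std::printf("0x%llx\n", (unsigned long long)addr);    // 0xff8
      return 0;
    }
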
/// SelectBinaryOp - Select and emit code for a binary operator instruction,
@@ -224,10 +266,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
+
// Check if the second operand is a constant and handle it appropriately.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, CI->getZExtValue());
+ ISDOpcode, Op0, Op0IsKill,
+ CI->getZExtValue());
if (ResultReg != 0) {
// We successfully emitted code for the given LLVM Instruction.
UpdateValueMap(I, ResultReg);
@@ -238,7 +283,7 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
// Check if the second operand is a constant float.
if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, CF);
+ ISDOpcode, Op0, Op0IsKill, CF);
if (ResultReg != 0) {
// We successfully emitted code for the given LLVM Instruction.
UpdateValueMap(I, ResultReg);
@@ -251,9 +296,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ bool Op1IsKill = hasTrivialKill(I->getOperand(1));
+
// Now we have both operands in registers. Emit the instruction.
unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, Op1);
+ ISDOpcode,
+ Op0, Op0IsKill,
+ Op1, Op1IsKill);
if (ResultReg == 0)
// Target-specific code wasn't able to find a machine opcode for
// the given ISD opcode and type. Halt "fast" selection and bail.
@@ -270,6 +319,8 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ bool NIsKill = hasTrivialKill(I->getOperand(0));
+
const Type *Ty = I->getOperand(0)->getType();
MVT VT = TLI.getPointerTy();
for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1,
@@ -282,10 +333,11 @@ bool FastISel::SelectGetElementPtr(const User *I) {
uint64_t Offs = TD.getStructLayout(StTy)->getElementOffset(Field);
// FIXME: This can be optimized by combining the add with a
// subsequent one.
- N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ NIsKill = true;
}
Ty = StTy->getElementType(Field);
} else {
@@ -296,27 +348,31 @@ bool FastISel::SelectGetElementPtr(const User *I) {
if (CI->getZExtValue() == 0) continue;
uint64_t Offs =
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
- N = FastEmit_ri_(VT, ISD::ADD, N, Offs, VT);
+ N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ NIsKill = true;
continue;
}
// N = N + Idx * ElementSize;
uint64_t ElementSize = TD.getTypeAllocSize(Ty);
- unsigned IdxN = getRegForGEPIndex(Idx);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
if (IdxN == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
if (ElementSize != 1) {
- IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, ElementSize, VT);
+ IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
if (IdxN == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ IdxNIsKill = true;
}
- N = FastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
+ N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
if (N == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
@@ -338,7 +394,7 @@ bool FastISel::SelectCall(const User *I) {
default: break;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
- if (!DIDescriptor::ValidDebugInfo(DI->getVariable(), CodeGenOpt::None) ||
+ if (!DIVariable(DI->getVariable()).Verify() ||
!MF.getMMI().hasDebugInfo())
return true;
@@ -402,7 +458,7 @@ bool FastISel::SelectCall(const User *I) {
const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
unsigned ResultReg = createResultReg(RC);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- Reg, RC, RC);
+ Reg, RC, RC, DL);
assert(InsertedCopy && "Can't copy address registers!");
InsertedCopy = InsertedCopy;
UpdateValueMap(I, ResultReg);
@@ -432,17 +488,19 @@ bool FastISel::SelectCall(const User *I) {
const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT);
unsigned ResultReg = createResultReg(RC);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg,
- RC, RC);
+ RC, RC, DL);
assert(InsertedCopy && "Can't copy address registers!");
InsertedCopy = InsertedCopy;
+ bool ResultRegIsKill = hasTrivialKill(I);
+
// Cast the register to the type of the selector.
if (SrcVT.bitsGT(MVT::i32))
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE,
- ResultReg);
+ ResultReg, ResultRegIsKill);
else if (SrcVT.bitsLT(MVT::i32))
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32,
- ISD::SIGN_EXTEND, ResultReg);
+ ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill);
if (ResultReg == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
@@ -490,12 +548,15 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
// Unhandled operand. Halt "fast" selection and bail.
return false;
+ bool InputRegIsKill = hasTrivialKill(I->getOperand(0));
+
// If the operand is i1, arrange for the high bits in the register to be zero.
if (SrcVT == MVT::i1) {
SrcVT = TLI.getTypeToTransformTo(I->getContext(), SrcVT);
- InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg);
+ InputReg = FastEmitZExtFromI1(SrcVT.getSimpleVT(), InputReg, InputRegIsKill);
if (!InputReg)
return false;
+ InputRegIsKill = true;
}
// If the result is i1, truncate to the target's type for i1 first.
if (DstVT == MVT::i1)
@@ -504,7 +565,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(),
DstVT.getSimpleVT(),
Opcode,
- InputReg);
+ InputReg, InputRegIsKill);
if (!ResultReg)
return false;
@@ -536,6 +597,8 @@ bool FastISel::SelectBitCast(const User *I) {
if (Op0 == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
+
+ bool Op0IsKill = hasTrivialKill(I->getOperand(0));
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
@@ -545,7 +608,7 @@ bool FastISel::SelectBitCast(const User *I) {
ResultReg = createResultReg(DstClass);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- Op0, DstClass, SrcClass);
+ Op0, DstClass, SrcClass, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -553,7 +616,7 @@ bool FastISel::SelectBitCast(const User *I) {
// If the reg-reg copy failed, select a BIT_CONVERT opcode.
if (!ResultReg)
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
- ISD::BIT_CONVERT, Op0);
+ ISD::BIT_CONVERT, Op0, Op0IsKill);
if (!ResultReg)
return false;
@@ -609,10 +672,12 @@ FastISel::SelectFNeg(const User *I) {
unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I));
if (OpReg == 0) return false;
+ bool OpRegIsKill = hasTrivialKill(I);
+
// If the target has ISD::FNEG, use it.
EVT VT = TLI.getValueType(I->getType());
unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(),
- ISD::FNEG, OpReg);
+ ISD::FNEG, OpReg, OpRegIsKill);
if (ResultReg != 0) {
UpdateValueMap(I, ResultReg);
return true;
@@ -626,18 +691,19 @@ FastISel::SelectFNeg(const User *I) {
return false;
unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
- ISD::BIT_CONVERT, OpReg);
+ ISD::BIT_CONVERT, OpReg, OpRegIsKill);
if (IntReg == 0)
return false;
- unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, IntReg,
+ unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR,
+ IntReg, /*Kill=*/true,
UINT64_C(1) << (VT.getSizeInBits()-1),
IntVT.getSimpleVT());
if (IntResultReg == 0)
return false;
ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
- ISD::BIT_CONVERT, IntResultReg);
+ ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true);
if (ResultReg == 0)
return false;
@@ -782,7 +848,8 @@ FastISel::FastISel(MachineFunction &mf,
TM(MF.getTarget()),
TD(*TM.getTargetData()),
TII(*TM.getInstrInfo()),
- TLI(*TM.getTargetLowering()) {
+ TLI(*TM.getTargetLowering()),
+ IsBottomUp(false) {
}
FastISel::~FastISel() {}
@@ -793,13 +860,15 @@ unsigned FastISel::FastEmit_(MVT, MVT,
}
unsigned FastISel::FastEmit_r(MVT, MVT,
- unsigned, unsigned /*Op0*/) {
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/) {
return 0;
}
unsigned FastISel::FastEmit_rr(MVT, MVT,
- unsigned, unsigned /*Op0*/,
- unsigned /*Op0*/) {
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/) {
return 0;
}
@@ -813,20 +882,23 @@ unsigned FastISel::FastEmit_f(MVT, MVT,
}
unsigned FastISel::FastEmit_ri(MVT, MVT,
- unsigned, unsigned /*Op0*/,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
uint64_t /*Imm*/) {
return 0;
}
unsigned FastISel::FastEmit_rf(MVT, MVT,
- unsigned, unsigned /*Op0*/,
+ unsigned,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
const ConstantFP * /*FPImm*/) {
return 0;
}
unsigned FastISel::FastEmit_rri(MVT, MVT,
unsigned,
- unsigned /*Op0*/, unsigned /*Op1*/,
+ unsigned /*Op0*/, bool /*Op0IsKill*/,
+ unsigned /*Op1*/, bool /*Op1IsKill*/,
uint64_t /*Imm*/) {
return 0;
}
@@ -836,16 +908,18 @@ unsigned FastISel::FastEmit_rri(MVT, MVT,
/// If that fails, it materializes the immediate into a register and tries
/// FastEmit_rr instead.
unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
- unsigned Op0, uint64_t Imm,
- MVT ImmType) {
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm, MVT ImmType) {
// First check if immediate type is legal. If not, we can't use the ri form.
- unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Imm);
+ unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
if (ResultReg != 0)
return ResultReg;
unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
if (MaterialReg == 0)
return 0;
- return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+ return FastEmit_rr(VT, VT, Opcode,
+ Op0, Op0IsKill,
+ MaterialReg, /*Kill=*/true);
}
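
FastEmit_ri_ first tries the reg+imm form and, failing that, materializes the immediate into a register (which is then trivially killable) and retries as reg+reg. A toy sketch of the fallback shape (stub emitters, not the FastISel API):

    #include <cstdint>
    #include <cstdio>

    // Stubs standing in for target hooks: pretend only 12-bit immediates
    // encode directly, and registers are just small integers.
    static unsigned emitRI(unsigned Op0, uint64_t Imm) {
      return Imm <= 0xFFF ? Op0 + 100 : 0;
    }
    static unsigned emitRR(unsigned Op0, unsigned Op1) { return Op0 + Op1; }
    static unsigned materialize(uint64_t) { return 50; }

    static unsigned emitRIWithFallback(unsigned Op0, uint64_t Imm) {
      if (unsigned R = emitRI(Op0, Imm))
        return R;                       // ri form was legal
      unsigned MaterialReg = materialize(Imm);
      if (MaterialReg == 0)
        return 0;
      return emitRR(Op0, MaterialReg);  // rr form; MaterialReg is a kill
    }

    int main() {
      std::printf("%u %u\n", emitRIWithFallback(1, 42),   // ri path: 101
                  emitRIWithFallback(1, 1u << 20));       // rr path: 51
      return 0;
    }
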
/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries
@@ -853,10 +927,10 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode,
/// FastEmit_rf. If that fails, it materializes the immediate into a register
/// and tries FastEmit_rr instead.
unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
- unsigned Op0, const ConstantFP *FPImm,
- MVT ImmType) {
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm, MVT ImmType) {
// First check if immediate type is legal. If not, we can't use the rf form.
- unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, FPImm);
+ unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm);
if (ResultReg != 0)
return ResultReg;
@@ -886,11 +960,13 @@ unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode,
if (IntegerReg == 0)
return 0;
MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT,
- ISD::SINT_TO_FP, IntegerReg);
+ ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true);
if (MaterialReg == 0)
return 0;
}
- return FastEmit_rr(VT, VT, Opcode, Op0, MaterialReg);
+ return FastEmit_rr(VT, VT, Opcode,
+ Op0, Op0IsKill,
+ MaterialReg, /*Kill=*/true);
}
unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
@@ -908,16 +984,16 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0) {
+ unsigned Op0, bool Op0IsKill) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0);
+ BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill);
else {
- BuildMI(MBB, DL, II).addReg(Op0);
+ BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -927,16 +1003,21 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, unsigned Op1) {
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1);
+ BuildMI(MBB, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
else {
- BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1);
+ BuildMI(MBB, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -945,16 +1026,21 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, uint64_t Imm) {
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Imm);
+ BuildMI(MBB, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
else {
- BuildMI(MBB, DL, II).addReg(Op0).addImm(Imm);
+ BuildMI(MBB, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Imm);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -963,16 +1049,21 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, const ConstantFP *FPImm) {
+ unsigned Op0, bool Op0IsKill,
+ const ConstantFP *FPImm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addFPImm(FPImm);
+ BuildMI(MBB, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
else {
- BuildMI(MBB, DL, II).addReg(Op0).addFPImm(FPImm);
+ BuildMI(MBB, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addFPImm(FPImm);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -981,16 +1072,24 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
- unsigned Op0, unsigned Op1, uint64_t Imm) {
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addReg(Op1).addImm(Imm);
+ BuildMI(MBB, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
else {
- BuildMI(MBB, DL, II).addReg(Op0).addReg(Op1).addImm(Imm);
+ BuildMI(MBB, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addReg(Op1, Op1IsKill * RegState::Kill)
+ .addImm(Imm);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -1008,7 +1107,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
else {
BuildMI(MBB, DL, II).addImm(Imm);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -1016,18 +1115,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
}
unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
- unsigned Op0, uint32_t Idx) {
+ unsigned Op0, bool Op0IsKill,
+ uint32_t Idx) {
const TargetRegisterClass* RC = MRI.getRegClass(Op0);
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG);
if (II.getNumDefs() >= 1)
- BuildMI(MBB, DL, II, ResultReg).addReg(Op0).addImm(Idx);
+ BuildMI(MBB, DL, II, ResultReg)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Idx);
else {
- BuildMI(MBB, DL, II).addReg(Op0).addImm(Idx);
+ BuildMI(MBB, DL, II)
+ .addReg(Op0, Op0IsKill * RegState::Kill)
+ .addImm(Idx);
bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- II.ImplicitDefs[0], RC, RC);
+ II.ImplicitDefs[0], RC, RC, DL);
if (!InsertedCopy)
ResultReg = 0;
}
@@ -1036,8 +1140,8 @@ unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT,
/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op
/// with all but the least significant bit set to zero.
-unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op) {
- return FastEmit_ri(VT, VT, ISD::AND, Op, 1);
+unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) {
+ return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1);
}
/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks.
@@ -1070,6 +1174,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
// emitted yet.
for (BasicBlock::const_iterator I = SuccBB->begin();
const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
+
// Ignore dead phi's.
if (PN->use_empty()) continue;
@@ -1092,12 +1197,19 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
+ // Set the DebugLoc for the copy. Prefer the location of the operand
+ // if there is one; use the location of the PHI otherwise.
+ DL = PN->getDebugLoc();
+ if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp))
+ DL = Inst->getDebugLoc();
+
unsigned Reg = getRegForValue(PHIOp);
if (Reg == 0) {
PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
return false;
}
PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
+ DL = DebugLoc();
}
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c5dae82..16eb8a7 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -143,7 +143,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
// Create the reg, emit the copy.
VRBase = MRI->createVirtualRegister(DstRC);
bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg,
- DstRC, SrcRC);
+ DstRC, SrcRC, Node->getDebugLoc());
assert(Emitted && "Unable to issue a copy instruction!\n");
(void) Emitted;
@@ -265,7 +265,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const TargetInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
- bool IsDebug) {
+ bool IsDebug, bool IsClone, bool IsCloned) {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Flag &&
"Chain and flag operands should occur at end of operand list!");
@@ -289,7 +289,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
- DstRC, SrcRC);
+ DstRC, SrcRC, Op.getNode()->getDebugLoc());
assert(Emitted && "Unable to issue a copy instruction!\n");
(void) Emitted;
VReg = NewVReg;
@@ -297,15 +297,25 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
}
// If this value has only one use, that use is a kill. This is a
- // conservative approximation. Tied operands are never killed, so we need
- // to check that. And that means we need to determine the index of the
- // operand.
- unsigned Idx = MI->getNumOperands();
- while (Idx > 0 &&
- MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
- --Idx;
- bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
- bool isKill = Op.hasOneUse() && !isTied && !IsDebug;
+ // conservative approximation. InstrEmitter does trivial coalescing
+ // with CopyFromReg nodes, so don't emit kill flags for them.
+  // Avoid kill flags on scheduler-cloned nodes, since there will be
+ // multiple uses.
+ // Tied operands are never killed, so we need to check that. And that
+ // means we need to determine the index of the operand.
+ bool isKill = Op.hasOneUse() &&
+ Op.getNode()->getOpcode() != ISD::CopyFromReg &&
+ !IsDebug &&
+ !(IsClone || IsCloned);
+ if (isKill) {
+ unsigned Idx = MI->getNumOperands();
+ while (Idx > 0 &&
+ MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
+ --Idx;
+ bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
+ if (isTied)
+ isKill = false;
+ }
MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
false/*isImp*/, isKill,
@@ -322,9 +332,10 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const TargetInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
- bool IsDebug) {
+ bool IsDebug, bool IsClone, bool IsCloned) {
if (Op.isMachineOpcode()) {
- AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
} else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
} else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
@@ -373,7 +384,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Flag &&
"Chain and flag operands should occur at end of operand list!");
- AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
+ IsDebug, IsClone, IsCloned);
}
}
@@ -395,7 +407,8 @@ getSuperRegisterRegClass(const TargetRegisterClass *TRC,
/// EmitSubregNode - Generate machine code for subreg nodes.
///
void InstrEmitter::EmitSubregNode(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap){
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
unsigned VRBase = 0;
unsigned Opc = Node->getMachineOpcode();
@@ -439,7 +452,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// Add def, source, and subreg index
MI->addOperand(MachineOperand::CreateReg(VRBase, true));
- AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap);
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
MI->addOperand(MachineOperand::CreateImm(SubIdx));
MBB->insert(InsertPos, MI);
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
@@ -473,9 +487,11 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
const ConstantSDNode *SD = cast<ConstantSDNode>(N0);
MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue()));
} else
- AddOperand(MI, N0, 0, 0, VRBaseMap);
+ AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
      // Add the subregister being inserted
- AddOperand(MI, N1, 0, 0, VRBaseMap);
+ AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
MI->addOperand(MachineOperand::CreateImm(SubIdx));
MBB->insert(InsertPos, MI);
} else
@@ -503,7 +519,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
// Create the new VReg in the destination class and emit a copy.
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg,
- DstRC, SrcRC);
+ DstRC, SrcRC, Node->getDebugLoc());
assert(Emitted &&
"Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n");
(void) Emitted;
@@ -517,7 +533,8 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
///
void InstrEmitter::EmitRegSequence(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned) {
const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
unsigned NewVReg = MRI->createVirtualRegister(RC);
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
@@ -528,17 +545,21 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
for (unsigned i = 0; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
-#ifndef NDEBUG
if (i & 1) {
unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
- const TargetRegisterClass *SRC =
- getSuperRegisterRegClass(TRC, SubIdx, Node->getValueType(0));
- assert(SRC == RC && "Invalid subregister index in REG_SEQUENCE");
+ const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
+ const TargetRegisterClass *SRC =
+ TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
+ if (!SRC)
+ llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
+ if (SRC != RC) {
+ MRI->setRegClass(NewVReg, SRC);
+ RC = SRC;
+ }
}
-#endif
- AddOperand(MI, Op, i+1, &II, VRBaseMap);
+ AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
}
MBB->insert(InsertPos, MI);
@@ -579,11 +600,17 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
MIB.addReg(0U); // undef
else
AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap,
- true /*IsDebug*/);
+ /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false);
} else if (SD->getKind() == SDDbgValue::CONST) {
const Value *V = SD->getConst();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- MIB.addImm(CI->getSExtValue());
+ // FIXME: SDDbgValues aren't updated with legalization, so it's possible
+ // to have i128 values in them at this point. As a crude workaround, just
+ // drop the debug info if this happens.
+ if (!CI->getValue().isSignedIntN(64))
+ MIB.addReg(0U);
+ else
+ MIB.addImm(CI->getSExtValue());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
MIB.addFPImm(CF);
} else {
@@ -612,7 +639,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Opc == TargetOpcode::EXTRACT_SUBREG ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
- EmitSubregNode(Node, VRBaseMap);
+ EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned);
return;
}
@@ -624,7 +651,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// Handle REG_SEQUENCE specially.
if (Opc == TargetOpcode::REG_SEQUENCE) {
- EmitRegSequence(Node, VRBaseMap);
+ EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned);
return;
}
@@ -663,7 +690,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
for (unsigned i = NumSkip; i != NodeOperands; ++i)
AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
- VRBaseMap);
+ VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
// Transfer all of the memory reference descriptions of this instruction.
MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
@@ -749,7 +776,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
Node->getOperand(1).getValueType());
bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg,
- DstTRC, SrcTRC);
+ DstTRC, SrcTRC, Node->getDebugLoc());
assert(Emitted && "Unable to issue a copy instruction!\n");
(void) Emitted;
break;
@@ -810,7 +837,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// The addressing mode has been selected, just add all of the
// operands to the machine instruction.
for (; NumVals; --NumVals, ++i)
- AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap);
+ AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap,
+ /*IsDebug=*/false, IsClone, IsCloned);
break;
}
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index c7e7c71..02c044c 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -65,7 +65,7 @@ class InstrEmitter {
unsigned IIOpNum,
const TargetInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
- bool IsDebug = false);
+ bool IsDebug, bool IsClone, bool IsCloned);
/// AddOperand - Add the specified operand to the specified machine instr. II
/// specifies the instruction information for the node, and IIOpNum is the
@@ -75,11 +75,12 @@ class InstrEmitter {
unsigned IIOpNum,
const TargetInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
- bool IsDebug = false);
+ bool IsDebug, bool IsClone, bool IsCloned);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
- void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+ void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes.
/// COPY_TO_REGCLASS is just a normal copy, except that the destination
@@ -90,7 +91,8 @@ class InstrEmitter {
/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes.
///
- void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap);
+ void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsClone, bool IsCloned);
public:
/// CountResults - The results of target nodes have register or immediate
/// operands first, then an optional chain, and optional flag operands
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bedfa57..62a37a5 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -23,7 +23,6 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtarget.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -2027,6 +2026,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
return Result;
}
assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
+ // Code below here assumes !isSigned without checking again.
// Implementation of unsigned i64 to f64 following the algorithm in
// __floatundidf in compiler_rt. This implementation has the advantage
@@ -2052,6 +2052,41 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
}
+ // Implementation of unsigned i64 to f32. This implementation has the
+ // advantage of performing rounding correctly.
+ // TODO: Generalize this for use with other types.
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ EVT SHVT = TLI.getShiftAmountTy();
+
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64));
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And,
+ DAG.getConstant(UINT64_C(0x800), MVT::i64));
+ SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
+ DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
+ SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE);
+ SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
+ SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
+ ISD::SETUGE);
+ SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+
+ SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
+ DAG.getConstant(32, SHVT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh);
+ SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc);
+ SDValue TwoP32 =
+ DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), MVT::f64);
+ SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt);
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2);
+ SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo);
+ SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
+ return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
+ DAG.getIntPtrConstant(0));
+
+ }
+
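
A standalone C++ rendering of the expansion above (illustrative; it mirrors the DAG sequence, not the LLVM API): values with more than 53 significant bits are rounded to odd at bit 11 so the later f64 arithmetic stays exact, then the two halves are combined and rounded once to f32.

    #include <cstdint>
    #include <cstdio>

    static float u64_to_f32(uint64_t x) {
      uint64_t rounded = (x & ~UINT64_C(0x7ff)) | 0x800;  // sticky round-to-odd
      uint64_t sel  = (x & 0x7ff) ? rounded : x;
      uint64_t sel2 = (x >= (UINT64_C(1) << 53)) ? sel : x; // only if >53 bits
      double hi = (double)(uint32_t)(sel2 >> 32) * 4294967296.0;  // * 2^32
      double lo = (double)(uint32_t)sel2;
      return (float)(hi + lo);  // single rounding to f32
    }

    int main() {
      uint64_t tests[] = {0, 1, (UINT64_C(1) << 53) + 1, ~UINT64_C(0)};
      for (uint64_t t : tests)
        std::printf("%llu -> %g (direct cast: %g)\n",
                    (unsigned long long)t, u64_to_f32(t), (float)t);
      return 0;
    }
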
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
@@ -2488,6 +2523,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
+ if (getTypeAction(EltVT) == Promote)
+ EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElems; ++i) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 548454c..8b382bc 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2314,13 +2314,29 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL);
}
+static const fltSemantics *EVTToAPFloatSemantics(EVT VT) {
+ switch (VT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unknown FP format");
+ case MVT::f32: return &APFloat::IEEEsingle;
+ case MVT::f64: return &APFloat::IEEEdouble;
+ case MVT::f80: return &APFloat::x87DoubleExtended;
+ case MVT::f128: return &APFloat::IEEEquad;
+ case MVT::ppcf128: return &APFloat::PPCDoubleDouble;
+ }
+}
+
SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
SDValue Op = N->getOperand(0);
EVT SrcVT = Op.getValueType();
EVT DstVT = N->getValueType(0);
DebugLoc dl = N->getDebugLoc();
- if (TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
+ // The following optimization is valid only if every value in SrcVT (when
+ // treated as signed) is representable in DstVT. Check that the mantissa
+  // size of DstVT is at least the number of bits in SrcVT minus 1.
+ const fltSemantics *sem = EVTToAPFloatSemantics(DstVT);
+ if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 &&
+ TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){
// Do a signed conversion then adjust the result.
SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op);
SignedConv = TLI.LowerOperation(SignedConv, DAG);
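
The guard added above compares the destination's mantissa width against SrcVT's bit width minus one (the magnitude bits of the signed reinterpretation). For example, f64's 53-bit mantissa covers any i32 but not any i64, so the signed-conversion trick fires only for the former; a one-line check:

    #include <cstdio>

    int main() {
      const unsigned f64MantissaBits = 53;  // APFloat::IEEEdouble precision
      std::printf("i32 via signed conv ok: %d\n", f64MantissaBits >= 32 - 1);
      std::printf("i64 via signed conv ok: %d\n", f64MantissaBits >= 64 - 1);
      return 0;
    }
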
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d60ad60..c665963 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -32,7 +32,7 @@ namespace llvm {
/// involves promoting small sizes to large sizes or splitting up large values
/// into small values.
///
-class VISIBILITY_HIDDEN DAGTypeLegalizer {
+class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
const TargetLowering &TLI;
SelectionDAG &DAG;
public:
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index b92a672..56f5ded 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -30,7 +30,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/Statistic.h"
#include <climits>
using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index da02850..820ba66 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -24,7 +24,6 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
@@ -53,6 +52,12 @@ static RegisterScheduler
"order when possible",
createSourceListDAGScheduler);
+static RegisterScheduler
+ hybridListDAGScheduler("list-hybrid",
+ "Bottom-up rr list scheduling which avoid stalls for "
+ "long latency instructions",
+ createHybridListDAGScheduler);
+
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -64,6 +69,10 @@ private:
/// it is top-down.
bool isBottomUp;
+ /// NeedLatency - True if the scheduler will make use of latency information.
+ ///
+ bool NeedLatency;
+
/// AvailableQueue - The priority queue to use for the available SUnits.
SchedulingPriorityQueue *AvailableQueue;
@@ -80,9 +89,9 @@ private:
public:
ScheduleDAGRRList(MachineFunction &mf,
- bool isbottomup,
+ bool isbottomup, bool needlatency,
SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup),
+ : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
AvailableQueue(availqueue), Topo(SUnits) {
}
@@ -161,9 +170,11 @@ private:
return NewNode;
}
- /// ForceUnitLatencies - Return true, since register-pressure-reducing
- /// scheduling doesn't need actual latency information.
- bool ForceUnitLatencies() const { return true; }
+ /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
+ /// need actual latency information, but the hybrid scheduler does.
+ bool ForceUnitLatencies() const {
+ return !NeedLatency;
+ }
};
} // end anonymous namespace
@@ -213,6 +224,12 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
#endif
--PredSU->NumSuccsLeft;
+ if (!ForceUnitLatencies()) {
+ // Update the predecessor's height. This is now the cycle when the
+ // predecessor can be scheduled without causing a pipeline stall.
+ PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
+ }
+
// If all the node's successors are scheduled, this node is ready
// to be scheduled. Ignore the special EntrySU node.
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
@@ -244,10 +261,15 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
- DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
+ DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
- assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!");
+#ifndef NDEBUG
+ if (CurCycle < SU->getHeight())
+ DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
+#endif
+
+ // FIXME: Handle noop hazard.
SU->setHeightToAtLeast(CurCycle);
Sequence.push_back(SU);
@@ -339,6 +361,7 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
SU->isAvailable = false;
UnscheduleNodeBottomUp(OldSU);
--CurCycle;
+ AvailableQueue->setCurCycle(CurCycle);
}
assert(!SU->isSucc(OldSU) && "Something is wrong!");
@@ -386,7 +409,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
return NULL;
- DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n");
+ DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
assert(NewNodes.size() == 2 && "Expected a load folding node!");
N = NewNodes[1];
@@ -504,7 +527,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
SU = NewSU;
}
- DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n");
+ DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
NewSU = CreateClone(SU);
// New SUnit has the exact same predecessors.
@@ -786,7 +809,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// Issue copies, these can be expensive cross register class copies.
SmallVector<SUnit*, 2> Copies;
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(dbgs() << "Adding an edge from SU #" << TrySU->NodeNum
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
/*Reg=*/0, /*isNormalMemory=*/false,
@@ -795,7 +818,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
NewDef = Copies.back();
}
- DEBUG(dbgs() << "Adding an edge from SU #" << NewDef->NodeNum
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
@@ -821,6 +844,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
if (CurSU)
ScheduleNodeBottomUp(CurSU, CurCycle);
++CurCycle;
+ AvailableQueue->setCurCycle(CurCycle);
}
// Reverse the order if it is bottom up.
@@ -889,6 +913,7 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
unsigned CurCycle = 0;
+ AvailableQueue->setCurCycle(CurCycle);
// Release any successors of the special Entry node.
ReleaseSuccessors(&EntrySU);
@@ -911,6 +936,7 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
if (CurSU)
ScheduleNodeTopDown(CurSU, CurCycle);
++CurCycle;
+ AvailableQueue->setCurCycle(CurCycle);
}
#ifndef NDEBUG
@@ -956,6 +982,16 @@ namespace {
bool operator()(const SUnit* left, const SUnit* right) const;
};
+
+ struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
+ hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
+ : SPQ(spq) {}
+ hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+ };
} // end anonymous namespace
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
@@ -990,8 +1026,9 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
namespace {
template<class SF>
class RegReductionPriorityQueue : public SchedulingPriorityQueue {
- PriorityQueue<SUnit*, std::vector<SUnit*>, SF> Queue;
- unsigned currentQueueId;
+ std::vector<SUnit*> Queue;
+ SF Picker;
+ unsigned CurQueueId;
protected:
// SUnits - The SUnits for the current graph.
@@ -1007,7 +1044,7 @@ namespace {
public:
RegReductionPriorityQueue(const TargetInstrInfo *tii,
const TargetRegisterInfo *tri)
- : Queue(SF(this)), currentQueueId(0),
+ : Picker(this), CurQueueId(0),
TII(tii), TRI(tri), scheduleDAG(NULL) {}
void initNodes(std::vector<SUnit> &sunits) {
@@ -1067,26 +1104,26 @@ namespace {
unsigned getNodeOrdering(const SUnit *SU) const {
return scheduleDAG->DAG->GetOrdering(SU->getNode());
}
-
- unsigned size() const { return Queue.size(); }
bool empty() const { return Queue.empty(); }
void push(SUnit *U) {
assert(!U->NodeQueueId && "Node in the queue already");
- U->NodeQueueId = ++currentQueueId;
- Queue.push(U);
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
}
- void push_all(const std::vector<SUnit *> &Nodes) {
- for (unsigned i = 0, e = Nodes.size(); i != e; ++i)
- push(Nodes[i]);
- }
-
SUnit *pop() {
if (empty()) return NULL;
- SUnit *V = Queue.top();
- Queue.pop();
+ std::vector<SUnit *>::iterator Best = Queue.begin();
+ for (std::vector<SUnit *>::iterator I = next(Queue.begin()),
+ E = Queue.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Queue.end()))
+ std::swap(*Best, Queue.back());
+ Queue.pop_back();
V->NodeQueueId = 0;
return V;
}
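The PriorityQueue member becomes a plain vector with a linear-scan pop because the hybrid picker's ordering depends on mutable scheduler state (the current cycle), which would quietly invalidate a binary heap's invariants between pops. A generic sketch of the idiom with illustrative names (it assumes a non-empty queue, as the empty() check above guarantees):

#include <algorithm>
#include <cstdio>
#include <vector>

// Re-evaluate a possibly stateful picker across the whole queue, swap the
// winner to the back, and pop it in O(1).
template <class T, class Pred>
T PopBest(std::vector<T> &Q, Pred Picker) {
  typename std::vector<T>::iterator Best = Q.begin();
  for (typename std::vector<T>::iterator I = Best + 1, E = Q.end(); I != E; ++I)
    if (Picker(*Best, *I)) // Picker(a, b) == true: b should come out before a
      Best = I;
  T V = *Best;
  if (Best != Q.end() - 1)
    std::swap(*Best, Q.back());
  Q.pop_back();
  return V;
}

static bool PreferLarger(int A, int B) { return B > A; }

int main() {
  std::vector<int> Q;
  Q.push_back(3); Q.push_back(7); Q.push_back(5);
  std::printf("%d\n", PopBest(Q, PreferLarger)); // prints 7
  return 0;
}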
@@ -1094,7 +1131,11 @@ namespace {
void remove(SUnit *SU) {
assert(!Queue.empty() && "Queue is empty!");
assert(SU->NodeQueueId != 0 && "Not in queue!");
- Queue.erase_one(SU);
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
SU->NodeQueueId = 0;
}
@@ -1117,6 +1158,9 @@ namespace {
typedef RegReductionPriorityQueue<src_ls_rr_sort>
SrcRegReductionPriorityQueue;
+
+ typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+ HybridBURRPriorityQueue;
}
/// closestSucc - Returns the scheduled cycle of the successor which is
@@ -1203,7 +1247,7 @@ bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
}
// Source order, otherwise bottom up.
-bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
unsigned LOrder = SPQ->getNodeOrdering(left);
unsigned ROrder = SPQ->getNodeOrdering(right);
@@ -1215,6 +1259,25 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
return BURRSort(left, right, SPQ);
}
+bool hybrid_ls_rr_sort::operator()(const SUnit* left, const SUnit* right) const {
+ bool LStall = left->SchedulingPref == Sched::Latency &&
+ SPQ->getCurCycle() < left->getHeight();
+ bool RStall = right->SchedulingPref == Sched::Latency &&
+ SPQ->getCurCycle() < right->getHeight();
+ // If scheduling one of the nodes would cause a pipeline stall, delay it.
+ // If scheduling both nodes would cause a pipeline stall, sort them
+ // according to their height.
+ // If neither will cause a pipeline stall, try to reduce register pressure.
+ if (LStall) {
+ if (!RStall)
+ return true;
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
+ } else if (RStall)
+ return false;
+ return BURRSort(left, right, SPQ);
+}
+
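Spelled out, the comparator's stall handling reduces to four cases. A compact sketch of just the branch structure (illustrative; the BURRSort register-pressure fallback is stubbed out as false here):

#include <cstdio>

// Returning true means 'left' is scheduled after 'right'.
static bool HybridOrder(bool LStall, bool RStall,
                        unsigned LHeight, unsigned RHeight) {
  if (LStall) {
    if (!RStall)
      return true;              // only left stalls: delay left
    if (LHeight != RHeight)
      return LHeight > RHeight; // both stall: lower height goes first
  } else if (RStall)
    return false;               // only right stalls: keep left ahead
  return false;                 // neither stalls: register pressure decides
}

int main() {
  std::printf("%d %d\n", HybridOrder(true, false, 0, 0), // 1: delay left
              HybridOrder(true, true, 5, 3));            // 1: right is shorter
  return 0;
}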
template<class SF>
bool
RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
@@ -1379,8 +1442,8 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
// Ok, the transformation is safe and the heuristics suggest it is
// profitable. Update the graph.
- DEBUG(dbgs() << "Prescheduling SU # " << SU->NodeNum
- << " next to PredSU # " << PredSU->NodeNum
+ DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
+ << " next to PredSU #" << PredSU->NodeNum
<< " to guide scheduling in the presence of multiple uses\n");
for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
SDep Edge = PredSU->Succs[i];
@@ -1469,7 +1532,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
(hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
(!SU->isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, SU)) {
- DEBUG(dbgs() << "Adding a pseudo-two-addr edge from SU # "
+ DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
<< SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0,
/*Reg=*/0, /*isNormalMemory=*/false,
@@ -1563,8 +1626,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(TII, TRI);
- ScheduleDAGRRList *SD =
- new ScheduleDAGRRList(*IS->MF, true, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
}
@@ -1577,8 +1639,7 @@ llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
TDRegReductionPriorityQueue *PQ = new TDRegReductionPriorityQueue(TII, TRI);
- ScheduleDAGRRList *SD =
- new ScheduleDAGRRList(*IS->MF, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
PQ->setScheduleDAG(SD);
return SD;
}
@@ -1591,8 +1652,20 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(TII, TRI);
- ScheduleDAGRRList *SD =
- new ScheduleDAGRRList(*IS->MF, true, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+ PQ->setScheduleDAG(SD);
+ return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+ const TargetMachine &TM = IS->TM;
+ const TargetInstrInfo *TII = TM.getInstrInfo();
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+
+ HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(TII, TRI);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
PQ->setScheduleDAG(SD);
return SD;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 76e4771..3185c88 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/ADT/DenseMap.h"
@@ -44,6 +45,24 @@ void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb,
ScheduleDAG::Run(bb, insertPos);
}
+/// NewSUnit - Creates a new SUnit and returns a pointer to it.
+///
+SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) {
+#ifndef NDEBUG
+ const SUnit *Addr = 0;
+ if (!SUnits.empty())
+ Addr = &SUnits[0];
+#endif
+ SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
+ assert((Addr == 0 || Addr == &SUnits[0]) &&
+ "SUnits std::vector reallocated on the fly!");
+ SUnits.back().OrigNode = &SUnits.back();
+ SUnit *SU = &SUnits.back();
+ const TargetLowering &TLI = DAG->getTargetLoweringInfo();
+ SU->SchedulingPref = TLI.getSchedulingPreference(N);
+ return SU;
+}
+
SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SUnit *SU = NewSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
@@ -52,6 +71,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
SU->hasPhysRegClobbers = Old->hasPhysRegClobbers;
+ SU->SchedulingPref = Old->SchedulingPref;
Old->isCloned = true;
return SU;
}
@@ -217,9 +237,6 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// This is a temporary workaround.
SUnits.reserve(NumNodes * 2);
- // Check to see if the scheduler cares about latencies.
- bool UnitLatencies = ForceUnitLatencies();
-
// Add all nodes in depth first order.
SmallVector<SDNode*, 64> Worklist;
SmallPtrSet<SDNode*, 64> Visited;
@@ -282,10 +299,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
N->setNodeId(NodeSUnit->NodeNum);
// Assign the Latency field of NodeSUnit using target-provided information.
- if (UnitLatencies)
- NodeSUnit->Latency = 1;
- else
- ComputeLatency(NodeSUnit);
+ ComputeLatency(NodeSUnit);
}
}
@@ -353,7 +367,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpSU->Latency, PhysReg);
if (!isChain && !UnitLatencies) {
- ComputeOperandLatency(OpSU, SU, const_cast<SDep &>(dep));
+ ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep));
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
@@ -377,7 +391,17 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
}
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies()) {
+ SU->Latency = 1;
+ return;
+ }
+
const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ if (InstrItins.isEmpty()) {
+ SU->Latency = 1;
+ return;
+ }
// Compute the latency for the node. We use the sum of the latencies for
// all nodes flagged together into this SUnit.
@@ -389,6 +413,37 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
}
}
+void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const {
+ // Check to see if the scheduler cares about latencies.
+ if (ForceUnitLatencies())
+ return;
+
+ const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+ if (InstrItins.isEmpty())
+ return;
+
+ if (dep.getKind() != SDep::Data)
+ return;
+
+ unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
+ if (Def->isMachineOpcode() && Use->isMachineOpcode()) {
+ const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+ if (DefIdx >= II.getNumDefs())
+ return;
+ int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
+ if (DefCycle < 0)
+ return;
+ const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
+ int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+ if (UseCycle >= 0) {
+ int Latency = DefCycle - UseCycle + 1;
+ if (Latency >= 0)
+ dep.setLatency(Latency);
+ }
+ }
+}
+
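A worked instance of the latency formula above, with made-up operand cycles; real values come from the target's InstrItineraryData via getOperandCycle():

#include <cstdio>

int main() {
  int DefCycle = 4; // cycle at which the def's sched class writes the result
  int UseCycle = 2; // cycle at which the use's sched class reads the operand
  int Latency = DefCycle - UseCycle + 1; // == 3
  if (Latency >= 0)
    std::printf("dep.setLatency(%d)\n", Latency);
  return 0;
}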
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
if (!SU->getNode()) {
dbgs() << "PHYS REG COPY\n";
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 7ae8ec2..e8714ba 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -66,18 +66,7 @@ namespace llvm {
/// NewSUnit - Creates a new SUnit and returns a pointer to it.
///
- SUnit *NewSUnit(SDNode *N) {
-#ifndef NDEBUG
- const SUnit *Addr = 0;
- if (!SUnits.empty())
- Addr = &SUnits[0];
-#endif
- SUnits.push_back(SUnit(N, (unsigned)SUnits.size()));
- assert((Addr == 0 || Addr == &SUnits[0]) &&
- "SUnits std::vector reallocated on the fly!");
- SUnits.back().OrigNode = &SUnits.back();
- return &SUnits.back();
- }
+ SUnit *NewSUnit(SDNode *N);
/// Clone - Creates a clone of the specified SUnit. It does not copy the
/// predecessors / successors info nor the temporary scheduling states.
@@ -94,6 +83,15 @@ namespace llvm {
///
virtual void ComputeLatency(SUnit *SU);
+ /// ComputeOperandLatency - Override dependence edge latency using
+ /// operand use/def information.
+ ///
+ virtual void ComputeOperandLatency(SUnit *Def, SUnit *Use,
+ SDep& dep) const { }
+
+ virtual void ComputeOperandLatency(SDNode *Def, SDNode *Use,
+ unsigned OpIdx, SDep& dep) const;
+
virtual MachineBasicBlock *EmitSchedule();
/// Schedule - Order nodes according to selected style, filling
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e6df742..38bf68b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -15,6 +15,7 @@
#include "SDNodeOrdering.h"
#include "SDNodeDbgValue.h"
#include "llvm/Constants.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Function.h"
#include "llvm/GlobalAlias.h"
@@ -32,6 +33,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
@@ -789,7 +791,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli)
- : TM(tm), TLI(*tm.getTargetLowering()), FLI(fli),
+ : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()),
+ FLI(fli),
EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)),
Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
@@ -963,8 +966,18 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
EVT EltVT = VT.getScalarType();
if (EltVT==MVT::f32)
return getConstantFP(APFloat((float)Val), VT, isTarget);
- else
+ else if (EltVT==MVT::f64)
return getConstantFP(APFloat(Val), VT, isTarget);
+ else if (EltVT==MVT::f80 || EltVT==MVT::f128) {
+ bool ignored;
+ APFloat apf = APFloat(Val);
+ apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven,
+ &ignored);
+ return getConstantFP(apf, VT, isTarget);
+ } else {
+ assert(0 && "Unsupported type in getConstantFP");
+ return SDValue();
+ }
}
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
@@ -2614,7 +2627,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
break;
case ISD::AND:
- assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
// (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's
// worth handling here.
@@ -2627,7 +2641,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::XOR:
case ISD::ADD:
case ISD::SUB:
- assert(VT.isInteger() && N1.getValueType() == N2.getValueType() &&
+ assert(VT.isInteger() && "This operator does not apply to FP types!");
+ assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
// (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so
// it's worth handling here.
@@ -2642,7 +2657,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::SDIV:
case ISD::SREM:
assert(VT.isInteger() && "This operator does not apply to FP types!");
- // fall through
+ assert(N1.getValueType() == N2.getValueType() &&
+ N1.getValueType() == VT && "Binary operator types must match!");
+ break;
case ISD::FADD:
case ISD::FSUB:
case ISD::FMUL:
@@ -2665,6 +2682,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
return N1;
}
}
+ assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
break;
@@ -3525,7 +3543,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
// Then check to see if we should lower the memcpy with target-specific
// code. If the target chooses to do this, this is the next best.
SDValue Result =
- TLI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
+ TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
isVol, AlwaysInline,
DstSV, DstSVOff, SrcSV, SrcSVOff);
if (Result.getNode())
@@ -3590,7 +3608,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
// Then check to see if we should lower the memmove with target-specific
// code. If the target chooses to do this, this is the next best.
SDValue Result =
- TLI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
DstSV, DstSVOff, SrcSV, SrcSVOff);
if (Result.getNode())
return Result;
@@ -3641,7 +3659,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
// Then check to see if we should lower the memset with target-specific
// code. If the target chooses to do this, this is the next best.
SDValue Result =
- TLI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
+ TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
DstSV, DstSVOff);
if (Result.getNode())
return Result;
@@ -5417,6 +5435,8 @@ const EVT *SDNode::getValueTypeList(EVT VT) {
sys::SmartScopedLock<true> Lock(*VTMutex);
return &(*EVTs->insert(VT).first);
} else {
+ assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+ "Value type out of range!");
return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
}
@@ -5607,6 +5627,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::LSDAADDR: return "LSDAADDR";
case ISD::EHSELECTION: return "EHSELECTION";
case ISD::EH_RETURN: return "EH_RETURN";
+ case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
+ case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
case ISD::ConstantPool: return "ConstantPool";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
@@ -6008,6 +6030,21 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getNodeId() != -1)
OS << " [ID=" << getNodeId() << ']';
+
+ DebugLoc dl = getDebugLoc();
+ if (G && !dl.isUnknown()) {
+ DIScope
+ Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext()));
+ OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
+ if (Scope.Verify())
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << dl.getLine();
+ if (dl.getCol() != 0)
+ OS << ':' << dl.getCol();
+ }
}
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a38b204..fbe601f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3726,6 +3726,12 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
return true;
}
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp)
+#define setjmp_undefined_for_visual_studio
+#undef setjmp
+#endif
+
/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
/// we want to emit this as a call to a named external function, return the name
/// otherwise lower it and return null.
@@ -3818,7 +3824,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
- if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+ if (!DIVariable(DI.getVariable()).Verify())
return 0;
MDNode *Variable = DI.getVariable();
@@ -3881,7 +3887,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
- if (!DIDescriptor::ValidDebugInfo(DI.getVariable(), CodeGenOpt::None))
+ if (!DIVariable(DI.getVariable()).Verify())
return 0;
MDNode *Variable = DI.getVariable();
@@ -3900,6 +3906,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, 0, false);
} else {
+ bool createUndef = false;
+ // FIXME: Why not use getValue() directly?
SDValue &N = NodeMap[V];
if (N.getNode()) {
if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
@@ -3907,7 +3915,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
N.getResNo(), Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
- } else {
+ } else if (isa<PHINode>(V) && !V->use_empty()) {
+ SDValue N = getValue(V);
+ if (N.getNode()) {
+ if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
+ SDV = DAG.getDbgValue(Variable, N.getNode(),
+ N.getResNo(), Offset, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ }
+ } else
+ createUndef = true;
+ } else
+ createUndef = true;
+ if (createUndef) {
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter; we need this fallback.
SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
@@ -4018,6 +4038,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MMI.setCurrentCallSite(CI->getZExtValue());
return 0;
}
+ case Intrinsic::eh_sjlj_setjmp: {
+ setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
+ getValue(I.getOperand(1))));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_longjmp: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
+ getRoot(),
+ getValue(I.getOperand(1))));
+ return 0;
+ }
case Intrinsic::convertff:
case Intrinsic::convertfsi:
@@ -4924,7 +4955,7 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF,
namespace llvm {
/// AsmOperandInfo - This contains information for each constraint that we are
/// lowering.
-class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
+class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
public TargetLowering::AsmOperandInfo {
public:
/// CallOperand - If this is the result output operand or a clobber
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 422cb7a..65b8d4f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -25,9 +25,11 @@
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Module.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/GCMetadata.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -131,11 +133,13 @@ namespace llvm {
if (OptLevel == CodeGenOpt::None)
return createFastDAGScheduler(IS, OptLevel);
- if (TLI.getSchedulingPreference() == TargetLowering::SchedulingForLatency)
+ if (TLI.getSchedulingPreference() == Sched::Latency)
return createTDListDAGScheduler(IS, OptLevel);
- assert(TLI.getSchedulingPreference() ==
- TargetLowering::SchedulingForRegPressure && "Unknown sched type!");
- return createBURRListDAGScheduler(IS, OptLevel);
+ if (TLI.getSchedulingPreference() == Sched::RegPressure)
+ return createBURRListDAGScheduler(IS, OptLevel);
+ assert(TLI.getSchedulingPreference() == Sched::Hybrid &&
+ "Unknown sched type!");
+ return createHybridListDAGScheduler(IS, OptLevel);
}
}
@@ -188,6 +192,39 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+/// FunctionCallsSetJmp - Return true if the function has a call to setjmp or
+/// any other function that gcc recognizes as "returning twice". This is used to
+/// limit code-gen optimizations on the machine function.
+///
+/// FIXME: Remove after <rdar://problem/8031714> is fixed.
+static bool FunctionCallsSetJmp(const Function *F) {
+ const Module *M = F->getParent();
+ static const char *ReturnsTwiceFns[] = {
+ "setjmp",
+ "sigsetjmp",
+ "setjmp_syscall",
+ "savectx",
+ "qsetjmp",
+ "vfork",
+ "getcontext"
+ };
+#define NUM_RETURNS_TWICE_FNS sizeof(ReturnsTwiceFns) / sizeof(const char *)
+
+ for (unsigned I = 0; I < NUM_RETURNS_TWICE_FNS; ++I)
+ if (const Function *Callee = M->getFunction(ReturnsTwiceFns[I])) {
+ if (!Callee->use_empty())
+ for (Value::const_use_iterator
+ I = Callee->use_begin(), E = Callee->use_end();
+ I != E; ++I)
+ if (const CallInst *CI = dyn_cast<CallInst>(I))
+ if (CI->getParent()->getParent() == F)
+ return true;
+ }
+
+ return false;
+#undef NUM_RETURNS_TWICE_FNS
+}
+
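For reference, a hypothetical function the scan above would flag: it contains a direct call site of one of the listed names, so the enclosing machine function gets marked via setCallsSetJmp(true) below.

#include <csetjmp>
#include <cstdio>

static std::jmp_buf Env;

int UsesSetJmp() {
  if (setjmp(Env)) // a "returns twice" call site
    return 1;      // reached again via longjmp
  std::puts("first return");
  return 0;
}

int main() { return UsesSetJmp(); }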
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
assert((!EnableFastISelVerbose || EnableFastISel) &&
@@ -218,6 +255,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock *EntryMBB = MF->begin();
RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII);
+ DenseMap<unsigned, unsigned> LiveInMap;
+ if (!FuncInfo->ArgDbgValues.empty())
+ for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
+ E = RegInfo->livein_end(); LI != E; ++LI)
+ if (LI->second)
+ LiveInMap.insert(std::make_pair(LI->first, LI->second));
+
// Insert DBG_VALUE instructions for function arguments to the entry block.
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
@@ -230,8 +274,44 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// FIXME: VR def may not be in entry block.
Def->getParent()->insert(llvm::next(InsertPos), MI);
}
+
+ // If Reg is live-in then update debug info to track its copy in a vreg.
+ DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg);
+ if (LDI != LiveInMap.end()) {
+ MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
+ MachineBasicBlock::iterator InsertPos = Def;
+ const MDNode *Variable =
+ MI->getOperand(MI->getNumOperands()-1).getMetadata();
+ unsigned Offset = MI->getOperand(1).getImm();
+ // Def is never a terminator here, so it is ok to increment InsertPos.
+ BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(LDI->second, RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+ }
+ }
+
+ // Determine if there are any calls in this machine function.
+ MachineFrameInfo *MFI = MF->getFrameInfo();
+ if (!MFI->hasCalls()) {
+ for (MachineFunction::const_iterator
+ I = MF->begin(), E = MF->end(); I != E; ++I) {
+ const MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::const_iterator
+ II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+ const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
+ if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) {
+ MFI->setHasCalls(true);
+ goto done;
+ }
+ }
+ }
+ done:;
}
+ // Determine if there is a call to setjmp in the machine function.
+ MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn));
+
// Release function-specific state. SDB and CurDAG are already cleared
// at this point.
FuncInfo->clear();
@@ -662,6 +742,7 @@ void SelectionDAGISel::DoInstructionSelection() {
CurDAG->setRoot(Dummy.getValue());
}
+
DEBUG(errs() << "===== Instruction selection ends:\n");
PostprocessISelDAG();
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8a4a1b1..44a80d3 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -18,7 +18,6 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -544,7 +543,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
BooleanContents = UndefinedBooleanContent;
- SchedPreferenceInfo = SchedulingForLatency;
+ SchedPreferenceInfo = Sched::Latency;
JumpBufSize = 0;
JumpBufAlignment = 0;
IfCvtBlockSizeLimit = 2;
@@ -2417,7 +2416,7 @@ std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
if (Constraint[0] != '{')
- return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
@@ -2449,7 +2448,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
}
}
- return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
+ return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
}
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
index d20477f..a081e3c 100644
--- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -12,9 +12,11 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-TargetSelectionDAGInfo::TargetSelectionDAGInfo() {
+TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM)
+ : TD(TM.getTargetData()) {
}
TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 1f68a6f..ed3c243 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -460,7 +460,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
continue;
- if (DstReg == IntB.reg) {
+ if (DstReg == IntB.reg && DstSubIdx == 0) {
// This copy will become a noop. If it's defining a new val#,
// remove that val# as well. However this live range is being
// extended to the end of the existing live range defined by the copy.
@@ -624,9 +624,10 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
LR->valno->addKill(LastUseIdx.getDefIndex());
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- DstReg == li.reg) {
+ DstReg == li.reg && DstSubIdx == 0) {
// Last use is itself an identity copy.
- int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg, false, tri_);
+ int DeadIdx = LastUseMI->findRegisterDefOperandIdx(li.reg,
+ false, false, tri_);
LastUseMI->getOperand(DeadIdx).setIsDead();
}
return true;
@@ -810,6 +811,8 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
CopySrcSubIdx, CopyDstSubIdx) &&
+ CopySrcSubIdx == 0 &&
+ CopyDstSubIdx == 0 &&
CopySrcReg != CopyDstReg &&
CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
// If the use is a copy and it won't be coalesced away, and its source
@@ -835,8 +838,13 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0))))
UseMI->addRegisterKilled(DstReg, tri_, true);
}
- DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI)
- << "\t" << *UseMI);
+
+ DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugValue())
+ dbgs() << li_->getInstructionIndex(UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
continue;
}
@@ -845,14 +853,21 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg,
// EAX: 1 -> AL, 2 -> AX
// So RAX's sub-register 2 is AX, RAX's sub-register 3 is EAX, whose
// sub-register 2 is also AX.
+ //
+ // FIXME: Properly compose subreg indices for all targets.
+ //
if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
- assert(OldSubIdx < SubIdx && "Conflicting sub-register index!");
+ ;
else if (SubIdx)
O.setSubReg(SubIdx);
O.setReg(DstReg);
- DEBUG(dbgs() << "\t\tupdated: " << li_->getInstructionIndex(UseMI)
- << "\t" << *UseMI);
+ DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugValue())
+ dbgs() << li_->getInstructionIndex(UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
// After updating the operand, check if the machine instruction has
// become a copy. If so, update its val# information.
@@ -938,7 +953,7 @@ static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI,
MachineInstr *DefMI =
li_->getInstructionFromIndex(LRStart.getDefIndex());
if (DefMI && DefMI != CopyMI) {
- int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg, false);
+ int DeadIdx = DefMI->findRegisterDefOperandIdx(li.reg);
if (DeadIdx != -1)
DefMI->getOperand(DeadIdx).setIsDead();
else
@@ -1255,7 +1270,12 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
unsigned &RealDstReg) {
const TargetRegisterClass *RC = mri_->getRegClass(SrcReg);
RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
- assert(RealDstReg && "Invalid extract_subreg instruction!");
+ if (!RealDstReg) {
+ DEBUG(dbgs() << "\tIncompatible source regclass: "
+ << "none of the super-registers of " << tri_->getName(DstReg)
+ << " are in " << RC->getName() << ".\n");
+ return false;
+ }
LiveInterval &RHS = li_->getInterval(SrcReg);
// For this type of EXTRACT_SUBREG, conservatively
@@ -1293,7 +1313,12 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
unsigned &RealSrcReg) {
const TargetRegisterClass *RC = mri_->getRegClass(DstReg);
RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
- assert(RealSrcReg && "Invalid extract_subreg instruction!");
+ if (!RealSrcReg) {
+ DEBUG(dbgs() << "\tIncompatible destination regclass: "
+ << "none of the super-registers of " << tri_->getName(SrcReg)
+ << " are in " << RC->getName() << ".\n");
+ return false;
+ }
LiveInterval &LHS = li_->getInterval(DstReg);
if (li_->hasInterval(RealSrcReg) &&
@@ -1419,7 +1444,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
assert(DstSubRC && "Illegal subregister index");
if (!DstSubRC->contains(SrcSubReg)) {
DEBUG(dbgs() << "\tIncompatible destination regclass: "
- << tri_->getName(SrcSubReg) << " not in "
+ << "none of the super-registers of "
+ << tri_->getName(SrcSubReg) << " are in "
<< DstSubRC->getName() << ".\n");
return false; // Not coalescable.
}
@@ -1436,7 +1462,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
assert(SrcSubRC && "Illegal subregister index");
if (!SrcSubRC->contains(DstSubReg)) {
DEBUG(dbgs() << "\tIncompatible source regclass: "
- << tri_->getName(DstSubReg) << " not in "
+ << "none of the super-registers of "
+ << tri_->getName(DstSubReg) << " are in "
<< SrcSubRC->getName() << ".\n");
(void)DstSubReg;
return false; // Not coalescable.
@@ -2625,7 +2652,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
MachineInstr *UseMI = Use.getParent();
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- SrcReg == DstReg)
+ SrcReg == DstReg && SrcSubIdx == DstSubIdx)
// Ignore identity copies.
continue;
SlotIndex Idx = li_->getInstructionIndex(UseMI);
@@ -2654,7 +2681,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start,
// Ignore identity copies.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
- SrcReg == DstReg))
+ SrcReg == DstReg && SrcSubIdx == DstSubIdx))
for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) {
MachineOperand &Use = MI->getOperand(i);
if (Use.isReg() && Use.isUse() && Use.getReg() &&
@@ -2785,7 +2812,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
// If the move will be an identity move delete it
bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
- if (isMove && SrcReg == DstReg) {
+ if (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx) {
if (li_->hasInterval(SrcReg)) {
LiveInterval &RegInt = li_->getInterval(SrcReg);
// If def of this move instruction is dead, remove its live range
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 63c5554..a7b2efe 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -51,6 +51,7 @@ protected:
MachineFrameInfo *mfi;
MachineRegisterInfo *mri;
const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
VirtRegMap *vrm;
/// Construct a spiller base.
@@ -60,6 +61,7 @@ protected:
mfi = mf->getFrameInfo();
mri = &mf->getRegInfo();
tii = mf->getTarget().getInstrInfo();
+ tri = mf->getTarget().getRegisterInfo();
}
/// Add spill ranges for every use/def of the live interval, inserting loads
@@ -129,7 +131,8 @@ protected:
// Insert reload if necessary.
MachineBasicBlock::iterator miItr(mi);
if (hasUse) {
- tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc);
+ tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc,
+ tri);
MachineInstr *loadInstr(prior(miItr));
SlotIndex loadIndex =
lis->InsertMachineInstrInMaps(loadInstr).getDefIndex();
@@ -142,8 +145,8 @@ protected:
// Insert store if necessary.
if (hasDef) {
- tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, true,
- ss, trc);
+ tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg,
+ true, ss, trc, tri);
MachineInstr *storeInstr(llvm::next(miItr));
SlotIndex storeIndex =
lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
@@ -333,7 +336,8 @@ private:
// Insert a copy at the start of the MBB. The range preceding the
// copy will be attached to the original LiveInterval.
MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
- tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc);
+ tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc,
+ DebugLoc());
MachineInstr *copyMI = defMBB->begin();
copyMI->addRegisterKilled(li->reg, tri);
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
@@ -386,7 +390,8 @@ private:
if (isTwoAddr && !twoAddrUseIsUndef) {
MachineBasicBlock *defMBB = defInst->getParent();
- tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc);
+ tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc,
+ DebugLoc());
MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst));
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
copyMI->addRegisterKilled(li->reg, tri);
@@ -446,8 +451,9 @@ private:
// reg.
MachineBasicBlock *useMBB = useInst->getParent();
MachineBasicBlock::iterator useItr(useInst);
- tii->copyRegToReg(*useMBB, next(useItr), li->reg, newVReg, trc, trc);
- MachineInstr *copyMI = next(useItr);
+ tii->copyRegToReg(*useMBB, llvm::next(useItr), li->reg, newVReg, trc, trc,
+ DebugLoc());
+ MachineInstr *copyMI = llvm::next(useItr);
copyMI->addRegisterKilled(newVReg, tri);
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
@@ -483,7 +489,8 @@ private:
assert(oldKillRange != 0 && "No kill range?");
tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(),
- li->reg, newVReg, trc, trc);
+ li->reg, newVReg, trc, trc,
+ DebugLoc());
MachineInstr *copyMI = prior(killMBB->getFirstTerminator());
copyMI->addRegisterKilled(newVReg, tri);
SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 42dfd7f..7f3b452 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -13,6 +13,8 @@
#define DEBUG_TYPE "stackcoloring"
#include "VirtRegMap.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -116,6 +118,7 @@ namespace {
private:
void InitializeSlots();
+ bool CheckForSetJmpCall(const MachineFunction &MF) const;
void ScanForSpillSlotRefs(MachineFunction &MF);
bool OverlapWithAssignments(LiveInterval *li, int Color) const;
int ColorSlot(LiveInterval *li);
@@ -607,7 +610,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
DEBUG(MI->dump());
++NumLoadElim;
} else {
- TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC);
+ TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC,
+ MI->getDebugLoc());
++NumRegRepl;
}
@@ -623,7 +627,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
DEBUG(MI->dump());
++NumStoreElim;
} else {
- TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC);
+ TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC,
+ MI->getDebugLoc());
++NumRegRepl;
}
@@ -697,7 +702,11 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "********** Stack Slot Coloring **********\n");
+ DEBUG({
+ dbgs() << "********** Stack Slot Coloring **********\n"
+ << "********** Function: "
+ << MF.getFunction()->getName() << '\n';
+ });
MFI = MF.getFrameInfo();
MRI = &MF.getRegInfo();
@@ -716,6 +725,13 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
return false;
}
+ // If there are calls to setjmp or sigsetjmp, don't perform stack slot
+ // coloring. The stack could be modified before the longjmp is executed,
+ // resulting in the wrong value being used afterwards. (See
+ // <rdar://problem/8007500>.)
+ if (MF.callsSetJmp())
+ return false;
+
// Gather spill slot references
ScanForSpillSlotRefs(MF);
InitializeSlots();
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index f8f6a55..142398c 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -696,7 +696,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
// the Phi defining curr.second
MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
TII->copyRegToReg(*PI->getParent(), PI, t,
- curr.second, RC, RC);
+ curr.second, RC, RC, DebugLoc());
DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
<< "\n");
@@ -713,7 +713,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
// Insert copy from map[curr.first] to curr.second
TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second,
- map[curr.first], RC, RC);
+ map[curr.first], RC, RC, DebugLoc());
map[curr.first] = curr.second;
DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
<< curr.second << "\n");
@@ -762,7 +762,7 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
// Insert a copy from dest to a new temporary t at the end of b
unsigned t = MF->getRegInfo().createVirtualRegister(RC);
TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t,
- curr.second, RC, RC);
+ curr.second, RC, RC, DebugLoc());
map[curr.second] = t;
MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
@@ -961,7 +961,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first);
TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(),
- I->first, SI->first, RC, RC);
+ I->first, SI->first, RC, RC, DebugLoc());
LI.renumber();
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index aa6e2b4..f2e2a76 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -561,7 +561,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first,
- CopyInfos[i].second, RC,RC);
+ CopyInfos[i].second, RC, RC, DebugLoc());
MachineInstr *CopyMI = prior(Loc);
Copies.push_back(CopyMI);
}
@@ -620,7 +620,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first,
- CopyInfos[i].second, RC, RC);
+ CopyInfos[i].second, RC, RC, DebugLoc());
MachineInstr *CopyMI = prior(Loc);
Copies.push_back(CopyMI);
}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 9f95993..71ad3fb 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -22,6 +22,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
@@ -460,6 +461,26 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
= getContext().getMachOSection("__DATA", "__data", 0,
SectionKind::getDataRel());
+ TLSDataSection // .tdata
+ = getContext().getMachOSection("__DATA", "__thread_data",
+ MCSectionMachO::S_THREAD_LOCAL_REGULAR,
+ SectionKind::getDataRel());
+ TLSBSSSection // .tbss
+ = getContext().getMachOSection("__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ SectionKind::getThreadBSS());
+
+ // TODO: Verify datarel below.
+ TLSTLVSection // .tlv
+ = getContext().getMachOSection("__DATA", "__thread_vars",
+ MCSectionMachO::S_THREAD_LOCAL_VARIABLES,
+ SectionKind::getDataRel());
+
+ TLSThreadInitSection
+ = getContext().getMachOSection("__DATA", "__thread_init",
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
+ SectionKind::getDataRel());
+
CStringSection // .cstring
= getContext().getMachOSection("__TEXT", "__cstring",
MCSectionMachO::S_CSTRING_LITERALS,
@@ -606,6 +627,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
getContext().getMachOSection("__DWARF", "__debug_inlined",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
+
+ TLSExtraDataSection = TLSTLVSection;
}
const MCSection *TargetLoweringObjectFileMachO::
@@ -646,7 +669,10 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
const MCSection *TargetLoweringObjectFileMachO::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
- assert(!Kind.isThreadLocal() && "Darwin doesn't support TLS");
+
+ // Handle thread local data.
+ if (Kind.isThreadBSS()) return TLSBSSSection;
+ if (Kind.isThreadData()) return TLSDataSection;
if (Kind.isText())
return GV->isWeakForLinker() ? TextCoalSection : TextSection;
@@ -794,94 +820,160 @@ unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
// COFF
//===----------------------------------------------------------------------===//
-typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
-
-TargetLoweringObjectFileCOFF::~TargetLoweringObjectFileCOFF() {
- delete (COFFUniqueMapTy*)UniquingMap;
-}
-
-
-const MCSection *TargetLoweringObjectFileCOFF::
-getCOFFSection(StringRef Name, bool isDirective, SectionKind Kind) const {
- // Create the map if it doesn't already exist.
- if (UniquingMap == 0)
- UniquingMap = new COFFUniqueMapTy();
- COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)UniquingMap;
-
- // Do the lookup, if we have a hit, return it.
- const MCSectionCOFF *&Entry = Map[Name];
- if (Entry) return Entry;
-
- return Entry = MCSectionCOFF::Create(Name, isDirective, Kind, getContext());
-}
-
void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
- if (UniquingMap != 0)
- ((COFFUniqueMapTy*)UniquingMap)->clear();
TargetLoweringObjectFile::Initialize(Ctx, TM);
- TextSection = getCOFFSection("\t.text", true, SectionKind::getText());
- DataSection = getCOFFSection("\t.data", true, SectionKind::getDataRel());
+ TextSection =
+ getContext().getCOFFSection(".text",
+ MCSectionCOFF::IMAGE_SCN_CNT_CODE |
+ MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getText());
+ DataSection =
+ getContext().getCOFFSection(".data",
+ MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ |
+ MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+ ReadOnlySection =
+ getContext().getCOFFSection(".rdata",
+ MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
StaticCtorSection =
- getCOFFSection(".ctors", false, SectionKind::getDataRel());
+ getContext().getCOFFSection(".ctors",
+ MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ |
+ MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
StaticDtorSection =
- getCOFFSection(".dtors", false, SectionKind::getDataRel());
+ getContext().getCOFFSection(".dtors",
+ MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ |
+ MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
// FIXME: We're emitting LSDA info into a readonly section on COFF, even
// though it contains relocatable pointers. In PIC mode, this is probably a
// big runtime hit for C++ apps. Either the contents of the LSDA need to be
// adjusted or this should be a data section.
LSDASection =
- getCOFFSection(".gcc_except_table", false, SectionKind::getReadOnly());
+ getContext().getCOFFSection(".gcc_except_table",
+ MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
EHFrameSection =
- getCOFFSection(".eh_frame", false, SectionKind::getDataRel());
+ getContext().getCOFFSection(".eh_frame",
+ MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ |
+ MCSectionCOFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
// Debug info.
- // FIXME: Don't use 'directive' mode here.
DwarfAbbrevSection =
- getCOFFSection("\t.section\t.debug_abbrev,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_abbrev",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfInfoSection =
- getCOFFSection("\t.section\t.debug_info,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_info",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfLineSection =
- getCOFFSection("\t.section\t.debug_line,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_line",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfFrameSection =
- getCOFFSection("\t.section\t.debug_frame,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_frame",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfPubNamesSection =
- getCOFFSection("\t.section\t.debug_pubnames,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_pubnames",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfPubTypesSection =
- getCOFFSection("\t.section\t.debug_pubtypes,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_pubtypes",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfStrSection =
- getCOFFSection("\t.section\t.debug_str,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_str",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfLocSection =
- getCOFFSection("\t.section\t.debug_loc,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_loc",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfARangesSection =
- getCOFFSection("\t.section\t.debug_aranges,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_aranges",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfRangesSection =
- getCOFFSection("\t.section\t.debug_ranges,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_ranges",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfMacroInfoSection =
- getCOFFSection("\t.section\t.debug_macinfo,\"dr\"",
- true, SectionKind::getMetadata());
+ getContext().getCOFFSection(".debug_macinfo",
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
+
+ DrectveSection =
+ getContext().getCOFFSection(".drectve",
+ MCSectionCOFF::IMAGE_SCN_LNK_INFO,
+ SectionKind::getMetadata());
+}
+
+static unsigned
+getCOFFSectionFlags(SectionKind K) {
+ unsigned Flags = 0;
+
+  if (K.isMetadata())
+ Flags |=
+ MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE;
+ else if (K.isText())
+ Flags |=
+ MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE |
+ MCSectionCOFF::IMAGE_SCN_CNT_CODE;
+ else if (K.isBSS ())
+ Flags |=
+ MCSectionCOFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ |
+ MCSectionCOFF::IMAGE_SCN_MEM_WRITE;
+ else if (K.isReadOnly())
+ Flags |=
+ MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ;
+ else if (K.isWriteable())
+ Flags |=
+ MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ MCSectionCOFF::IMAGE_SCN_MEM_READ |
+ MCSectionCOFF::IMAGE_SCN_MEM_WRITE;
+
+ return Flags;
}
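+// For illustration, the mapping above gives, e.g.,
+//   SectionKind::getText()     -> IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_CNT_CODE
+//   SectionKind::getReadOnly() -> IMAGE_SCN_CNT_INITIALIZED_DATA |
+//                                 IMAGE_SCN_MEM_READ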
const MCSection *TargetLoweringObjectFileCOFF::
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
- return getCOFFSection(GV->getSection(), false, Kind);
+ return getContext().getCOFFSection(GV->getSection(),
+ getCOFFSectionFlags(Kind),
+ Kind);
}
static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
if (Kind.isText())
return ".text$linkonce";
+ if (Kind.isBSS ())
+ return ".bss$linkonce";
if (Kind.isWriteable())
return ".data$linkonce";
return ".rdata$linkonce";
@@ -900,7 +992,13 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
MCSymbol *Sym = Mang->getSymbol(GV);
Name.append(Sym->getName().begin(), Sym->getName().end());
- return getCOFFSection(Name.str(), false, Kind);
+
+ unsigned Characteristics = getCOFFSectionFlags(Kind);
+
+ Characteristics |= MCSectionCOFF::IMAGE_SCN_LNK_COMDAT;
+
+ return getContext().getCOFFSection(Name.str(), Characteristics,
+ MCSectionCOFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind);
}
if (Kind.isText())
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index c288ae0..3d10dc1 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -40,6 +40,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
@@ -77,6 +78,10 @@ namespace {
// registers from virtual registers. e.g. r1 = move v1024.
DenseMap<unsigned, unsigned> DstRegMap;
+    /// RegSequences - Keep track of the list of REG_SEQUENCE instructions seen
+ /// during the initial walk of the machine function.
+ SmallVector<MachineInstr*, 16> RegSequences;
+
bool Sink3AddrInstruction(MachineBasicBlock *MBB, MachineInstr *MI,
unsigned Reg,
MachineBasicBlock::iterator OldPos);
@@ -123,6 +128,13 @@ namespace {
void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &Processed);
+ void CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs, unsigned DstReg);
+
+ /// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+    /// of the de-ssa process. This replaces each source of a REG_SEQUENCE
+    /// with a sub-register reference of the register the REG_SEQUENCE defines.
+ bool EliminateRegSequences();
+
public:
static char ID; // Pass identification, replacement for typeid
TwoAddressInstructionPass() : MachineFunctionPass(&ID) {}
@@ -768,7 +780,7 @@ canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
if (!LastKill)
return false;
- bool isModRef = LastKill->modifiesRegister(Kill);
+ bool isModRef = LastKill->definesRegister(Kill);
NewKills.push_back(std::make_pair(std::make_pair(Kill, isModRef),
LastKill));
}
@@ -929,6 +941,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ // Remember REG_SEQUENCE instructions, we'll deal with them later.
+ if (mi->isRegSequence())
+ RegSequences.push_back(&*mi);
+
const TargetInstrDesc &TID = mi->getDesc();
bool FirstTied = true;
@@ -1035,7 +1051,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
ReMatRegs.set(regB);
++NumReMats;
} else {
- bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc);
+ bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc,
+ mi->getDebugLoc());
(void)Emitted;
assert(Emitted && "Unable to issue a copy instruction!\n");
}
@@ -1110,5 +1127,211 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
VReg = ReMatRegs.find_next(VReg);
}
+  // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve
+ // SSA form. It's now safe to de-SSA.
+ MadeChange |= EliminateRegSequences();
+
return MadeChange;
}
+
+static void UpdateRegSequenceSrcs(unsigned SrcReg,
+ unsigned DstReg, unsigned SubIdx,
+ MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+ RE = MRI->reg_end(); RI != RE; ) {
+ MachineOperand &MO = RI.getOperand();
+ ++RI;
+ MO.setReg(DstReg);
+ assert(MO.getSubReg() == 0);
+ MO.setSubReg(SubIdx);
+ }
+}
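+// For illustration: with SrcReg = %reg1029, DstReg = %reg1031 and SubIdx = 5,
+// every operand of %reg1029 is rewritten in place to %reg1031:5.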
+
+/// CoalesceExtSubRegs - If a number of sources of the REG_SEQUENCE are
+/// EXTRACT_SUBREG from the same register and to the same virtual register
+/// with different sub-register indices, attempt to combine the
+/// EXTRACT_SUBREGs and pre-coalesce them. e.g.
+/// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
+/// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
+/// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
+/// Since D subregs 5, 6 can combine to a Q register, we can coalesce
+/// reg1026 to reg1029.
+void
+TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
+ unsigned DstReg) {
+ SmallSet<unsigned, 4> Seen;
+ for (unsigned i = 0, e = Srcs.size(); i != e; ++i) {
+ unsigned SrcReg = Srcs[i];
+ if (!Seen.insert(SrcReg))
+ continue;
+
+ // If there are no other uses than extract_subreg which feed into
+ // the reg_sequence, then we might be able to coalesce them.
+ bool CanCoalesce = true;
+ SmallVector<unsigned, 4> SubIndices;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SrcReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (!UseMI->isExtractSubreg() ||
+ UseMI->getOperand(0).getReg() != DstReg) {
+ CanCoalesce = false;
+ break;
+ }
+ SubIndices.push_back(UseMI->getOperand(2).getImm());
+ }
+
+ if (!CanCoalesce || SubIndices.size() < 2)
+ continue;
+
+ std::sort(SubIndices.begin(), SubIndices.end());
+ unsigned NewSubIdx = 0;
+ if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices,
+ NewSubIdx)) {
+ bool Proceed = true;
+ if (NewSubIdx)
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+ RE = MRI->reg_end(); RI != RE; ) {
+ MachineOperand &MO = RI.getOperand();
+ ++RI;
+ // FIXME: If the sub-registers do not combine to the whole
+ // super-register, i.e. NewSubIdx != 0, and any of the use has a
+ // sub-register index, then abort the coalescing attempt.
+ if (MO.getSubReg()) {
+ Proceed = false;
+ break;
+ }
+ MO.setReg(DstReg);
+ MO.setSubReg(NewSubIdx);
+ }
+ if (Proceed)
+ for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
+ RE = MRI->reg_end(); RI != RE; ) {
+ MachineOperand &MO = RI.getOperand();
+ ++RI;
+ MO.setReg(DstReg);
+ if (NewSubIdx)
+ MO.setSubReg(NewSubIdx);
+ }
+ }
+ }
+}
+
+static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq,
+ MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI != RegSeq && UseMI->isRegSequence())
+ return true;
+ }
+ return false;
+}
+
+/// EliminateRegSequences - Eliminate REG_SEQUENCE instructions as part
+/// of the de-ssa process. This replaces each source of a REG_SEQUENCE
+/// with a sub-register reference of the register the REG_SEQUENCE defines, e.g.
+///
+/// %reg1029<def>, %reg1030<def> = VLD1q16 %reg1024<kill>, ...
+/// %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1030<kill>, 6
+/// =>
+/// %reg1031:5<def>, %reg1031:6<def> = VLD1q16 %reg1024<kill>, ...
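+///
+/// A repeated source, e.g.
+///   %reg1031<def> = REG_SEQUENCE %reg1029<kill>, 5, %reg1029, 6
+/// cannot be rewritten directly, since a REG_SEQUENCE may not have duplicated
+/// operands; a copy into a fresh virtual register is first inserted for the
+/// extra use of %reg1029.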
+bool TwoAddressInstructionPass::EliminateRegSequences() {
+ if (RegSequences.empty())
+ return false;
+
+ for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) {
+ MachineInstr *MI = RegSequences[i];
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (MI->getOperand(0).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ !(MI->getNumOperands() & 1)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ llvm_unreachable(0);
+ }
+
+ bool IsImpDef = true;
+ SmallVector<unsigned, 4> RealSrcs;
+ SmallSet<unsigned, 4> Seen;
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ if (MI->getOperand(i).getSubReg() ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+ DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
+ llvm_unreachable(0);
+ }
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isImplicitDef()) {
+ DefMI->eraseFromParent();
+ continue;
+ }
+ IsImpDef = false;
+
+ // Remember EXTRACT_SUBREG sources. These might be candidate for
+ // coalescing.
+ if (DefMI->isExtractSubreg())
+ RealSrcs.push_back(DefMI->getOperand(1).getReg());
+
+ if (!Seen.insert(SrcReg) ||
+ MI->getParent() != DefMI->getParent() ||
+ !MI->getOperand(i).isKill() ||
+ HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
+ // REG_SEQUENCE cannot have duplicated operands, add a copy.
+        // Also add a copy if the source is live into the block. We don't want
+ // to end up with a partial-redef of a livein, e.g.
+ // BB0:
+ // reg1051:10<def> =
+ // ...
+ // BB1:
+ // ... = reg1051:10
+ // BB2:
+ // reg1051:9<def> =
+ // LiveIntervalAnalysis won't like it.
+ //
+ // If the REG_SEQUENCE doesn't kill its source, keeping live variables
+ // correctly up to date becomes very difficult. Insert a copy.
+ //
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ MachineBasicBlock::iterator InsertLoc = MI;
+ bool Emitted =
+ TII->copyRegToReg(*MI->getParent(), InsertLoc, NewReg, SrcReg, RC, RC,
+ MI->getDebugLoc());
+ (void)Emitted;
+ assert(Emitted && "Unable to issue a copy instruction!\n");
+ MI->getOperand(i).setReg(NewReg);
+ if (MI->getOperand(i).isKill()) {
+ MachineBasicBlock::iterator CopyMI = prior(InsertLoc);
+ MachineOperand *KillMO = CopyMI->findRegisterUseOperand(SrcReg);
+ KillMO->setIsKill();
+ if (LV)
+ // Update live variables
+ LV->replaceKillInstruction(SrcReg, MI, &*CopyMI);
+ }
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
+ unsigned SrcReg = MI->getOperand(i).getReg();
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
+ UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI);
+ }
+
+ if (IsImpDef) {
+ DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF");
+ MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
+ for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
+ MI->RemoveOperand(j);
+ } else {
+ DEBUG(dbgs() << "Eliminated: " << *MI);
+ MI->eraseFromParent();
+ }
+
+ // Try coalescing some EXTRACT_SUBREG instructions.
+ CoalesceExtSubRegs(RealSrcs, DstReg);
+ }
+
+ RegSequences.clear();
+ return true;
+}
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 7f0412c..871d836 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -907,7 +907,7 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
TRI, VRM);
} else {
TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg,
- NewOp.StackSlotOrReMat, AliasRC);
+ NewOp.StackSlotOrReMat, AliasRC, TRI);
MachineInstr *LoadMI = prior(InsertLoc);
VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI);
// Any stores to this stack slot are not dead anymore.
@@ -1265,7 +1265,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF);
// Load from SS to the spare physical register.
- TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC);
+ TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI);
// This invalidates Phys.
Spills.ClobberPhysReg(PhysReg);
// Remember it's available.
@@ -1308,7 +1308,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
} while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
// Store the value back into SS.
- TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC);
+ TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI);
MachineInstr *StoreMI = prior(NextMII);
VRM->addSpillSlotUse(SS, StoreMI);
VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
@@ -1523,7 +1523,7 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
// Insert new def MI and spill MI.
const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC);
+ TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI);
MII = prior(MII);
MachineInstr *StoreMI = MII;
VRM->addSpillSlotUse(SS, StoreMI);
@@ -1566,7 +1566,8 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
std::vector<MachineOperand*> &KillOps) {
MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
- TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
+ TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC,
+ TRI);
MachineInstr *StoreMI = prior(oldNextMII);
VRM->addSpillSlotUse(StackSlot, StoreMI);
DEBUG(dbgs() << "Store:\t" << *StoreMI);
@@ -1709,7 +1710,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
if (UsedSS.count(SS))
llvm_unreachable("Need to spill more than one physical registers!");
UsedSS.insert(SS);
- TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC);
+ TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI);
MachineInstr *StoreMI = prior(MII);
VRM->addSpillSlotUse(SS, StoreMI);
@@ -1718,7 +1719,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) {
ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS,
TII, *MBB->getParent());
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC);
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI);
MachineInstr *LoadMI = prior(InsertLoc);
VRM->addSpillSlotUse(SS, LoadMI);
@@ -1793,7 +1794,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII,
*MBB->getParent());
- TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC);
+ TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC,
+ MI->getDebugLoc());
// This invalidates Phys.
Spills.ClobberPhysReg(Phys);
@@ -1821,7 +1823,7 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM);
} else {
const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC);
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI);
MachineInstr *LoadMI = prior(InsertLoc);
VRM->addSpillSlotUse(SSorRMId, LoadMI);
++NumLoads;
@@ -1857,7 +1859,7 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) {
int StackSlot = VRM->getStackSlot(VirtReg);
MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot,
- RC);
+ RC, TRI);
MachineInstr *StoreMI = prior(oldNextMII);
VRM->addSpillSlotUse(StackSlot, StoreMI);
DEBUG(dbgs() << "Store:\t" << *StoreMI);
@@ -1893,6 +1895,11 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// Clear kill info.
SmallSet<unsigned, 2> KilledMIRegs;
+
+ // Keep track of the registers we have already spilled in case there are
+ // multiple defs of the same register in MI.
+ SmallSet<unsigned, 8> SpilledMIRegs;
+
RegKills.reset();
KillOps.clear();
KillOps.resize(TRI->getNumRegs(), NULL);
@@ -2138,7 +2145,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
SSorRMId, TII, MF);
- TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
+ TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC,
+ MI.getDebugLoc());
MachineInstr *CopyMI = prior(InsertLoc);
CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
@@ -2183,7 +2191,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
} else {
const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC);
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
MachineInstr *LoadMI = prior(InsertLoc);
VRM->addSpillSlotUse(SSorRMId, LoadMI);
++NumLoads;
@@ -2262,7 +2270,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
DEBUG(dbgs() << "Promoted Load To Copy: " << MI);
if (DestReg != InReg) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
- TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC);
+ TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC,
+ MI.getDebugLoc());
MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg);
unsigned SubIdx = DefMO->getSubReg();
// Revisit the copy so we make sure to notice the effects of the
@@ -2408,6 +2417,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
}
// Process all of the spilled defs.
+ SpilledMIRegs.clear();
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!(MO.isReg() && MO.getReg() && MO.isDef()))
@@ -2421,7 +2431,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// eliminate this or else the undef marker is lost and it will
// confuses the scavenger. This is extremely rare.
unsigned Src, Dst, SrcSR, DstSR;
- if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst &&
+ if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
+ Src == Dst && SrcSR == DstSR &&
!MI.findRegisterUseOperand(Src)->isUndef()) {
++NumDCE;
DEBUG(dbgs() << "Removing now-noop copy: " << MI);
@@ -2500,7 +2511,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
MI.getOperand(i).setReg(RReg);
MI.getOperand(i).setSubReg(0);
- if (!MO.isDead()) {
+ if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) {
MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true,
LastStore, Spills, ReMatDefs, RegKills, KillOps);
@@ -2510,7 +2521,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// instruction before considering the dest reg to be changed.
{
unsigned Src, Dst, SrcSR, DstSR;
- if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst) {
+ if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) &&
+ Src == Dst && SrcSR == DstSR) {
++NumDCE;
DEBUG(dbgs() << "Removing now-noop copy: " << MI);
InvalidateKills(MI, TRI, RegKills, KillOps);
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index 9d07811..5f30dce 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -33,8 +33,27 @@ extern const char* ProgramName;
}
namespace {
- int ExecuteProgram(const std::string& name,
- const StrVector& args) {
+
+ void PrintString (const std::string& str) {
+ errs() << str << ' ';
+ }
+
+ void PrintCommand (const std::string& Cmd, const StrVector& Args) {
+ errs() << Cmd << ' ';
+ std::for_each(Args.begin(), Args.end(), &PrintString);
+ errs() << '\n';
+ }
+
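+  // A sketch of the convention assumed here: sys::Program::ExecuteAndWait
+  // presumably reports death-by-signal as a negative return code on Unix,
+  // while Win32 failure codes are NTSTATUS values, which start at 0xc0000000.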
+ bool IsSegmentationFault (int returnCode) {
+#ifdef LLVM_ON_WIN32
+    return (returnCode >= 0xc0000000UL);
+#else
+ return (returnCode < 0);
+#endif
+ }
+
+ int ExecuteProgram (const std::string& name,
+ const StrVector& args) {
sys::Path prog = sys::Program::FindProgramByName(name);
if (prog.isEmpty()) {
@@ -67,24 +86,25 @@ namespace {
argv.push_back(0); // null terminate list.
// Invoke the program.
- return sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]);
- }
+ int ret = sys::Program::ExecuteAndWait(prog, &argv[0], 0, &redirects[0]);
- void print_string (const std::string& str) {
- errs() << str << ' ';
+ if (IsSegmentationFault(ret)) {
+ errs() << "Segmentation fault: ";
+ PrintCommand(name, args);
+ }
+
+ return ret;
}
}
namespace llvmc {
- void AppendToGlobalTimeLog(const std::string& cmd, double time);
+ void AppendToGlobalTimeLog (const std::string& cmd, double time);
}
-int llvmc::Action::Execute() const {
- if (DryRun || VerboseMode) {
- errs() << Command_ << " ";
- std::for_each(Args_.begin(), Args_.end(), print_string);
- errs() << '\n';
- }
+int llvmc::Action::Execute () const {
+ if (DryRun || VerboseMode)
+ PrintCommand(Command_, Args_);
+
if (!DryRun) {
if (Time) {
sys::TimeValue now = sys::TimeValue::now();
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index b17827e..be7f1f5 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -715,7 +715,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Instruction::FDiv:
GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break;
case Instruction::FRem:
- GV.FloatVal = ::fmodf(LHS.FloatVal,RHS.FloatVal); break;
+ GV.FloatVal = std::fmod(LHS.FloatVal,RHS.FloatVal); break;
}
break;
case Type::DoubleTyID:
@@ -730,7 +730,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
case Instruction::FDiv:
GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break;
case Instruction::FRem:
- GV.DoubleVal = ::fmod(LHS.DoubleVal,RHS.DoubleVal); break;
+ GV.DoubleVal = std::fmod(LHS.DoubleVal,RHS.DoubleVal); break;
}
break;
case Type::X86_FP80TyID:
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index dba0e14..5e8a3b6 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -10,10 +10,13 @@ add_llvm_library(LLVMMC
MCExpr.cpp
MCInst.cpp
MCInstPrinter.cpp
+ MCLabel.cpp
+ MCLoggingStreamer.cpp
MCMachOStreamer.cpp
MCNullStreamer.cpp
MCObjectWriter.cpp
MCSection.cpp
+ MCSectionCOFF.cpp
MCSectionELF.cpp
MCSectionMachO.cpp
MCStreamer.cpp
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 2b23994..a275be2 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -21,6 +21,7 @@ using namespace llvm;
MCAsmInfo::MCAsmInfo() {
HasSubsectionsViaSymbols = false;
HasMachoZeroFillDirective = false;
+ HasMachoTBSSDirective = false;
HasStaticCtorDtorReferenceInStaticMode = false;
MaxInstLength = 4;
PCSymbol = "$";
diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp
index 3c31caa..0bd3b2d 100644
--- a/lib/MC/MCAsmInfoDarwin.cpp
+++ b/lib/MC/MCAsmInfoDarwin.cpp
@@ -35,6 +35,7 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
WeakRefDirective = "\t.weak_reference ";
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
HasMachoZeroFillDirective = true; // Uses .zerofill
+ HasMachoTBSSDirective = true; // Uses .tbss
HasStaticCtorDtorReferenceInStaticMode = true;
HiddenVisibilityAttr = MCSA_PrivateExtern;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 2c7e1c4..57b2bcc 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -109,7 +109,10 @@ public:
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
-
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass);
+ virtual void EmitCOFFSymbolType(int Type);
+ virtual void EndCOFFSymbolDef();
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
@@ -123,6 +126,9 @@ public:
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
+ virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
+
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
@@ -218,6 +224,7 @@ void MCAsmStreamer::SwitchSection(const MCSection *Section) {
void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
assert(CurSection && "Cannot emit before setting section!");
OS << *Symbol << ":";
@@ -234,16 +241,11 @@ void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
}
void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- // Only absolute symbols can be redefined.
- assert((Symbol->isUndefined() || Symbol->isAbsolute()) &&
- "Cannot define a symbol twice!");
-
OS << *Symbol << " = " << *Value;
EmitEOL();
// FIXME: Lift context changes into super class.
- // FIXME: Set associated section.
- Symbol->setValue(Value);
+ Symbol->setVariableValue(Value);
}
void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
@@ -297,6 +299,26 @@ void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
EmitEOL();
}
+void MCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ OS << "\t.def\t " << *Symbol << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {
+ OS << "\t.scl\t" << StorageClass << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitCOFFSymbolType (int Type) {
+ OS << "\t.type\t" << Type << ';';
+ EmitEOL();
+}
+
+void MCAsmStreamer::EndCOFFSymbolDef() {
+ OS << "\t.endef";
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
assert(MAI.hasDotTypeDotSizeDirective());
OS << "\t.size\t" << *Symbol << ", " << *Value << '\n';
@@ -341,6 +363,23 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
EmitEOL();
}
+// .tbss sym, size, align
+// This assumes that the symbol has already been mangled from the original,
+// e.g. _a.
+void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ assert(Symbol != NULL && "Symbol shouldn't be NULL!");
+  // Instead of switching to the Section, we can just use the directive;
+  // .tbss is a Mach-O specific directive and implies its own section.
+ OS << ".tbss " << *Symbol << ", " << Size;
+
+  // Output the alignment only if it is greater than 1, since 1 is the
+  // default and need not be printed.
+ if (ByteAlignment > 1) OS << ", " << Log2_32(ByteAlignment);
+
+ EmitEOL();
+}
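+// E.g. for a symbol _a with Size = 4 and ByteAlignment = 4 this emits:
+//   .tbss _a, 4, 2
+// since the alignment is printed as its log2.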
+
static inline char toOctal(int X) { return (X&7)+'0'; }
static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
@@ -630,9 +669,11 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
AddEncodingComment(Inst);
// Show the MCInst if enabled.
- if (ShowInst)
+ if (ShowInst) {
Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
-
+ GetCommentOS() << "\n";
+ }
+
// If we have an AsmPrinter, use that to print, otherwise print the MCInst.
if (InstPrinter)
InstPrinter->printInst(&Inst, OS);
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 69afcc8..5936656 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -47,93 +47,131 @@ STATISTIC(SectionLayouts, "Number of section layouts");
/* *** */
-void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) {
- // We shouldn't have to do anything special to support negative slides, and it
- // is a perfectly valid thing to do as long as other parts of the system are
- // can guarantee convergence.
- assert(SlideAmount >= 0 && "Negative slides not yet supported");
+MCAsmLayout::MCAsmLayout(MCAssembler &Asm)
+ : Assembler(Asm), LastValidFragment(0)
+ {
+ // Compute the section layout order. Virtual sections must go last.
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+ if (!Asm.getBackend().isVirtualSection(it->getSection()))
+ SectionOrder.push_back(&*it);
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it)
+ if (Asm.getBackend().isVirtualSection(it->getSection()))
+ SectionOrder.push_back(&*it);
+}
- // Update the layout by simply recomputing the layout for the entire
- // file. This is trivially correct, but very slow.
- //
- // FIXME-PERF: This is O(N^2), but will be eliminated once we get smarter.
+bool MCAsmLayout::isSectionUpToDate(const MCSectionData *SD) const {
+ // The first section is always up-to-date.
+ unsigned Index = SD->getLayoutOrder();
+ if (!Index)
+ return true;
- // Layout the concrete sections and fragments.
- MCAssembler &Asm = getAssembler();
- uint64_t Address = 0;
- for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
- // Skip virtual sections.
- if (Asm.getBackend().isVirtualSection(it->getSection()))
- continue;
+  // Otherwise, sections are always implicitly computed when the preceding
+  // fragment is laid out.
+ const MCSectionData *Prev = getSectionOrder()[Index - 1];
+ return isFragmentUpToDate(&(Prev->getFragmentList().back()));
+}
+
+bool MCAsmLayout::isFragmentUpToDate(const MCFragment *F) const {
+ return (LastValidFragment &&
+ F->getLayoutOrder() <= LastValidFragment->getLayoutOrder());
+}
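+// I.e. layout results are trusted only for the prefix of the global fragment
+// order up to and including LastValidFragment; anything past it is stale and
+// is recomputed on demand by EnsureValid() below.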
- // Layout the section fragments and its size.
- Address = Asm.LayoutSection(*it, *this, Address);
+void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) {
+ // If this fragment wasn't already up-to-date, we don't need to do anything.
+ if (!isFragmentUpToDate(F))
+ return;
+
+ // Otherwise, reset the last valid fragment to the predecessor of the
+ // invalidated fragment.
+ LastValidFragment = F->getPrevNode();
+ if (!LastValidFragment) {
+ unsigned Index = F->getParent()->getLayoutOrder();
+ if (Index != 0) {
+ MCSectionData *Prev = getSectionOrder()[Index - 1];
+ LastValidFragment = &(Prev->getFragmentList().back());
+ }
}
+}
- // Layout the virtual sections.
- for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
- if (!Asm.getBackend().isVirtualSection(it->getSection()))
- continue;
+void MCAsmLayout::EnsureValid(const MCFragment *F) const {
+ // Advance the layout position until the fragment is up-to-date.
+ while (!isFragmentUpToDate(F)) {
+ // Advance to the next fragment.
+ MCFragment *Cur = LastValidFragment;
+ if (Cur)
+ Cur = Cur->getNextNode();
+ if (!Cur) {
+ unsigned NextIndex = 0;
+ if (LastValidFragment)
+ NextIndex = LastValidFragment->getParent()->getLayoutOrder() + 1;
+ Cur = SectionOrder[NextIndex]->begin();
+ }
- // Layout the section fragments and its size.
- Address = Asm.LayoutSection(*it, *this, Address);
+ const_cast<MCAsmLayout*>(this)->LayoutFragment(Cur);
}
}
+void MCAsmLayout::FragmentReplaced(MCFragment *Src, MCFragment *Dst) {
+ if (LastValidFragment == Src)
+ LastValidFragment = Dst;
+
+ Dst->Offset = Src->Offset;
+ Dst->EffectiveSize = Src->EffectiveSize;
+}
+
uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const {
assert(F->getParent() && "Missing section()!");
return getSectionAddress(F->getParent()) + getFragmentOffset(F);
}
uint64_t MCAsmLayout::getFragmentEffectiveSize(const MCFragment *F) const {
+ EnsureValid(F);
assert(F->EffectiveSize != ~UINT64_C(0) && "Address not set!");
return F->EffectiveSize;
}
-void MCAsmLayout::setFragmentEffectiveSize(MCFragment *F, uint64_t Value) {
- F->EffectiveSize = Value;
-}
-
uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
+ EnsureValid(F);
assert(F->Offset != ~UINT64_C(0) && "Address not set!");
return F->Offset;
}
-void MCAsmLayout::setFragmentOffset(MCFragment *F, uint64_t Value) {
- F->Offset = Value;
-}
-
uint64_t MCAsmLayout::getSymbolAddress(const MCSymbolData *SD) const {
assert(SD->getFragment() && "Invalid getAddress() on undefined symbol!");
return getFragmentAddress(SD->getFragment()) + SD->getOffset();
}
uint64_t MCAsmLayout::getSectionAddress(const MCSectionData *SD) const {
+ EnsureValid(SD->begin());
assert(SD->Address != ~UINT64_C(0) && "Address not set!");
return SD->Address;
}
-void MCAsmLayout::setSectionAddress(MCSectionData *SD, uint64_t Value) {
- SD->Address = Value;
-}
-
-uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const {
- assert(SD->Size != ~UINT64_C(0) && "File size not set!");
- return SD->Size;
-}
-void MCAsmLayout::setSectionSize(MCSectionData *SD, uint64_t Value) {
- SD->Size = Value;
+uint64_t MCAsmLayout::getSectionAddressSize(const MCSectionData *SD) const {
+ // The size is the last fragment's end offset.
+ const MCFragment &F = SD->getFragmentList().back();
+ return getFragmentOffset(&F) + getFragmentEffectiveSize(&F);
}
uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
- assert(SD->FileSize != ~UINT64_C(0) && "File size not set!");
- return SD->FileSize;
-}
-void MCAsmLayout::setSectionFileSize(MCSectionData *SD, uint64_t Value) {
- SD->FileSize = Value;
+ // Virtual sections have no file size.
+ if (getAssembler().getBackend().isVirtualSection(SD->getSection()))
+ return 0;
+
+ // Otherwise, the file size is the same as the address space size.
+ return getSectionAddressSize(SD);
}
- /// @}
+uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const {
+ // The logical size is the address space size minus any tail padding.
+ uint64_t Size = getSectionAddressSize(SD);
+ const MCAlignFragment *AF =
+ dyn_cast<MCAlignFragment>(&(SD->getFragmentList().back()));
+ if (AF && AF->hasOnlyAlignAddress())
+ Size -= getFragmentEffectiveSize(AF);
+
+ return Size;
+}
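+// To illustrate the three sizes: a section whose fragments end at offset 64,
+// where the last 8 bytes are an OnlyAlignAddress align fragment, has an
+// address size of 64, a logical size of 56, and a file size of 64 (or 0 if
+// the section is virtual).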
/* *** */
@@ -141,17 +179,12 @@ MCFragment::MCFragment() : Kind(FragmentType(~0)) {
}
MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
- : Kind(_Kind),
- Parent(_Parent),
- EffectiveSize(~UINT64_C(0))
+ : Kind(_Kind), Parent(_Parent), Atom(0), EffectiveSize(~UINT64_C(0))
{
if (Parent)
Parent->getFragmentList().push_back(this);
}
-MCFragment::~MCFragment() {
-}
-
/* *** */
MCSectionData::MCSectionData() : Section(0) {}
@@ -160,8 +193,6 @@ MCSectionData::MCSectionData(const MCSection &_Section, MCAssembler *A)
: Section(&_Section),
Alignment(1),
Address(~UINT64_C(0)),
- Size(~UINT64_C(0)),
- FileSize(~UINT64_C(0)),
HasInstructions(false)
{
if (A)
@@ -195,7 +226,7 @@ MCAssembler::~MCAssembler() {
}
static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
- const MCAsmFixup &Fixup,
+ const MCFixup &Fixup,
const MCValue Target,
const MCSection *BaseSection) {
// The effective fixup address is
@@ -233,7 +264,7 @@ static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
static bool isScatteredFixupFullyResolved(const MCAssembler &Asm,
const MCAsmLayout &Layout,
- const MCAsmFixup &Fixup,
+ const MCFixup &Fixup,
const MCValue Target,
const MCSymbolData *BaseSymbol) {
// The effective fixup address is
@@ -291,36 +322,6 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const {
SD->getFragment()->getParent()->getSection());
}
-// FIXME-PERF: This routine is really slow.
-const MCSymbolData *MCAssembler::getAtomForAddress(const MCAsmLayout &Layout,
- const MCSectionData *Section,
- uint64_t Address) const {
- const MCSymbolData *Best = 0;
- uint64_t BestAddress = 0;
-
- for (MCAssembler::const_symbol_iterator it = symbol_begin(),
- ie = symbol_end(); it != ie; ++it) {
- // Ignore non-linker visible symbols.
- if (!isSymbolLinkerVisible(it))
- continue;
-
- // Ignore symbols not in the same section.
- if (!it->getFragment() || it->getFragment()->getParent() != Section)
- continue;
-
- // Otherwise, find the closest symbol preceding this address (ties are
- // resolved in favor of the last defined symbol).
- uint64_t SymbolAddress = Layout.getSymbolAddress(it);
- if (SymbolAddress <= Address && (!Best || SymbolAddress >= BestAddress)) {
- Best = it;
- BestAddress = SymbolAddress;
- }
- }
-
- return Best;
-}
-
-// FIXME-PERF: This routine is really slow.
const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout,
const MCSymbolData *SD) const {
// Linker visible symbols define atoms.
@@ -331,17 +332,22 @@ const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout,
if (!SD->getFragment())
return 0;
- // Otherwise, search by address.
- return getAtomForAddress(Layout, SD->getFragment()->getParent(),
- Layout.getSymbolAddress(SD));
+ // Non-linker visible symbols in sections which can't be atomized have no
+ // defining atom.
+ if (!getBackend().isSectionAtomizable(
+ SD->getFragment()->getParent()->getSection()))
+ return 0;
+
+ // Otherwise, return the atom for the containing fragment.
+ return SD->getFragment()->getAtom();
}
bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
- const MCAsmFixup &Fixup, const MCFragment *DF,
+ const MCFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value) const {
++stats::EvaluateFixup;
- if (!Fixup.Value->EvaluateAsRelocatable(Target, &Layout))
+ if (!Fixup.getValue()->EvaluateAsRelocatable(Target, &Layout))
report_fatal_error("expected relocatable expression");
// FIXME: How do non-scattered symbols work in ELF? I presume the linker
@@ -350,8 +356,8 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
Value = Target.getConstant();
- bool IsPCRel =
- Emitter.getFixupKindInfo(Fixup.Kind).Flags & MCFixupKindInfo::FKF_IsPCRel;
+ bool IsPCRel = Emitter.getFixupKindInfo(
+ Fixup.getKind()).Flags & MCFixupKindInfo::FKF_IsPCRel;
bool IsResolved = true;
if (const MCSymbolRefExpr *A = Target.getSymA()) {
if (A->getSymbol().isDefined())
@@ -374,8 +380,7 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
// symbol) that the fixup value is relative to.
const MCSymbolData *BaseSymbol = 0;
if (IsPCRel) {
- BaseSymbol = getAtomForAddress(
- Layout, DF->getParent(), Layout.getFragmentAddress(DF)+Fixup.Offset);
+ BaseSymbol = DF->getAtom();
if (!BaseSymbol)
IsResolved = false;
}
@@ -394,117 +399,123 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
}
if (IsPCRel)
- Value -= Layout.getFragmentAddress(DF) + Fixup.Offset;
+ Value -= Layout.getFragmentAddress(DF) + Fixup.getOffset();
return IsResolved;
}
-uint64_t MCAssembler::LayoutSection(MCSectionData &SD,
- MCAsmLayout &Layout,
- uint64_t StartAddress) {
- bool IsVirtual = getBackend().isVirtualSection(SD.getSection());
-
- ++stats::SectionLayouts;
-
- // Align this section if necessary by adding padding bytes to the previous
- // section. It is safe to adjust this out-of-band, because no symbol or
- // fragment is allowed to point past the end of the section at any time.
- if (uint64_t Pad = OffsetToAlignment(StartAddress, SD.getAlignment())) {
- // Unless this section is virtual (where we are allowed to adjust the offset
- // freely), the padding goes in the previous section.
- if (!IsVirtual) {
- // Find the previous non-virtual section.
- iterator it = &SD;
- assert(it != begin() && "Invalid initial section address!");
- for (--it; getBackend().isVirtualSection(it->getSection()); --it) ;
- Layout.setSectionFileSize(&*it, Layout.getSectionFileSize(&*it) + Pad);
- }
+uint64_t MCAssembler::ComputeFragmentSize(MCAsmLayout &Layout,
+ const MCFragment &F,
+ uint64_t SectionAddress,
+ uint64_t FragmentOffset) const {
+ switch (F.getKind()) {
+ case MCFragment::FT_Data:
+ return cast<MCDataFragment>(F).getContents().size();
+ case MCFragment::FT_Fill:
+ return cast<MCFillFragment>(F).getSize();
+ case MCFragment::FT_Inst:
+ return cast<MCInstFragment>(F).getInstSize();
- StartAddress += Pad;
- }
+ case MCFragment::FT_Align: {
+ const MCAlignFragment &AF = cast<MCAlignFragment>(F);
- // Set the aligned section address.
- Layout.setSectionAddress(&SD, StartAddress);
+ assert((!AF.hasOnlyAlignAddress() || !AF.getNextNode()) &&
+ "Invalid OnlyAlignAddress bit, not the last fragment!");
- uint64_t Address = StartAddress;
- for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) {
- MCFragment &F = *it;
+ uint64_t Size = OffsetToAlignment(SectionAddress + FragmentOffset,
+ AF.getAlignment());
- ++stats::FragmentLayouts;
+ // Honor MaxBytesToEmit.
+ if (Size > AF.getMaxBytesToEmit())
+ return 0;
- uint64_t FragmentOffset = Address - StartAddress;
- Layout.setFragmentOffset(&F, FragmentOffset);
+ return Size;
+ }
- // Evaluate fragment size.
- uint64_t EffectiveSize = 0;
- switch (F.getKind()) {
- case MCFragment::FT_Align: {
- MCAlignFragment &AF = cast<MCAlignFragment>(F);
+ case MCFragment::FT_Org: {
+ const MCOrgFragment &OF = cast<MCOrgFragment>(F);
- EffectiveSize = OffsetToAlignment(Address, AF.getAlignment());
- if (EffectiveSize > AF.getMaxBytesToEmit())
- EffectiveSize = 0;
- break;
- }
+  // FIXME: We should compute this sooner; we don't want to recurse here, and
+ // we would like to be more functional.
+ int64_t TargetLocation;
+ if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout))
+ report_fatal_error("expected assembly-time absolute expression");
- case MCFragment::FT_Data:
- EffectiveSize = cast<MCDataFragment>(F).getContents().size();
- break;
+ // FIXME: We need a way to communicate this error.
+ int64_t Offset = TargetLocation - FragmentOffset;
+ if (Offset < 0)
+ report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
+ "' (at offset '" + Twine(FragmentOffset) + "'");
- case MCFragment::FT_Fill: {
- MCFillFragment &FF = cast<MCFillFragment>(F);
- EffectiveSize = FF.getValueSize() * FF.getCount();
- break;
- }
+ return Offset;
+ }
+ }
- case MCFragment::FT_Inst:
- EffectiveSize = cast<MCInstFragment>(F).getInstSize();
- break;
+ assert(0 && "invalid fragment kind");
+ return 0;
+}
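+// E.g. an FT_Align fragment with Alignment = 8 located at SectionAddress +
+// FragmentOffset == 6 computes a size of 2, or 0 if that exceeded its
+// MaxBytesToEmit.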
- case MCFragment::FT_Org: {
- MCOrgFragment &OF = cast<MCOrgFragment>(F);
+void MCAsmLayout::LayoutFile() {
+ // Initialize the first section and set the valid fragment layout point. All
+ // actual layout computations are done lazily.
+ LastValidFragment = 0;
+ if (!getSectionOrder().empty())
+ getSectionOrder().front()->Address = 0;
+}
- int64_t TargetLocation;
- if (!OF.getOffset().EvaluateAsAbsolute(TargetLocation, &Layout))
- report_fatal_error("expected assembly-time absolute expression");
+void MCAsmLayout::LayoutFragment(MCFragment *F) {
+ MCFragment *Prev = F->getPrevNode();
- // FIXME: We need a way to communicate this error.
- int64_t Offset = TargetLocation - FragmentOffset;
- if (Offset < 0)
- report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
- "' (at offset '" + Twine(FragmentOffset) + "'");
+ // We should never try to recompute something which is up-to-date.
+ assert(!isFragmentUpToDate(F) && "Attempt to recompute up-to-date fragment!");
+ // We should never try to compute the fragment layout if the section isn't
+ // up-to-date.
+ assert(isSectionUpToDate(F->getParent()) &&
+ "Attempt to compute fragment before it's section!");
+ // We should never try to compute the fragment layout if it's predecessor
+ // isn't up-to-date.
+ assert((!Prev || isFragmentUpToDate(Prev)) &&
+ "Attempt to compute fragment before it's predecessor!");
- EffectiveSize = Offset;
- break;
- }
+ ++stats::FragmentLayouts;
- case MCFragment::FT_ZeroFill: {
- MCZeroFillFragment &ZFF = cast<MCZeroFillFragment>(F);
+ // Compute the fragment start address.
+ uint64_t StartAddress = F->getParent()->Address;
+ uint64_t Address = StartAddress;
+ if (Prev)
+ Address += Prev->Offset + Prev->EffectiveSize;
+
+ // Compute fragment offset and size.
+ F->Offset = Address - StartAddress;
+ F->EffectiveSize = getAssembler().ComputeFragmentSize(*this, *F, StartAddress,
+ F->Offset);
+ LastValidFragment = F;
+
+ // If this is the last fragment in a section, update the next section address.
+ if (!F->getNextNode()) {
+ unsigned NextIndex = F->getParent()->getLayoutOrder() + 1;
+ if (NextIndex != getSectionOrder().size())
+ LayoutSection(getSectionOrder()[NextIndex]);
+ }
+}
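+// E.g. if Prev has Offset = 16 and EffectiveSize = 8, the next fragment F is
+// laid out at F->Offset = 24, and LastValidFragment advances to F.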
- // Align the fragment offset; it is safe to adjust the offset freely since
- // this is only in virtual sections.
- //
- // FIXME: We shouldn't be doing this here.
- Address = RoundUpToAlignment(Address, ZFF.getAlignment());
- Layout.setFragmentOffset(&F, Address - StartAddress);
+void MCAsmLayout::LayoutSection(MCSectionData *SD) {
+ unsigned SectionOrderIndex = SD->getLayoutOrder();
- EffectiveSize = ZFF.getSize();
- break;
- }
- }
+ ++stats::SectionLayouts;
- Layout.setFragmentEffectiveSize(&F, EffectiveSize);
- Address += EffectiveSize;
+ // Compute the section start address.
+ uint64_t StartAddress = 0;
+ if (SectionOrderIndex) {
+ MCSectionData *Prev = getSectionOrder()[SectionOrderIndex - 1];
+ StartAddress = getSectionAddress(Prev) + getSectionAddressSize(Prev);
}
- // Set the section sizes.
- Layout.setSectionSize(&SD, Address - StartAddress);
- if (IsVirtual)
- Layout.setSectionFileSize(&SD, 0);
- else
- Layout.setSectionFileSize(&SD, Address - StartAddress);
+ // Honor the section alignment requirements.
+ StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
- return Address;
+ // Set the section address.
+ SD->Address = StartAddress;
}
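+// E.g. if the previous section ends at address 0x1234 and this section is
+// 16-byte aligned, RoundUpToAlignment places it at StartAddress 0x1240.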
/// WriteFragmentData - Write the \arg F data to the output file.
@@ -522,6 +533,8 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
MCAlignFragment &AF = cast<MCAlignFragment>(F);
uint64_t Count = FragmentSize / AF.getValueSize();
+ assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!");
+
// FIXME: This error shouldn't actually occur (the front end should emit
// multiple .align directives to enforce the semantics it wants), but is
// severe enough that we want to report it. How to handle this?
@@ -535,7 +548,7 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
// the Count bytes. Then if that did not fill any bytes or there are any
// bytes left to fill, use the Value and ValueSize to fill the rest.
// If we are aligning with nops, ask that target to emit the right data.
- if (AF.getEmitNops()) {
+ if (AF.hasEmitNops()) {
if (!Asm.getBackend().WriteNopData(Count, OW))
report_fatal_error("unable to write nop sequence of " +
Twine(Count) + " bytes");
@@ -565,7 +578,10 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
case MCFragment::FT_Fill: {
MCFillFragment &FF = cast<MCFillFragment>(F);
- for (uint64_t i = 0, e = FF.getCount(); i != e; ++i) {
+
+  assert(FF.getValueSize() && "Invalid virtual fill in concrete fragment!");
+
+ for (uint64_t i = 0, e = FF.getSize() / FF.getValueSize(); i != e; ++i) {
switch (FF.getValueSize()) {
default:
assert(0 && "Invalid size!");
@@ -590,11 +606,6 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
break;
}
-
- case MCFragment::FT_ZeroFill: {
- assert(0 && "Invalid zero fill fragment in concrete section!");
- break;
- }
}
assert(OW->getStream().tell() - Start == FragmentSize);
@@ -603,12 +614,27 @@ static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
void MCAssembler::WriteSectionData(const MCSectionData *SD,
const MCAsmLayout &Layout,
MCObjectWriter *OW) const {
- uint64_t SectionSize = Layout.getSectionSize(SD);
- uint64_t SectionFileSize = Layout.getSectionFileSize(SD);
-
// Ignore virtual sections.
if (getBackend().isVirtualSection(SD->getSection())) {
- assert(SectionFileSize == 0 && "Invalid size for section!");
+ assert(Layout.getSectionFileSize(SD) == 0 && "Invalid size for section!");
+
+ // Check that contents are only things legal inside a virtual section.
+ for (MCSectionData::const_iterator it = SD->begin(),
+ ie = SD->end(); it != ie; ++it) {
+ switch (it->getKind()) {
+ default:
+ assert(0 && "Invalid fragment in virtual section!");
+ case MCFragment::FT_Align:
+ assert(!cast<MCAlignFragment>(it)->getValueSize() &&
+ "Invalid align in virtual section!");
+ break;
+ case MCFragment::FT_Fill:
+ assert(!cast<MCFillFragment>(it)->getValueSize() &&
+ "Invalid fill in virtual section!");
+ break;
+ }
+ }
+
return;
}
@@ -619,11 +645,7 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
ie = SD->end(); it != ie; ++it)
WriteFragmentData(*this, Layout, *it, OW);
- // Add section padding.
- assert(SectionFileSize >= SectionSize && "Invalid section sizes!");
- OW->WriteZeros(SectionFileSize - SectionSize);
-
- assert(OW->getStream().tell() - Start == SectionFileSize);
+ assert(OW->getStream().tell() - Start == Layout.getSectionFileSize(SD));
}
void MCAssembler::Finish() {
@@ -631,20 +653,60 @@ void MCAssembler::Finish() {
llvm::errs() << "assembler backend - pre-layout\n--\n";
dump(); });
- // Assign section and fragment ordinals, all subsequent backend code is
- // responsible for updating these in place.
+ // Create the layout object.
+ MCAsmLayout Layout(*this);
+
+ // Insert additional align fragments for concrete sections to explicitly pad
+ // the previous section to match their alignment requirements. This is for
+  // 'gas' compatibility; it shouldn't strictly be necessary.
+ //
+ // FIXME: This may be Mach-O specific.
+ for (unsigned i = 1, e = Layout.getSectionOrder().size(); i < e; ++i) {
+ MCSectionData *SD = Layout.getSectionOrder()[i];
+
+ // Ignore sections without alignment requirements.
+ unsigned Align = SD->getAlignment();
+ if (Align <= 1)
+ continue;
+
+ // Ignore virtual sections, they don't cause file size modifications.
+ if (getBackend().isVirtualSection(SD->getSection()))
+ continue;
+
+ // Otherwise, create a new align fragment at the end of the previous
+ // section.
+ MCAlignFragment *AF = new MCAlignFragment(Align, 0, 1, Align,
+ Layout.getSectionOrder()[i - 1]);
+ AF->setOnlyAlignAddress(true);
+ }
+
+ // Create dummy fragments and assign section ordinals.
unsigned SectionIndex = 0;
- unsigned FragmentIndex = 0;
for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+    // Create dummy fragments to eliminate any empty sections; this
+    // simplifies layout.
+    if (it->getFragmentList().empty()) {
+      // Virtual sections may only contain virtual (zero ValueSize) fills.
+      unsigned ValueSize = 1;
+      if (getBackend().isVirtualSection(it->getSection()))
+        ValueSize = 0;
+      new MCFillFragment(0, ValueSize, 0, it);
+    }
+
it->setOrdinal(SectionIndex++);
+ }
- for (MCSectionData::iterator it2 = it->begin(),
- ie2 = it->end(); it2 != ie2; ++it2)
- it2->setOrdinal(FragmentIndex++);
+ // Assign layout order indices to sections and fragments.
+ unsigned FragmentIndex = 0;
+ for (unsigned i = 0, e = Layout.getSectionOrder().size(); i != e; ++i) {
+ MCSectionData *SD = Layout.getSectionOrder()[i];
+ SD->setLayoutOrder(i);
+
+ for (MCSectionData::iterator it2 = SD->begin(),
+ ie2 = SD->end(); it2 != ie2; ++it2)
+ it2->setLayoutOrder(FragmentIndex++);
}
// Layout until everything fits.
- MCAsmLayout Layout(*this);
while (LayoutOnce(Layout))
continue;
@@ -678,7 +740,7 @@ void MCAssembler::Finish() {
for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
- MCAsmFixup &Fixup = *it3;
+ MCFixup &Fixup = *it3;
// Evaluate the fixup.
MCValue Target;
@@ -702,7 +764,7 @@ void MCAssembler::Finish() {
stats::ObjectBytes += OS.tell() - StartOffset;
}
-bool MCAssembler::FixupNeedsRelaxation(const MCAsmFixup &Fixup,
+bool MCAssembler::FixupNeedsRelaxation(const MCFixup &Fixup,
const MCFragment *DF,
const MCAsmLayout &Layout) const {
if (getRelaxAll())
@@ -725,7 +787,7 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
// If this inst doesn't ever need relaxation, ignore it. This occurs when we
// are intentionally pushing out inst fragments, or because we relaxed a
// previous instruction to one that doesn't need relaxation.
- if (!getBackend().MayNeedRelaxation(IF->getInst(), IF->getFixups()))
+ if (!getBackend().MayNeedRelaxation(IF->getInst()))
return false;
for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
@@ -739,25 +801,8 @@ bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
++stats::RelaxationSteps;
- // Layout the concrete sections and fragments.
- uint64_t Address = 0;
- for (iterator it = begin(), ie = end(); it != ie; ++it) {
- // Skip virtual sections.
- if (getBackend().isVirtualSection(it->getSection()))
- continue;
-
- // Layout the section fragments and its size.
- Address = LayoutSection(*it, Layout, Address);
- }
-
- // Layout the virtual sections.
- for (iterator it = begin(), ie = end(); it != ie; ++it) {
- if (!getBackend().isVirtualSection(it->getSection()))
- continue;
-
- // Layout the section fragments and its size.
- Address = LayoutSection(*it, Layout, Address);
- }
+ // Layout the sections in order.
+ Layout.LayoutFile();
// Scan for fragments that need relaxation.
bool WasRelaxed = false;
@@ -779,7 +824,7 @@ bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
// Relax the fragment.
MCInst Relaxed;
- getBackend().RelaxInstruction(IF, Relaxed);
+ getBackend().RelaxInstruction(IF->getInst(), Relaxed);
// Encode the new instruction.
//
@@ -796,17 +841,12 @@ bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
IF->setInst(Relaxed);
IF->getCode() = Code;
IF->getFixups().clear();
- for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
- MCFixup &F = Fixups[i];
- IF->getFixups().push_back(MCAsmFixup(F.getOffset(), *F.getValue(),
- F.getKind()));
- }
+ // FIXME: Eliminate copy.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i)
+ IF->getFixups().push_back(Fixups[i]);
- // Update the layout, and remember that we relaxed. If we are relaxing
- // everything, we can skip this step since nothing will depend on updating
- // the values.
- if (!getRelaxAll())
- Layout.UpdateForSlide(IF, SlideAmount);
+ // Update the layout, and remember that we relaxed.
+ Layout.UpdateForSlide(IF, SlideAmount);
WasRelaxed = true;
}
}
@@ -838,12 +878,10 @@ void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
SD.getFragmentList().insert(it2, DF);
// Update the data fragments layout data.
- //
- // FIXME: Add MCAsmLayout utility for this.
DF->setParent(IF->getParent());
- DF->setOrdinal(IF->getOrdinal());
- Layout.setFragmentOffset(DF, Layout.getFragmentOffset(IF));
- Layout.setFragmentEffectiveSize(DF, Layout.getFragmentEffectiveSize(IF));
+ DF->setAtom(IF->getAtom());
+ DF->setLayoutOrder(IF->getLayoutOrder());
+ Layout.FragmentReplaced(IF, DF);
// Copy in the data and the fixups.
DF->getContents().append(IF->getCode().begin(), IF->getCode().end());
@@ -861,9 +899,10 @@ void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) {
- OS << "<MCAsmFixup" << " Offset:" << AF.Offset << " Value:" << *AF.Value
- << " Kind:" << AF.Kind << ">";
+raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
+ OS << "<MCFixup" << " Offset:" << AF.getOffset()
+ << " Value:" << *AF.getValue()
+ << " Kind:" << AF.getKind() << ">";
return OS;
}
@@ -872,94 +911,82 @@ raw_ostream &operator<<(raw_ostream &OS, const MCAsmFixup &AF) {
void MCFragment::dump() {
raw_ostream &OS = llvm::errs();
- OS << "<MCFragment " << (void*) this << " Offset:" << Offset
- << " EffectiveSize:" << EffectiveSize;
-
- OS << ">";
-}
-
-void MCAlignFragment::dump() {
- raw_ostream &OS = llvm::errs();
+ OS << "<";
+ switch (getKind()) {
+ case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
+ case MCFragment::FT_Data: OS << "MCDataFragment"; break;
+ case MCFragment::FT_Fill: OS << "MCFillFragment"; break;
+ case MCFragment::FT_Inst: OS << "MCInstFragment"; break;
+ case MCFragment::FT_Org: OS << "MCOrgFragment"; break;
+ }
- OS << "<MCAlignFragment ";
- this->MCFragment::dump();
- OS << "\n ";
- OS << " Alignment:" << getAlignment()
- << " Value:" << getValue() << " ValueSize:" << getValueSize()
- << " MaxBytesToEmit:" << getMaxBytesToEmit() << ">";
-}
+ OS << "<MCFragment " << (void*) this << " LayoutOrder:" << LayoutOrder
+ << " Offset:" << Offset << " EffectiveSize:" << EffectiveSize << ">";
-void MCDataFragment::dump() {
- raw_ostream &OS = llvm::errs();
-
- OS << "<MCDataFragment ";
- this->MCFragment::dump();
- OS << "\n ";
- OS << " Contents:[";
- for (unsigned i = 0, e = getContents().size(); i != e; ++i) {
- if (i) OS << ",";
- OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
- }
- OS << "] (" << getContents().size() << " bytes)";
-
- if (!getFixups().empty()) {
- OS << ",\n ";
- OS << " Fixups:[";
- for (fixup_iterator it = fixup_begin(), ie = fixup_end(); it != ie; ++it) {
- if (it != fixup_begin()) OS << ",\n ";
- OS << *it;
+ switch (getKind()) {
+ case MCFragment::FT_Align: {
+ const MCAlignFragment *AF = cast<MCAlignFragment>(this);
+ if (AF->hasEmitNops())
+ OS << " (emit nops)";
+ if (AF->hasOnlyAlignAddress())
+ OS << " (only align section)";
+ OS << "\n ";
+ OS << " Alignment:" << AF->getAlignment()
+ << " Value:" << AF->getValue() << " ValueSize:" << AF->getValueSize()
+ << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
+ break;
+ }
+ case MCFragment::FT_Data: {
+ const MCDataFragment *DF = cast<MCDataFragment>(this);
+ OS << "\n ";
+ OS << " Contents:[";
+ const SmallVectorImpl<char> &Contents = DF->getContents();
+ for (unsigned i = 0, e = Contents.size(); i != e; ++i) {
+ if (i) OS << ",";
+ OS << hexdigit((Contents[i] >> 4) & 0xF) << hexdigit(Contents[i] & 0xF);
}
- OS << "]";
+ OS << "] (" << Contents.size() << " bytes)";
+
+ if (!DF->getFixups().empty()) {
+ OS << ",\n ";
+ OS << " Fixups:[";
+ for (MCDataFragment::const_fixup_iterator it = DF->fixup_begin(),
+ ie = DF->fixup_end(); it != ie; ++it) {
+ if (it != DF->fixup_begin()) OS << ",\n ";
+ OS << *it;
+ }
+ OS << "]";
+ }
+ break;
+ }
+ case MCFragment::FT_Fill: {
+ const MCFillFragment *FF = cast<MCFillFragment>(this);
+ OS << " Value:" << FF->getValue() << " ValueSize:" << FF->getValueSize()
+ << " Size:" << FF->getSize();
+ break;
+ }
+ case MCFragment::FT_Inst: {
+ const MCInstFragment *IF = cast<MCInstFragment>(this);
+ OS << "\n ";
+ OS << " Inst:";
+ IF->getInst().dump_pretty(OS);
+ break;
+ }
+ case MCFragment::FT_Org: {
+ const MCOrgFragment *OF = cast<MCOrgFragment>(this);
+ OS << "\n ";
+ OS << " Offset:" << OF->getOffset() << " Value:" << OF->getValue();
+ break;
+ }
}
-
- OS << ">";
-}
-
-void MCFillFragment::dump() {
- raw_ostream &OS = llvm::errs();
-
- OS << "<MCFillFragment ";
- this->MCFragment::dump();
- OS << "\n ";
- OS << " Value:" << getValue() << " ValueSize:" << getValueSize()
- << " Count:" << getCount() << ">";
-}
-
-void MCInstFragment::dump() {
- raw_ostream &OS = llvm::errs();
-
- OS << "<MCInstFragment ";
- this->MCFragment::dump();
- OS << "\n ";
- OS << " Inst:";
- getInst().dump_pretty(OS);
OS << ">";
}
-void MCOrgFragment::dump() {
- raw_ostream &OS = llvm::errs();
-
- OS << "<MCOrgFragment ";
- this->MCFragment::dump();
- OS << "\n ";
- OS << " Offset:" << getOffset() << " Value:" << getValue() << ">";
-}
-
-void MCZeroFillFragment::dump() {
- raw_ostream &OS = llvm::errs();
-
- OS << "<MCZeroFillFragment ";
- this->MCFragment::dump();
- OS << "\n ";
- OS << " Size:" << getSize() << " Alignment:" << getAlignment() << ">";
-}
-
void MCSectionData::dump() {
raw_ostream &OS = llvm::errs();
OS << "<MCSectionData";
OS << " Alignment:" << getAlignment() << " Address:" << Address
- << " Size:" << Size << " FileSize:" << FileSize
<< " Fragments:[\n ";
for (iterator it = begin(), ie = end(); it != ie; ++it) {
if (it != begin()) OS << ",\n ";
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index dc757bb..53ffc94 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -11,18 +11,22 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCLabel.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
using namespace llvm;
typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
+typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) {
MachOUniquingMap = 0;
ELFUniquingMap = 0;
+ COFFUniquingMap = 0;
}
MCContext::~MCContext() {
@@ -32,6 +36,7 @@ MCContext::~MCContext() {
// If we have the MachO uniquing map, free it.
delete (MachOUniqueMapTy*)MachOUniquingMap;
delete (ELFUniqueMapTy*)ELFUniquingMap;
+ delete (COFFUniqueMapTy*)COFFUniquingMap;
}
//===----------------------------------------------------------------------===//
@@ -67,6 +72,34 @@ MCSymbol *MCContext::CreateTempSymbol() {
"tmp" + Twine(NextUniqueID++));
}
+unsigned MCContext::NextInstance(int64_t LocalLabelVal) {
+ MCLabel *&Label = Instances[LocalLabelVal];
+ if (!Label)
+ Label = new (*this) MCLabel(0);
+ return Label->incInstance();
+}
+
+unsigned MCContext::GetInstance(int64_t LocalLabelVal) {
+ MCLabel *&Label = Instances[LocalLabelVal];
+ if (!Label)
+ Label = new (*this) MCLabel(0);
+ return Label->getInstance();
+}
+
+MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) {
+ return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+ Twine(LocalLabelVal) +
+ "\2" +
+ Twine(NextInstance(LocalLabelVal)));
+}
+MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal,
+ int bORf) {
+ return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+ Twine(LocalLabelVal) +
+ "\2" +
+ Twine(GetInstance(LocalLabelVal) + bORf));
+}
+
MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
return Symbols.lookup(Name);
}
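A minimal sketch (not part of the patch) of how the directional-label helpers above behave, assuming MCLabel::incInstance returns the new instance count and the private-global prefix is "L"; the "\2" separator keeps the generated names out of the user namespace:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include <cassert>
using namespace llvm;

static void directionalLabelSketch(MCContext &Ctx) {
  // Two definitions of the local label "1:".
  MCSymbol *First = Ctx.CreateDirectionalLocalSymbol(1);  // "L1\x02" "1"
  MCSymbol *Second = Ctx.CreateDirectionalLocalSymbol(1); // "L1\x02" "2"

  // A later "1b" (bORf == 0) binds to the nearest preceding "1:".
  MCSymbol *Back = Ctx.GetDirectionalLocalSymbol(1, /*bORf=*/0);
  assert(Back == Second && "backward references hit the latest instance");

  // "1f" (bORf == 1) names the instance the next "1:" will create.
  MCSymbol *Fwd = Ctx.GetDirectionalLocalSymbol(1, /*bORf=*/1);
  assert(Fwd != First && Fwd != Second);
  (void)Back; (void)Fwd;
}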
@@ -122,4 +155,22 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
return Result;
}
-
+const MCSection *MCContext::getCOFFSection(StringRef Section,
+ unsigned Characteristics,
+ int Selection,
+ SectionKind Kind) {
+ if (COFFUniquingMap == 0)
+ COFFUniquingMap = new COFFUniqueMapTy();
+ COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap;
+
+ // Do the lookup, if we have a hit, return it.
+ StringMapEntry<const MCSectionCOFF*> &Entry = Map.GetOrCreateValue(Section);
+ if (Entry.getValue()) return Entry.getValue();
+
+ MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(),
+ Characteristics,
+ Selection, Kind);
+
+ Entry.setValue(Result);
+ return Result;
+}
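A usage sketch (not from the patch): COFF sections are uniqued by name, so repeated lookups hand back the same object. The IMAGE_SCN_MEM_EXECUTE characteristic below is assumed from the COFF spec rather than taken from this diff:

#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/SectionKind.h"
#include <cassert>
using namespace llvm;

static const MCSection *textSectionSketch(MCContext &Ctx) {
  const MCSection *A = Ctx.getCOFFSection(".text",
                                          MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE,
                                          /*Selection=*/0,
                                          SectionKind::getText());
  const MCSection *B = Ctx.getCOFFSection(".text",
                                          MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE,
                                          /*Selection=*/0,
                                          SectionKind::getText());
  assert(A == B && "same name, same section object");
  return A;
}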
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index bc670ab..c000dd7 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -39,6 +39,10 @@ void MCExpr::print(raw_ostream &OS) const {
const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this);
const MCSymbol &Sym = SRE.getSymbol();
+ if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 ||
+ SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16)
+ OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
+
// Parenthesize names that start with $ so that they don't look like
// absolute names.
if (Sym.getName()[0] == '$')
@@ -46,7 +50,9 @@ void MCExpr::print(raw_ostream &OS) const {
else
OS << Sym;
- if (SRE.getKind() != MCSymbolRefExpr::VK_None)
+ if (SRE.getKind() != MCSymbolRefExpr::VK_None &&
+ SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 &&
+ SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16)
OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind());
return;
@@ -169,6 +175,9 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PLT: return "PLT";
case VK_TLSGD: return "TLSGD";
case VK_TPOFF: return "TPOFF";
+ case VK_ARM_HI16: return ":upper16:";
+ case VK_ARM_LO16: return ":lower16:";
+ case VK_TLVP: return "TLVP";
}
}
@@ -184,6 +193,7 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("PLT", VK_PLT)
.Case("TLSGD", VK_TLSGD)
.Case("TPOFF", VK_TPOFF)
+ .Case("TLVP", VK_TLVP)
.Default(VK_Invalid);
}
@@ -249,7 +259,7 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
// Evaluate recursively if this is a variable.
if (Sym.isVariable()) {
- if (!Sym.getValue()->EvaluateAsRelocatable(Res, Layout))
+ if (!Sym.getVariableValue()->EvaluateAsRelocatable(Res, Layout))
return false;
// Absolutize symbol differences between defined symbols when we have a
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
index de142dc..4cb628b 100644
--- a/lib/MC/MCInst.cpp
+++ b/lib/MC/MCInst.cpp
@@ -57,7 +57,7 @@ void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI,
OS << Separator;
getOperand(i).print(OS, MAI);
}
- OS << ">\n";
+ OS << ">";
}
void MCInst::dump() const {
diff --git a/lib/MC/MCLabel.cpp b/lib/MC/MCLabel.cpp
new file mode 100644
index 0000000..9c0fc92
--- /dev/null
+++ b/lib/MC/MCLabel.cpp
@@ -0,0 +1,21 @@
+//===- lib/MC/MCLabel.cpp - MCLabel implementation ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCLabel.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+void MCLabel::print(raw_ostream &OS) const {
+ OS << '"' << getInstance() << '"';
+}
+
+void MCLabel::dump() const {
+ print(dbgs());
+}
diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp
new file mode 100644
index 0000000..b96040a
--- /dev/null
+++ b/lib/MC/MCLoggingStreamer.cpp
@@ -0,0 +1,208 @@
+//===- lib/MC/MCLoggingStreamer.cpp - API Logging Streamer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+class MCLoggingStreamer : public MCStreamer {
+ llvm::OwningPtr<MCStreamer> Child;
+
+ raw_ostream &OS;
+
+public:
+ MCLoggingStreamer(MCStreamer *_Child, raw_ostream &_OS)
+ : MCStreamer(_Child->getContext()), Child(_Child), OS(_OS) {}
+
+ void LogCall(const char *Function) {
+ OS << Function << "\n";
+ }
+
+ void LogCall(const char *Function, const Twine &Message) {
+ OS << Function << ": " << Message << "\n";
+ }
+
+ virtual bool isVerboseAsm() const { return Child->isVerboseAsm(); }
+
+ virtual bool hasRawTextSupport() const { return Child->hasRawTextSupport(); }
+
+ virtual raw_ostream &GetCommentOS() { return Child->GetCommentOS(); }
+
+ virtual void AddComment(const Twine &T) {
+ LogCall("AddComment", T);
+ return Child->AddComment(T);
+ }
+
+ virtual void AddBlankLine() {
+ LogCall("AddBlankLine");
+ return Child->AddBlankLine();
+ }
+
+ virtual void SwitchSection(const MCSection *Section) {
+ CurSection = Section;
+ LogCall("SwitchSection");
+ return Child->SwitchSection(Section);
+ }
+
+ virtual void EmitLabel(MCSymbol *Symbol) {
+ LogCall("EmitLabel");
+ return Child->EmitLabel(Symbol);
+ }
+
+ virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {
+ LogCall("EmitAssemblerFlag");
+ return Child->EmitAssemblerFlag(Flag);
+ }
+
+ virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
+ LogCall("EmitAssignment");
+ return Child->EmitAssignment(Symbol, Value);
+ }
+
+ virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
+ LogCall("EmitSymbolAttribute");
+ return Child->EmitSymbolAttribute(Symbol, Attribute);
+ }
+
+ virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
+ LogCall("EmitSymbolDesc");
+ return Child->EmitSymbolDesc(Symbol, DescValue);
+ }
+
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ LogCall("BeginCOFFSymbolDef");
+ return Child->BeginCOFFSymbolDef(Symbol);
+ }
+
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ LogCall("EmitCOFFSymbolStorageClass");
+ return Child->EmitCOFFSymbolStorageClass(StorageClass);
+ }
+
+ virtual void EmitCOFFSymbolType(int Type) {
+ LogCall("EmitCOFFSymbolType");
+ return Child->EmitCOFFSymbolType(Type);
+ }
+
+ virtual void EndCOFFSymbolDef() {
+ LogCall("EndCOFFSymbolDef");
+ return Child->EndCOFFSymbolDef();
+ }
+
+ virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ LogCall("EmitELFSize");
+ return Child->EmitELFSize(Symbol, Value);
+ }
+
+ virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
+ unsigned ByteAlignment) {
+ LogCall("EmitCommonSymbol");
+ return Child->EmitCommonSymbol(Symbol, Size, ByteAlignment);
+ }
+
+ virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) {
+ LogCall("EmitLocalCommonSymbol");
+ return Child->EmitLocalCommonSymbol(Symbol, Size);
+ }
+
+ virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
+ unsigned Size = 0, unsigned ByteAlignment = 0) {
+ LogCall("EmitZerofill");
+ return Child->EmitZerofill(Section, Symbol, Size, ByteAlignment);
+ }
+
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,

+ uint64_t Size, unsigned ByteAlignment = 0) {
+ LogCall("EmitTBSSSymbol");
+ return Child->EmitTBSSSymbol(Section, Symbol, Size, ByteAlignment);
+ }
+
+ virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {
+ LogCall("EmitBytes");
+ return Child->EmitBytes(Data, AddrSpace);
+ }
+
+ virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace){
+ LogCall("EmitValue");
+ return Child->EmitValue(Value, Size, AddrSpace);
+ }
+
+ virtual void EmitIntValue(uint64_t Value, unsigned Size, unsigned AddrSpace) {
+ LogCall("EmitIntValue");
+ return Child->EmitIntValue(Value, Size, AddrSpace);
+ }
+
+ virtual void EmitGPRel32Value(const MCExpr *Value) {
+ LogCall("EmitGPRel32Value");
+ return Child->EmitGPRel32Value(Value);
+ }
+
+ virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
+ unsigned AddrSpace) {
+ LogCall("EmitFill");
+ return Child->EmitFill(NumBytes, FillValue, AddrSpace);
+ }
+
+ virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+ unsigned ValueSize = 1,
+ unsigned MaxBytesToEmit = 0) {
+ LogCall("EmitValueToAlignment");
+ return Child->EmitValueToAlignment(ByteAlignment, Value,
+ ValueSize, MaxBytesToEmit);
+ }
+
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0) {
+ LogCall("EmitCodeAlignment");
+ return Child->EmitCodeAlignment(ByteAlignment, MaxBytesToEmit);
+ }
+
+ virtual void EmitValueToOffset(const MCExpr *Offset,
+ unsigned char Value = 0) {
+ LogCall("EmitValueToOffset");
+ return Child->EmitValueToOffset(Offset, Value);
+ }
+
+ virtual void EmitFileDirective(StringRef Filename) {
+ LogCall("EmitFileDirective", "FileName:" + Filename);
+ return Child->EmitFileDirective(Filename);
+ }
+
+ virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
+ LogCall("EmitDwarfFileDirective",
+ "FileNo:" + Twine(FileNo) + " Filename:" + Filename);
+ return Child->EmitDwarfFileDirective(FileNo, Filename);
+ }
+
+ virtual void EmitInstruction(const MCInst &Inst) {
+ LogCall("EmitInstruction");
+ return Child->EmitInstruction(Inst);
+ }
+
+ virtual void EmitRawText(StringRef String) {
+ LogCall("EmitRawText", "\"" + String + "\"");
+ return Child->EmitRawText(String);
+ }
+
+ virtual void Finish() {
+ LogCall("Finish");
+ return Child->Finish();
+ }
+
+};
+
+} // end anonymous namespace.
+
+MCStreamer *llvm::createLoggingStreamer(MCStreamer *Child, raw_ostream &OS) {
+ return new MCLoggingStreamer(Child, OS);
+}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 120f837..27e4e98 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmBackend.h"
@@ -25,30 +26,14 @@ using namespace llvm;
namespace {
class MCMachOStreamer : public MCStreamer {
- /// SymbolFlags - We store the value for the 'desc' symbol field in the lowest
- /// 16 bits of the implementation defined flags.
- enum SymbolFlags { // See <mach-o/nlist.h>.
- SF_DescFlagsMask = 0xFFFF,
-
- // Reference type flags.
- SF_ReferenceTypeMask = 0x0007,
- SF_ReferenceTypeUndefinedNonLazy = 0x0000,
- SF_ReferenceTypeUndefinedLazy = 0x0001,
- SF_ReferenceTypeDefined = 0x0002,
- SF_ReferenceTypePrivateDefined = 0x0003,
- SF_ReferenceTypePrivateUndefinedNonLazy = 0x0004,
- SF_ReferenceTypePrivateUndefinedLazy = 0x0005,
-
- // Other 'desc' flags.
- SF_NoDeadStrip = 0x0020,
- SF_WeakReference = 0x0040,
- SF_WeakDefinition = 0x0080
- };
private:
MCAssembler Assembler;
MCSectionData *CurSectionData;
+ /// Track the current atom for each section.
+ DenseMap<const MCSectionData*, MCSymbolData*> CurrentAtomMap;
+
private:
MCFragment *getCurrentFragment() const {
assert(CurSectionData && "No current section!");
@@ -64,10 +49,20 @@ private:
MCDataFragment *getOrCreateDataFragment() const {
MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
if (!F)
- F = new MCDataFragment(CurSectionData);
+ F = createDataFragment();
return F;
}
+ /// Create a new data fragment in the current section.
+ MCDataFragment *createDataFragment() const {
+ MCDataFragment *DF = new MCDataFragment(CurSectionData);
+ DF->setAtom(CurrentAtomMap.lookup(CurSectionData));
+ return DF;
+ }
+
+ void EmitInstToFragment(const MCInst &Inst);
+ void EmitInstToData(const MCInst &Inst);
+
public:
MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
raw_ostream &_OS, MCCodeEmitter *_Emitter)
@@ -114,6 +109,18 @@ public:
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EmitCOFFSymbolType(int Type) {
+ assert(0 && "macho doesn't support this directive");
+ }
+ virtual void EndCOFFSymbolDef() {
+ assert(0 && "macho doesn't support this directive");
+ }
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
assert(0 && "macho doesn't support this directive");
}
@@ -122,6 +129,8 @@ public:
}
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment = 0);
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
virtual void EmitValue(const MCExpr *Value, unsigned Size,unsigned AddrSpace);
virtual void EmitGPRel32Value(const MCExpr *Value) {
@@ -134,14 +143,14 @@ public:
unsigned MaxBytesToEmit = 0);
virtual void EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
-
+
virtual void EmitFileDirective(StringRef Filename) {
- errs() << "FIXME: MCMachoStreamer:EmitFileDirective not implemented\n";
+ report_fatal_error("unsupported directive: '.file'");
}
virtual void EmitDwarfFileDirective(unsigned FileNo, StringRef Filename) {
- errs() << "FIXME: MCMachoStreamer:EmitDwarfFileDirective not implemented\n";
+ report_fatal_error("unsupported directive: '.file'");
}
-
+
virtual void EmitInstruction(const MCInst &Inst);
virtual void Finish();
@@ -152,7 +161,7 @@ public:
void MCMachOStreamer::SwitchSection(const MCSection *Section) {
assert(Section && "Cannot switch to a null section!");
-
+
// If already in this section, then this is a noop.
if (Section == CurSection) return;
@@ -162,20 +171,39 @@ void MCMachOStreamer::SwitchSection(const MCSection *Section) {
void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
+ assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
+ assert(CurSection && "Cannot emit before setting section!");
+
+ MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
+
+ // Update the current atom map, if necessary.
+ bool MustCreateFragment = false;
+ if (Assembler.isSymbolLinkerVisible(&SD)) {
+ CurrentAtomMap[CurSectionData] = &SD;
+
+ // We have to create a new fragment, fragments cannot span atoms.
+ MustCreateFragment = true;
+ }
// FIXME: This is wasteful, we don't necessarily need to create a data
// fragment. Instead, we should mark the symbol as pointing into the data
// fragment if it exists, otherwise we should just queue the label and set its
// fragment pointer when we emit the next fragment.
- MCDataFragment *F = getOrCreateDataFragment();
- MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
+ MCDataFragment *F =
+ MustCreateFragment ? createDataFragment() : getOrCreateDataFragment();
assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
SD.setFragment(F);
SD.setOffset(F->getContents().size());
- // This causes the reference type and weak reference flags to be cleared.
- SD.setFlags(SD.getFlags() & ~(SF_WeakReference | SF_ReferenceTypeMask));
-
+ // This causes the reference type flag to be cleared. Darwin 'as' was "trying"
+ // to clear the weak reference and weak definition bits too, but the
+ // implementation was buggy. For now we just try to match 'as', for
+ // diffability.
+ //
+ // FIXME: Cleanup this code, these bits should be emitted based on semantic
+ // properties, not on the order of definition, etc.
+ SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask);
+
Symbol->setSection(*CurSection);
}
@@ -190,13 +218,9 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
}
void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
- // Only absolute symbols can be redefined.
- assert((Symbol->isUndefined() || Symbol->isAbsolute()) &&
- "Cannot define a symbol twice!");
-
// FIXME: Lift context changes into super class.
- // FIXME: Set associated section.
- Symbol->setValue(AddValueSymbols(Value));
+ Assembler.getOrCreateSymbolData(*Symbol);
+ Symbol->setVariableValue(AddValueSymbols(Value));
}
void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
@@ -243,6 +267,13 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
case MCSA_Global:
SD.setExternal(true);
+ // This effectively clears the undefined lazy bit in Darwin 'as', although
+ // it isn't very consistent because 'as' implements this as part of symbol
+ // lookup.
+ //
+ // FIXME: Cleanup this code, these bits should be emitted based on semantic
+ // properties, not on the order of definition, etc.
+ SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeUndefinedLazy);
break;
case MCSA_LazyReference:
@@ -280,7 +311,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
// Encode the 'desc' value into the lowest implementation defined bits.
- assert(DescValue == (DescValue & SF_DescFlagsMask) &&
+ assert(DescValue == (DescValue & SF_DescFlagsMask) &&
"Invalid .desc value!");
Assembler.getOrCreateSymbolData(*Symbol).setFlags(DescValue&SF_DescFlagsMask);
}
@@ -309,8 +340,14 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
- MCFragment *F = new MCZeroFillFragment(Size, ByteAlignment, &SectData);
+ // Emit an align fragment if necessary.
+ if (ByteAlignment != 1)
+ new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectData);
+
+ MCFragment *F = new MCFillFragment(0, 0, Size, &SectData);
SD.setFragment(F);
+ if (Assembler.isSymbolLinkerVisible(&SD))
+ F->setAtom(&SD);
Symbol->setSection(*Section);
@@ -319,6 +356,14 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
SectData.setAlignment(ByteAlignment);
}
+// This should always be called with the thread-local bss section. Like the
+// .zerofill directive, this doesn't actually switch sections on us.
+void MCMachOStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {
+ EmitZerofill(Section, Symbol, Size, ByteAlignment);
+ return;
+}
+
void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
}
@@ -334,8 +379,9 @@ void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
for (unsigned i = 0; i != Size; ++i)
DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
} else {
- DF->addFixup(MCAsmFixup(DF->getContents().size(), *AddValueSymbols(Value),
- MCFixup::getKindForSize(Size)));
+ DF->addFixup(MCFixup::Create(DF->getContents().size(),
+ AddValueSymbols(Value),
+ MCFixup::getKindForSize(Size)));
DF->getContents().resize(DF->getContents().size() + Size, 0);
}
}
@@ -345,8 +391,9 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
- new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
- false /* EmitNops */, CurSectionData);
+ MCFragment *F = new MCAlignFragment(ByteAlignment, Value, ValueSize,
+ MaxBytesToEmit, CurSectionData);
+ F->setAtom(CurrentAtomMap.lookup(CurSectionData));
// Update the maximum alignment on the current section if necessary.
if (ByteAlignment > CurSectionData->getAlignment())
@@ -357,8 +404,10 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
- new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
- true /* EmitNops */, CurSectionData);
+ MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
+ CurSectionData);
+ F->setEmitNops(true);
+ F->setAtom(CurrentAtomMap.lookup(CurSectionData));
// Update the maximum alignment on the current section if necessary.
if (ByteAlignment > CurSectionData->getAlignment())
@@ -367,19 +416,30 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
- new MCOrgFragment(*Offset, Value, CurSectionData);
+ MCFragment *F = new MCOrgFragment(*Offset, Value, CurSectionData);
+ F->setAtom(CurrentAtomMap.lookup(CurSectionData));
}
-void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
- // Scan for values.
- for (unsigned i = 0; i != Inst.getNumOperands(); ++i)
- if (Inst.getOperand(i).isExpr())
- AddValueSymbols(Inst.getOperand(i).getExpr());
+void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) {
+ MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData);
+ IF->setAtom(CurrentAtomMap.lookup(CurSectionData));
- CurSectionData->setHasInstructions(true);
+ // Add the fixups and data.
+ //
+ // FIXME: Revisit this design decision when relaxation is done; we may be
+ // able to get away with not storing any extra data in the MCInst.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
+ VecOS.flush();
- // FIXME-PERF: Common case is that we don't need to relax, encode directly
- // onto the data fragments buffers.
+ IF->getCode() = Code;
+ IF->getFixups() = Fixups;
+}
+
+void MCMachOStreamer::EmitInstToData(const MCInst &Inst) {
+ MCDataFragment *DF = getOrCreateDataFragment();
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
@@ -387,47 +447,41 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
VecOS.flush();
- // FIXME: Eliminate this copy.
- SmallVector<MCAsmFixup, 4> AsmFixups;
+ // Add the fixups and data.
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
- MCFixup &F = Fixups[i];
- AsmFixups.push_back(MCAsmFixup(F.getOffset(), *F.getValue(),
- F.getKind()));
+ Fixups[i].setOffset(Fixups[i].getOffset() + DF->getContents().size());
+ DF->addFixup(Fixups[i]);
}
+ DF->getContents().append(Code.begin(), Code.end());
+}
- // See if we might need to relax this instruction, if so it needs its own
- // fragment.
- //
- // FIXME-PERF: Support target hook to do a fast path that avoids the encoder,
- // when we can immediately tell that we will get something which might need
- // relaxation (and compute its size).
- //
- // FIXME-PERF: We should also be smart about immediately relaxing instructions
- // which we can already show will never possibly fit (we can also do a very
- // good job of this before we do the first relaxation pass, because we have
- // total knowledge about undefined symbols at that point). Even now, though,
- // we can do a decent job, especially on Darwin where scattering means that we
- // are going to often know that we can never fully resolve a fixup.
- if (Assembler.getBackend().MayNeedRelaxation(Inst, AsmFixups)) {
- MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData);
-
- // Add the fixups and data.
- //
- // FIXME: Revisit this design decision when relaxation is done, we may be
- // able to get away with not storing any extra data in the MCInst.
- IF->getCode() = Code;
- IF->getFixups() = AsmFixups;
+void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
+ // Scan for values.
+ for (unsigned i = Inst.getNumOperands(); i--; )
+ if (Inst.getOperand(i).isExpr())
+ AddValueSymbols(Inst.getOperand(i).getExpr());
+ CurSectionData->setHasInstructions(true);
+
+ // If this instruction doesn't need relaxation, just emit it as data.
+ if (!Assembler.getBackend().MayNeedRelaxation(Inst)) {
+ EmitInstToData(Inst);
return;
}
- // Add the fixups and data.
- MCDataFragment *DF = getOrCreateDataFragment();
- for (unsigned i = 0, e = AsmFixups.size(); i != e; ++i) {
- AsmFixups[i].Offset += DF->getContents().size();
- DF->addFixup(AsmFixups[i]);
+ // Otherwise, if we are relaxing everything, relax the instruction as much as
+ // possible and emit it as data.
+ if (Assembler.getRelaxAll()) {
+ MCInst Relaxed;
+ Assembler.getBackend().RelaxInstruction(Inst, Relaxed);
+ while (Assembler.getBackend().MayNeedRelaxation(Relaxed))
+ Assembler.getBackend().RelaxInstruction(Relaxed, Relaxed);
+ EmitInstToData(Relaxed);
+ return;
}
- DF->getContents().append(Code.begin(), Code.end());
+
+ // Otherwise emit to a separate fragment.
+ EmitInstToFragment(Inst);
}
void MCMachOStreamer::Finish() {
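The net effect of the rewritten EmitInstruction is a three-way split: instructions that can never grow are encoded straight into the current data fragment; in relax-all mode an instruction is immediately relaxed to its widest form and likewise emitted as data, trading encoding size for assembly speed since layout never revisits it; only instructions that might still grow get their own MCInstFragment for the relaxation loop in LayoutOnce.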
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 5f0c64a..5332ade 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -42,6 +42,12 @@ namespace {
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
+
+ virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
+ virtual void EmitCOFFSymbolStorageClass(int StorageClass) {}
+ virtual void EmitCOFFSymbolType(int Type) {}
+ virtual void EndCOFFSymbolDef() {}
+
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
@@ -49,7 +55,8 @@ namespace {
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0) {}
-
+ virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
+ uint64_t Size, unsigned ByteAlignment) {}
virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {}
virtual void EmitValue(const MCExpr *Value, unsigned Size,
diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp
index 1183312..1cbe09a 100644
--- a/lib/MC/MCParser/AsmLexer.cpp
+++ b/lib/MC/MCParser/AsmLexer.cpp
@@ -132,11 +132,6 @@ AsmToken AsmLexer::LexLineComment() {
/// Decimal integer: [1-9][0-9]*
/// TODO: FP literal.
AsmToken AsmLexer::LexDigit() {
- if (*CurPtr == ':')
- return ReturnError(TokStart, "FIXME: local label not implemented");
- if (*CurPtr == 'f' || *CurPtr == 'b')
- return ReturnError(TokStart, "FIXME: directional label not implemented");
-
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0') {
while (isdigit(*CurPtr))
@@ -158,6 +153,12 @@ AsmToken AsmLexer::LexDigit() {
if (*CurPtr == 'b') {
++CurPtr;
+ // See if we actually have "0b" as part of something like "jmp 0b\n"
+ if (!isdigit(CurPtr[0])) {
+ --CurPtr;
+ StringRef Result(TokStart, CurPtr - TokStart);
+ return AsmToken(AsmToken::Integer, Result, 0);
+ }
const char *NumStart = CurPtr;
while (CurPtr[0] == '0' || CurPtr[0] == '1')
++CurPtr;
@@ -280,6 +281,7 @@ AsmToken AsmLexer::LexToken() {
case '*': return AsmToken(AsmToken::Star, StringRef(TokStart, 1));
case ',': return AsmToken(AsmToken::Comma, StringRef(TokStart, 1));
case '$': return AsmToken(AsmToken::Dollar, StringRef(TokStart, 1));
+ case '@': return AsmToken(AsmToken::At, StringRef(TokStart, 1));
case '=':
if (*CurPtr == '=')
return ++CurPtr, AsmToken(AsmToken::EqualEqual, StringRef(TokStart, 2));
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index a63d2e4..4523eab 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCParser/AsmParser.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -189,6 +190,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@');
MCSymbol *Sym = CreateSymbol(Split.first);
+ // Mark the symbol as used in an expression.
+ Sym->setUsedInExpr(true);
+
// Lookup the symbol variant if used.
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
if (Split.first.size() != getTok().getIdentifier().size())
@@ -199,11 +203,11 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
- if (Sym->getValue() && isa<MCConstantExpr>(Sym->getValue())) {
+ if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {
if (Variant)
return Error(EndLoc, "unexpected modifier on variable reference");
- Res = Sym->getValue();
+ Res = Sym->getVariableValue();
return false;
}
@@ -211,11 +215,28 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
Res = MCSymbolRefExpr::Create(Sym, Variant, getContext());
return false;
}
- case AsmToken::Integer:
- Res = MCConstantExpr::Create(getTok().getIntVal(), getContext());
+ case AsmToken::Integer: {
+ SMLoc Loc = getTok().getLoc();
+ int64_t IntVal = getTok().getIntVal();
+ Res = MCConstantExpr::Create(IntVal, getContext());
EndLoc = Lexer.getLoc();
Lex(); // Eat token.
+ // Look for 'b' or 'f' following an Integer as a directional label
+ if (Lexer.getKind() == AsmToken::Identifier) {
+ StringRef IDVal = getTok().getString();
+ if (IDVal == "f" || IDVal == "b"){
+ MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal,
+ IDVal == "f" ? 1 : 0);
+ Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None,
+ getContext());
+ if (IDVal == "b" && Sym->isUndefined())
+ return Error(Loc, "invalid reference to undefined symbol");
+ EndLoc = Lexer.getLoc();
+ Lex(); // Eat identifier.
+ }
+ }
return false;
+ }
case AsmToken::Dot: {
// This is a '.' reference, which references the current PC. Emit a
// temporary label to the streamer and refer to it.
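In other words, 'jmp 1b' now resolves to the most recent '1:' already seen, while 'jmp 1f' refers ahead to the instance the next '1:' will define; a backward reference to a local label number that was never defined is rejected with "invalid reference to undefined symbol".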
@@ -411,6 +432,7 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
/// ::= Label* Identifier OperandList* EndOfStatement
bool AsmParser::ParseStatement() {
if (Lexer.is(AsmToken::EndOfStatement)) {
+ Out.AddBlankLine();
Lex();
return false;
}
@@ -419,7 +441,25 @@ bool AsmParser::ParseStatement() {
AsmToken ID = getTok();
SMLoc IDLoc = ID.getLoc();
StringRef IDVal;
- if (ParseIdentifier(IDVal)) {
+ int64_t LocalLabelVal = -1;
+ // GUESS allow an integer followed by a ':' as a directional local label
+ if (Lexer.is(AsmToken::Integer)) {
+ LocalLabelVal = getTok().getIntVal();
+ if (LocalLabelVal < 0) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ IDVal = "";
+ }
+ else {
+ IDVal = getTok().getString();
+ Lex(); // Consume the integer token to be used as an identifier token.
+ if (Lexer.getKind() != AsmToken::Colon) {
+ if (!TheCondState.Ignore)
+ return TokError("unexpected token at start of statement");
+ }
+ }
+ }
+ else if (ParseIdentifier(IDVal)) {
if (!TheCondState.Ignore)
return TokError("unexpected token at start of statement");
IDVal = "";
@@ -456,13 +496,25 @@ bool AsmParser::ParseStatement() {
// FIXME: Diagnostics. Note the location of the definition as a label.
// FIXME: This doesn't diagnose assignment to a symbol which has been
// implicitly marked as external.
- MCSymbol *Sym = CreateSymbol(IDVal);
- if (!Sym->isUndefined())
+ MCSymbol *Sym;
+ if (LocalLabelVal == -1)
+ Sym = CreateSymbol(IDVal);
+ else
+ Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal);
+ if (!Sym->isUndefined() || Sym->isVariable())
return Error(IDLoc, "invalid symbol redefinition");
// Emit the label.
Out.EmitLabel(Sym);
+ // Consume any end of statement token, if present, to avoid spurious
+ // AddBlankLine() calls.
+ if (Lexer.is(AsmToken::EndOfStatement)) {
+ Lex();
+ if (Lexer.is(AsmToken::Eof))
+ return false;
+ }
+
return ParseStatement();
}
@@ -620,6 +672,16 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveSectionSwitch("__OBJC", "__selector_strs",
MCSectionMachO::S_CSTRING_LITERALS);
+ if (IDVal == ".tdata")
+ return ParseDirectiveSectionSwitch("__DATA", "__thread_data",
+ MCSectionMachO::S_THREAD_LOCAL_REGULAR);
+ if (IDVal == ".tlv")
+ return ParseDirectiveSectionSwitch("__DATA", "__thread_vars",
+ MCSectionMachO::S_THREAD_LOCAL_VARIABLES);
+ if (IDVal == ".thread_init_func")
+ return ParseDirectiveSectionSwitch("__DATA", "__thread_init",
+ MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS);
+
// Assembler features
if (IDVal == ".set")
return ParseDirectiveSet();
@@ -686,6 +748,8 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveSymbolAttribute(MCSA_Protected);
if (IDVal == ".reference")
return ParseDirectiveSymbolAttribute(MCSA_Reference);
+ if (IDVal == ".type")
+ return ParseDirectiveELFType();
if (IDVal == ".weak")
return ParseDirectiveSymbolAttribute(MCSA_Weak);
if (IDVal == ".weak_definition")
@@ -703,6 +767,8 @@ bool AsmParser::ParseStatement() {
return ParseDirectiveDarwinSymbolDesc();
if (IDVal == ".lsym")
return ParseDirectiveDarwinLsym();
+ if (IDVal == ".tbss")
+ return ParseDirectiveDarwinTBSS();
if (IDVal == ".subsections_via_symbols")
return ParseDirectiveDarwinSubsectionsViaSymbols();
@@ -729,8 +795,13 @@ bool AsmParser::ParseStatement() {
return false;
}
+ // Canonicalize the opcode to lower case.
+ SmallString<128> Opcode;
+ for (unsigned i = 0, e = IDVal.size(); i != e; ++i)
+ Opcode.push_back(tolower(IDVal[i]));
+
SmallVector<MCParsedAsmOperand*, 8> ParsedOperands;
- bool HadError = getTargetParser().ParseInstruction(IDVal, IDLoc,
+ bool HadError = getTargetParser().ParseInstruction(Opcode.str(), IDLoc,
ParsedOperands);
if (!HadError && Lexer.isNot(AsmToken::EndOfStatement))
HadError = TokError("unexpected token in argument list");
@@ -786,11 +857,13 @@ bool AsmParser::ParseAssignment(const StringRef &Name) {
//
// FIXME: Diagnostics. Note the location of the definition as a label.
// FIXME: Diagnose assignment to protected identifier (e.g., register name).
- if (!Sym->isUndefined() && !Sym->isAbsolute())
+ if (Sym->isUndefined() && !Sym->isUsedInExpr())
+ ; // Allow redefinitions of undefined symbols only used in directives.
+ else if (!Sym->isUndefined() && !Sym->isAbsolute())
return Error(EqualLoc, "redefinition of '" + Name + "'");
else if (!Sym->isVariable())
return Error(EqualLoc, "invalid assignment to '" + Name + "'");
- else if (!isa<MCConstantExpr>(Sym->getValue()))
+ else if (!isa<MCConstantExpr>(Sym->getVariableValue()))
return Error(EqualLoc, "invalid reassignment of non-absolute variable '" +
Name + "'");
} else
@@ -798,6 +871,8 @@ bool AsmParser::ParseAssignment(const StringRef &Name) {
// FIXME: Handle '.'.
+ Sym->setUsedInExpr(true);
+
// Do the assignment.
Out.EmitAssignment(Sym, Value);
@@ -1008,7 +1083,11 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
if (ParseExpression(Value))
return true;
- Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE);
+ // Special case constant expressions to match code generator.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value))
+ Out.EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE);
+ else
+ Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE);
if (Lexer.is(AsmToken::EndOfStatement))
break;
@@ -1090,8 +1169,7 @@ bool AsmParser::ParseDirectiveFill() {
return TokError("invalid '.fill' size, expected 1, 2, 4, or 8");
for (uint64_t i = 0, e = NumValues; i != e; ++i)
- Out.EmitValue(MCConstantExpr::Create(FillExpr, getContext()), FillSize,
- DEFAULT_ADDRSPACE);
+ Out.EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE);
return false;
}
@@ -1169,10 +1247,8 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
Lex();
- if (!HasFillExpr) {
- // FIXME: Sometimes fill with nop.
+ if (!HasFillExpr)
FillExpr = 0;
- }
// Compute alignment in bytes.
if (IsPow2) {
@@ -1200,14 +1276,21 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
}
}
- // FIXME: hard code the parser to use EmitCodeAlignment for text when using
- // the TextAlignFillValue.
- if(Out.getCurrentSection()->getKind().isText() &&
- Lexer.getMAI().getTextAlignFillValue() == FillExpr)
+ // Check whether we should use optimal code alignment for this .align
+ // directive.
+ //
+ // FIXME: This should be using a target hook.
+ bool UseCodeAlign = false;
+ if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>(
+ Out.getCurrentSection()))
+ UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS);
+ if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
+ ValueSize == 1 && UseCodeAlign) {
Out.EmitCodeAlignment(Alignment, MaxBytesToFill);
- else
+ } else {
// FIXME: Target specific behavior about how the "extra" bytes are filled.
Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
+ }
return false;
}
@@ -1239,6 +1322,52 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
return false;
}
+/// ParseDirectiveELFType
+/// ::= .type identifier , @attribute
+bool AsmParser::ParseDirectiveELFType() {
+ StringRef Name;
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = CreateSymbol(Name);
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("unexpected token in '.type' directive");
+ Lex();
+
+ if (Lexer.isNot(AsmToken::At))
+ return TokError("expected '@' before type");
+ Lex();
+
+ StringRef Type;
+ SMLoc TypeLoc;
+
+ TypeLoc = Lexer.getLoc();
+ if (ParseIdentifier(Type))
+ return TokError("expected symbol type in directive");
+
+ MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type)
+ .Case("function", MCSA_ELF_TypeFunction)
+ .Case("object", MCSA_ELF_TypeObject)
+ .Case("tls_object", MCSA_ELF_TypeTLS)
+ .Case("common", MCSA_ELF_TypeCommon)
+ .Case("notype", MCSA_ELF_TypeNoType)
+ .Default(MCSA_Invalid);
+
+ if (Attr == MCSA_Invalid)
+ return Error(TypeLoc, "unsupported attribute in '.type' directive");
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.type' directive");
+
+ Lex();
+
+ Out.EmitSymbolAttribute(Sym, Attr);
+
+ return false;
+}
+
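For reference, the accepted form is e.g. '.type foo, @function'; the recognized type names map onto the MCSA_ELF_Type* symbol attributes via the StringSwitch above, and anything unrecognized is diagnosed at the type token's location.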
/// ParseDirectiveDarwinSymbolDesc
/// ::= .desc identifier , expression
bool AsmParser::ParseDirectiveDarwinSymbolDesc() {
@@ -1316,7 +1445,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
"be less than zero");
- // NOTE: The alignment in the directive is a power of 2 value, the assember
+ // NOTE: The alignment in the directive is a power of 2 value, the assembler
// may internally end up wanting an alignment in bytes.
// FIXME: Diagnose overflow.
if (Pow2Alignment < 0)
@@ -1344,22 +1473,18 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
/// ::= .zerofill segname , sectname [, identifier , size_expression [
/// , align_expression ]]
bool AsmParser::ParseDirectiveDarwinZerofill() {
- // FIXME: Handle quoted names here.
-
- if (Lexer.isNot(AsmToken::Identifier))
+ StringRef Segment;
+ if (ParseIdentifier(Segment))
return TokError("expected segment name after '.zerofill' directive");
- StringRef Segment = getTok().getString();
- Lex();
if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
Lex();
-
- if (Lexer.isNot(AsmToken::Identifier))
+
+ StringRef Section;
+ if (ParseIdentifier(Section))
return TokError("expected section name after comma in '.zerofill' "
"directive");
- StringRef Section = getTok().getString();
- Lex();
// If this is the end of the line all that was wanted was to create the
// the section but with no symbol.
@@ -1375,13 +1500,13 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
return TokError("unexpected token in directive");
Lex();
- if (Lexer.isNot(AsmToken::Identifier))
+ SMLoc IDLoc = Lexer.getLoc();
+ StringRef IDStr;
+ if (ParseIdentifier(IDStr))
return TokError("expected identifier in directive");
// handle the identifier as the key symbol.
- SMLoc IDLoc = Lexer.getLoc();
- MCSymbol *Sym = CreateSymbol(getTok().getString());
- Lex();
+ MCSymbol *Sym = CreateSymbol(IDStr);
if (Lexer.isNot(AsmToken::Comma))
return TokError("unexpected token in directive");
@@ -1410,7 +1535,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less "
"than zero");
- // NOTE: The alignment in the directive is a power of 2 value, the assember
+ // NOTE: The alignment in the directive is a power of 2 value, the assembler
// may internally end up wanting an alignment in bytes.
// FIXME: Diagnose overflow.
if (Pow2Alignment < 0)
@@ -1431,6 +1556,60 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
return false;
}
+/// ParseDirectiveDarwinTBSS
+/// ::= .tbss identifier, size, align
+bool AsmParser::ParseDirectiveDarwinTBSS() {
+ SMLoc IDLoc = Lexer.getLoc();
+ StringRef Name;
+ if (ParseIdentifier(Name))
+ return TokError("expected identifier in directive");
+
+ // Handle the identifier as the key symbol.
+ MCSymbol *Sym = CreateSymbol(Name);
+
+ if (Lexer.isNot(AsmToken::Comma))
+ return TokError("unexpected token in directive");
+ Lex();
+
+ int64_t Size;
+ SMLoc SizeLoc = Lexer.getLoc();
+ if (ParseAbsoluteExpression(Size))
+ return true;
+
+ int64_t Pow2Alignment = 0;
+ SMLoc Pow2AlignmentLoc;
+ if (Lexer.is(AsmToken::Comma)) {
+ Lex();
+ Pow2AlignmentLoc = Lexer.getLoc();
+ if (ParseAbsoluteExpression(Pow2Alignment))
+ return true;
+ }
+
+ if (Lexer.isNot(AsmToken::EndOfStatement))
+ return TokError("unexpected token in '.tbss' directive");
+
+ Lex();
+
+ if (Size < 0)
+ return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than "
+ "zero");
+
+ // FIXME: Diagnose overflow.
+ if (Pow2Alignment < 0)
+ return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less "
+ "than zero");
+
+ if (!Sym->isUndefined())
+ return Error(IDLoc, "invalid symbol redefinition");
+
+ Out.EmitTBSSSymbol(Ctx.getMachOSection("__DATA", "__thread_bss",
+ MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
+ 0, SectionKind::getThreadBSS()),
+ Sym, Size, 1 << Pow2Alignment);
+
+ return false;
+}
+
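For reference, the accepted form is '.tbss _example$tlv$init, 4, 3' (symbol, byte size, then an optional power-of-two alignment), which is lowered to an EmitTBSSSymbol call against the __DATA,__thread_bss section with S_THREAD_LOCAL_ZEROFILL set.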
/// ParseDirectiveDarwinSubsectionsViaSymbols
/// ::= .subsections_via_symbols
bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() {
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index f6e9636..a792d56 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -20,30 +20,3 @@ using namespace llvm;
MCSection::~MCSection() {
}
-//===----------------------------------------------------------------------===//
-// MCSectionCOFF
-//===----------------------------------------------------------------------===//
-
-MCSectionCOFF *MCSectionCOFF::
-Create(StringRef Name, bool IsDirective, SectionKind K, MCContext &Ctx) {
- char *NameCopy = static_cast<char*>(
- Ctx.Allocate(Name.size(), /*Alignment=*/1));
- memcpy(NameCopy, Name.data(), Name.size());
- return new (Ctx) MCSectionCOFF(StringRef(NameCopy, Name.size()),
- IsDirective, K);
-}
-
-void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
- raw_ostream &OS) const {
-
- if (isDirective()) {
- OS << getName() << '\n';
- return;
- }
- OS << "\t.section\t" << getName() << ",\"";
- if (getKind().isText())
- OS << 'x';
- if (getKind().isWriteable())
- OS << 'w';
- OS << "\"\n";
-}
diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp
new file mode 100644
index 0000000..d57bb0c
--- /dev/null
+++ b/lib/MC/MCSectionCOFF.cpp
@@ -0,0 +1,76 @@
+//===- lib/MC/MCSectionCOFF.cpp - COFF Code Section Representation --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSectionCOFF.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+MCSectionCOFF::~MCSectionCOFF() {} // anchor.
+
+// ShouldOmitSectionDirective - Decides whether a '.section' directive
+// should be printed before the section name
+bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name,
+ const MCAsmInfo &MAI) const {
+
+ // FIXME: Does .section .bss/.data/.text work everywhere??
+ if (Name == ".text" || Name == ".data" || Name == ".bss")
+ return true;
+
+ return false;
+}
+
+void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
+ raw_ostream &OS) const {
+
+ // standard sections don't require the '.section'
+ if (ShouldOmitSectionDirective(SectionName, MAI)) {
+ OS << '\t' << getSectionName() << '\n';
+ return;
+ }
+
+ OS << "\t.section\t" << getSectionName() << ",\"";
+ if (getKind().isText())
+ OS << 'x';
+ if (getKind().isWriteable())
+ OS << 'w';
+ else
+ OS << 'r';
+ if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE)
+ OS << 'n';
+ OS << "\"\n";
+
+ if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_LNK_COMDAT) {
+ switch (Selection) {
+ case IMAGE_COMDAT_SELECT_NODUPLICATES:
+ OS << "\t.linkonce one_only\n";
+ break;
+ case IMAGE_COMDAT_SELECT_ANY:
+ OS << "\t.linkonce discard\n";
+ break;
+ case IMAGE_COMDAT_SELECT_SAME_SIZE:
+ OS << "\t.linkonce same_size\n";
+ break;
+ case IMAGE_COMDAT_SELECT_EXACT_MATCH:
+ OS << "\t.linkonce same_contents\n";
+ break;
+ // NOTE: as of binutils 2.20, there is no way to specify select largest
+ // with the .linkonce directive. For now, we treat it as an invalid
+ // comdat selection value.
+ case IMAGE_COMDAT_SELECT_LARGEST:
+ // OS << "\t.linkonce largest\n";
+ // break;
+ default:
+ assert (0 && "unsupported COFF selection type");
+ break;
+ }
+ }
+}
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index 3a18cee..ded3b20 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -34,7 +34,14 @@ static const struct {
{ "interposing", "S_INTERPOSING" }, // 0x0D
{ "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E
{ 0, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
- { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" } // 0x10
+ { 0, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
+ { "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11
+ { "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12
+ { "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13
+ { "thread_local_variable_pointers",
+ "S_THREAD_LOCAL_VARIABLE_POINTERS" }, // 0x14
+ { "thread_local_init_function_pointers",
+ "S_THREAD_LOCAL_INIT_FUNCTION_POINTERS"}, // 0x15
};
@@ -66,7 +73,7 @@ ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC)
MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
unsigned TAA, unsigned reserved2, SectionKind K)
- : MCSection(K), TypeAndAttributes(TAA), Reserved2(reserved2) {
+ : MCSection(SV_MachO, K), TypeAndAttributes(TAA), Reserved2(reserved2) {
assert(Segment.size() <= 16 && Section.size() <= 16 &&
"Segment or section string too long");
for (unsigned i = 0; i != 16; ++i) {
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index 4f484a2..573f2a3 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -48,7 +48,7 @@ void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
EmitValue(E, 1, AddrSpace);
}
-/// EmitRawText - If this file is backed by a assembly streamer, this dumps
+/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
void MCStreamer::EmitRawText(StringRef String) {
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 3fb1233..07751f7 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -38,6 +39,17 @@ static bool NameNeedsQuoting(StringRef Str) {
return false;
}
+void MCSymbol::setVariableValue(const MCExpr *Value) {
+ assert(Value && "Invalid variable value!");
+ assert((isUndefined() || (isAbsolute() && isa<MCConstantExpr>(Value))) &&
+ "Invalid redefinition!");
+ this->Value = Value;
+
+ // Mark the variable as absolute as appropriate.
+ if (isa<MCConstantExpr>(Value))
+ setAbsolute();
+}
+
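A minimal sketch (not from the patch) of the new invariant: assigning a constant expression both defines the variable and marks the symbol absolute, which is what lets the parser fold later references to it:

#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include <cassert>
using namespace llvm;

static void absoluteVariableSketch(MCContext &Ctx) {
  // Equivalent of the assembly assignment "answer = 42".
  MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef("answer"));
  Sym->setVariableValue(MCConstantExpr::Create(42, Ctx));
  assert(Sym->isVariable() && Sym->isAbsolute());
}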
void MCSymbol::print(raw_ostream &OS) const {
// The name for this MCSymbol is required to be a valid target name. However,
// some targets support quoting names with funny characters. If the name
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index a533ccf..3207e99 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -16,6 +16,7 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCMachOSymbolFlags.h"
#include "llvm/MC/MCValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachO.h"
@@ -58,6 +59,20 @@ static bool isFixupKindRIPRel(unsigned Kind) {
Kind == X86::reloc_riprel_4byte_movq_load;
}
+static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
+ // Undefined symbols are always extern.
+ if (SD->Symbol->isUndefined())
+ return true;
+
+ // References to weak definitions require external relocation entries; the
+ // definition may not always be the one in the same object file.
+ if (SD->getFlags() & SF_WeakDefinition)
+ return true;
+
+ // Otherwise, we can use an internal relocation.
+ return false;
+}
+
namespace {
class MachObjectWriterImpl {
@@ -130,7 +145,8 @@ class MachObjectWriterImpl {
RIT_Pair = 1,
RIT_Difference = 2,
RIT_PreboundLazyPointer = 3,
- RIT_LocalDifference = 4
+ RIT_LocalDifference = 4,
+ RIT_TLV = 5
};
/// X86_64 uses its own relocation types.
@@ -143,7 +159,8 @@ class MachObjectWriterImpl {
RIT_X86_64_Subtractor = 5,
RIT_X86_64_Signed1 = 6,
RIT_X86_64_Signed2 = 7,
- RIT_X86_64_Signed4 = 8
+ RIT_X86_64_Signed4 = 8,
+ RIT_X86_64_TLV = 9
};
/// MachSymbolData - Helper struct for containing some precomputed information
@@ -155,8 +172,8 @@ class MachObjectWriterImpl {
// Support lexicographic sorting.
bool operator<(const MachSymbolData &RHS) const {
- const std::string &Name = SymbolData->getSymbol().getName();
- return Name < RHS.SymbolData->getSymbol().getName();
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
}
};
@@ -170,6 +187,7 @@ class MachObjectWriterImpl {
llvm::DenseMap<const MCSectionData*,
std::vector<MachRelocationEntry> > Relocations;
+ llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;
/// @}
/// @name Symbol Table Data
@@ -289,9 +307,7 @@ public:
uint64_t Start = OS.tell();
(void) Start;
- // FIXME: cast<> support!
- const MCSectionMachO &Section =
- static_cast<const MCSectionMachO&>(SD.getSection());
+ const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
WriteBytes(Section.getSectionName(), 16);
WriteBytes(Section.getSegmentName(), 16);
if (Is64Bit) {
@@ -312,7 +328,7 @@ public:
Write32(NumRelocations ? RelocationsStart : 0);
Write32(NumRelocations);
Write32(Flags);
- Write32(0); // reserved1
+ Write32(IndirectSymBase.lookup(&SD)); // reserved1
Write32(Section.getStubSize()); // reserved2
if (Is64Bit)
Write32(0); // reserved3
@@ -404,7 +420,7 @@ public:
// Compute the symbol address.
if (Symbol.isDefined()) {
if (Symbol.isAbsolute()) {
- llvm_unreachable("FIXME: Not yet implemented!");
+ Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
} else {
Address = Layout.getSymbolAddress(&Data);
}
@@ -456,14 +472,17 @@ public:
void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment,
- const MCAsmFixup &Fixup, MCValue Target,
+ const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
- unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
- unsigned IsRIPRel = isFixupKindRIPRel(Fixup.Kind);
- unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
+ unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+ unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
// See <reloc.h>.
- uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
+ uint32_t FixupOffset =
+ Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ uint32_t FixupAddress =
+ Layout.getFragmentAddress(Fragment) + Fixup.getOffset();
int64_t Value = 0;
unsigned Index = 0;
unsigned IsExtern = 0;
@@ -532,7 +551,7 @@ public:
Type = RIT_X86_64_Unsigned;
MachRelocationEntry MRE;
- MRE.Word0 = Address;
+ MRE.Word0 = FixupOffset;
MRE.Word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
@@ -548,6 +567,17 @@ public:
MCSymbolData &SD = Asm.getSymbolData(*Symbol);
const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
+ // Relocations inside debug sections always use local relocations when
+ // possible. This seems to be done because the debugger doesn't fully
+ // understand x86_64 relocation entries, and expects to find values that
+ // have already been fixed up.
+ if (Symbol->isInSection()) {
+ const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
+ Fragment->getParent()->getSection());
+ if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
+ Base = 0;
+ }
+
// x86_64 almost always uses external relocations, except when there is no
// symbol to use as a base address (a local symbol with no preceding
// non-local symbol).
@@ -558,14 +588,17 @@ public:
// Add the local offset, if needed.
if (Base != &SD)
Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
- } else {
+ } else if (Symbol->isInSection()) {
// The index is the section ordinal (1-based).
Index = SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
Value += Layout.getSymbolAddress(&SD);
if (IsPCRel)
- Value -= Address + (1 << Log2Size);
+ Value -= FixupAddress + (1 << Log2Size);
+ } else {
+ report_fatal_error("unsupported relocation of undefined symbol '" +
+ Symbol->getName() + "'");
}
MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
@@ -575,14 +608,37 @@ public:
// x86_64 distinguishes movq foo@GOTPCREL so that the linker can
// rewrite the movq to an leaq at link time if the symbol ends up in
// the same linkage unit.
- if (unsigned(Fixup.Kind) == X86::reloc_riprel_4byte_movq_load)
+ if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
Type = RIT_X86_64_GOTLoad;
else
Type = RIT_X86_64_GOT;
- } else if (Modifier != MCSymbolRefExpr::VK_None)
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ Type = RIT_X86_64_TLV;
+ } else if (Modifier != MCSymbolRefExpr::VK_None) {
report_fatal_error("unsupported symbol modifier in relocation");
- else
+ } else {
Type = RIT_X86_64_Signed;
+
+ // The Darwin x86_64 relocation format has a problem where it cannot
+ // encode an address (L<foo> + <constant>) which is outside the atom
+ // containing L<foo>. Generally, this shouldn't occur but it does
+ // happen when we have a RIPrel instruction with data following the
+ // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
+ // adjustment Darwin x86_64 uses, the offset is still negative and
+ // the linker has no way to recognize this.
+ //
+ // To work around this, Darwin uses several special relocation types
+ // to indicate the offsets. However, the specification or
+ // implementation of these seems to also be incomplete; they should
+ // adjust the addend as well based on the actual encoded instruction
+ // (the additional bias), but instead appear to just look at the
+ // final offset.
+ switch (-(Target.getConstant() + (1LL << Log2Size))) {
+ case 1: Type = RIT_X86_64_Signed1; break;
+ case 2: Type = RIT_X86_64_Signed2; break;
+ case 4: Type = RIT_X86_64_Signed4; break;
+ }
+ }
} else {
if (Modifier != MCSymbolRefExpr::VK_None)
report_fatal_error("unsupported symbol modifier in branch "
@@ -590,27 +646,6 @@ public:
Type = RIT_X86_64_Branch;
}
-
- // The Darwin x86_64 relocation format has a problem where it cannot
- // encode an address (L<foo> + <constant>) which is outside the atom
- // containing L<foo>. Generally, this shouldn't occur but it does happen
- // when we have a RIPrel instruction with data following the relocation
- // entry (e.g., movb $012, L0(%rip)). Even with the PCrel adjustment
- // Darwin x86_64 uses, the offset is still negative and the linker has
- // no way to recognize this.
- //
- // To work around this, Darwin uses several special relocation types to
- // indicate the offsets. However, the specification or implementation of
- // these seems to also be incomplete; they should adjust the addend as
- // well based on the actual encoded instruction (the additional bias),
- // but instead appear to just look at the final offset.
- if (IsRIPRel) {
- switch (-(Target.getConstant() + (1LL << Log2Size))) {
- case 1: Type = RIT_X86_64_Signed1; break;
- case 2: Type = RIT_X86_64_Signed2; break;
- case 4: Type = RIT_X86_64_Signed4; break;
- }
- }
} else {
if (Modifier == MCSymbolRefExpr::VK_GOT) {
Type = RIT_X86_64_GOT;
@@ -621,6 +656,8 @@ public:
// required to include any necessary offset directly.
Type = RIT_X86_64_GOT;
IsPCRel = 1;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ report_fatal_error("TLVP symbol modifier should have been rip-rel");
} else if (Modifier != MCSymbolRefExpr::VK_None)
report_fatal_error("unsupported symbol modifier in relocation");
else
@@ -633,7 +670,7 @@ public:
// struct relocation_info (8 bytes)
MachRelocationEntry MRE;
- MRE.Word0 = Address;
+ MRE.Word0 = FixupOffset;
MRE.Word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
@@ -645,11 +682,11 @@ public:
void RecordScatteredRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
- const MCAsmFixup &Fixup, MCValue Target,
+ const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
- uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
- unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
- unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
unsigned Type = RIT_Vanilla;
// See <reloc.h>.
@@ -692,40 +729,49 @@ public:
}
MachRelocationEntry MRE;
- MRE.Word0 = ((Address << 0) |
- (Type << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
RF_Scattered);
MRE.Word1 = Value;
Relocations[Fragment->getParent()].push_back(MRE);
}
void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCAsmFixup &Fixup,
+ const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, uint64_t &FixedValue) {
if (Is64Bit) {
RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
return;
}
- unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
- unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
+ unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
// If this is a difference or a defined symbol plus an offset, then we need
// a scattered relocation entry.
+ // Differences always require scattered relocations.
+ if (Target.getSymB())
+ return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // If this is an internal relocation with an offset, it also needs a
+ // scattered relocation entry.
uint32_t Offset = Target.getConstant();
if (IsPCRel)
Offset += 1 << Log2Size;
- if (Target.getSymB() ||
- (Target.getSymA() && !Target.getSymA()->getSymbol().isUndefined() &&
- Offset)) {
- RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,Target,FixedValue);
- return;
- }
+ if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
+ return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
// See <reloc.h>.
- uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
uint32_t Value = 0;
unsigned Index = 0;
unsigned IsExtern = 0;
@@ -739,12 +785,15 @@ public:
Type = RIT_Vanilla;
Value = 0;
} else {
- const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
- MCSymbolData *SD = &Asm.getSymbolData(*Symbol);
-
- if (Symbol->isUndefined()) {
+ // Check whether we need an external or internal relocation.
+ if (doesSymbolRequireExternRelocation(SD)) {
IsExtern = 1;
Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolAddress(SD);
Value = 0;
} else {
// The index is the section ordinal (1-based).
@@ -757,7 +806,7 @@ public:
// struct relocation_info (8 bytes)
MachRelocationEntry MRE;
- MRE.Word0 = Address;
+ MRE.Word0 = FixupOffset;
MRE.Word1 = ((Index << 0) |
(IsPCRel << 24) |
(Log2Size << 25) |
@@ -775,29 +824,37 @@ public:
// FIXME: Revisit this when the dust settles.
// Bind non lazy symbol pointers first.
+ unsigned IndirectIndex = 0;
for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
- ie = Asm.indirect_symbol_end(); it != ie; ++it) {
- // FIXME: cast<> support!
+ ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
const MCSectionMachO &Section =
- static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+ cast<MCSectionMachO>(it->SectionData->getSection());
if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
continue;
+ // Initialize the section indirect symbol base, if necessary.
+ if (!IndirectSymBase.count(it->SectionData))
+ IndirectSymBase[it->SectionData] = IndirectIndex;
+
Asm.getOrCreateSymbolData(*it->Symbol);
}
// Then lazy symbol pointers and symbol stubs.
+ IndirectIndex = 0;
for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
- ie = Asm.indirect_symbol_end(); it != ie; ++it) {
- // FIXME: cast<> support!
+ ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
const MCSectionMachO &Section =
- static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+ cast<MCSectionMachO>(it->SectionData->getSection());
if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
continue;
+ // Initialize the section indirect symbol base, if necessary.
+ if (!IndirectSymBase.count(it->SectionData))
+ IndirectSymBase[it->SectionData] = IndirectIndex;
+
// Set the symbol type to undefined lazy, but only on construction.
//
// FIXME: Do not hardcode.
@@ -1111,7 +1168,7 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
const MCAsmLayout &Layout,
const MCFragment *Fragment,
- const MCAsmFixup &Fixup, MCValue Target,
+ const MCFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup,
Target, FixedValue);
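The Signed1/2/4 selection above is pure arithmetic on the fixup: the special types fire when the addend cancels the PC-relative bias by exactly 1, 2, or 4 bytes, i.e. when immediate data of that width follows the relocated field. A standalone sketch with stand-in names (not the writer's actual enum handling):

  #include <cstdint>
  enum RelocType { Signed, Signed1, Signed2, Signed4 }; // stand-in names
  static RelocType pickSigned(int64_t Addend, unsigned Log2Size) {
    // Mirrors the switch in RecordX86_64Relocation above.
    switch (-(Addend + (1LL << Log2Size))) {
    case 1:  return Signed1;
    case 2:  return Signed2;
    case 4:  return Signed4;
    default: return Signed;
    }
  }
  // e.g. a 4-byte fixup (Log2Size == 2) with Addend == -5:
  // -(-5 + 4) == 1, so the entry becomes RIT_X86_64_Signed1.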
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 50025d2..1341d21 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -1382,13 +1382,12 @@ APInt APInt::sqrt() const {
// libc sqrt function which will probably use a hardware sqrt computation.
// This should be faster than the algorithm below.
if (magnitude < 52) {
-#if defined( _MSC_VER ) || defined(_MINIX)
- // Amazingly, VC++ and Minix don't have round().
+#if HAVE_ROUND
return APInt(BitWidth,
- uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
+ uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
#else
return APInt(BitWidth,
- uint64_t(::round(::sqrt(double(isSingleWord()?VAL:pVal[0])))));
+ uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
#endif
}
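The APInt::sqrt hunk swaps the two preprocessor branches so that the configure-detected HAVE_ROUND picks ::round(), leaving the add-0.5-and-truncate form as the fallback. For the non-negative values ::sqrt produces here the two are equivalent; a sketch of the pattern in isolation (HAVE_ROUND stands in for the configure result):

  #include <cmath>
  #include <cstdint>
  // For x >= 0, ::round(x) and truncating (x + 0.5) agree, so either branch
  // yields the same nearest-integer square root.
  static uint64_t rounded_sqrt(uint64_t V) {
  #if HAVE_ROUND
    return static_cast<uint64_t>(::round(::sqrt(static_cast<double>(V))));
  #else
    return static_cast<uint64_t>(::sqrt(static_cast<double>(V)) + 0.5);
  #endif
  }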
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index d31f34e..ae66110 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -1170,7 +1170,9 @@ public:
std::string CPU = sys::getHostCPUName();
if (CPU == "generic") CPU = "(unknown)";
OS << ".\n"
+#if (ENABLE_TIMESTAMPS == 1)
<< " Built " << __DATE__ << " (" << __TIME__ << ").\n"
+#endif
<< " Host: " << sys::getHostTriple() << '\n'
<< " Host CPU: " << CPU << '\n'
<< '\n'
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 56a171c..7e7ca9d 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -16,6 +16,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Signals.h"
#include "llvm/System/Threading.h"
#include <cassert>
#include <cstdlib>
@@ -52,6 +53,12 @@ void llvm::report_fatal_error(const Twine &reason) {
} else {
ErrorHandler(ErrorHandlerUserData, reason.str());
}
+
+ // If we reached here, we are failing ungracefully. Run the interrupt handlers
+ // to make sure any special cleanups get done, in particular that we remove
+ // files registered with RemoveFileOnSignal.
+ sys::RunInterruptHandlers();
+
exit(1);
}
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 68b41a7..7a04a53 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -50,8 +50,8 @@ static void PrintCurStackTrace(raw_ostream &OS) {
// Integrate with crash reporter.
#ifdef __APPLE__
-extern "C" const char *__crashreporter_info__;
-const char *__crashreporter_info__ = 0;
+static const char *__crashreporter_info__ = 0;
+asm(".desc ___crashreporter_info__, 0x10");
#endif
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 2b262dc..ca0f518 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -23,6 +23,10 @@ static char ascii_tolower(char x) {
return x;
}
+static bool ascii_isdigit(char x) {
+ return x >= '0' && x <= '9';
+}
+
/// compare_lower - Compare strings, ignoring case.
int StringRef::compare_lower(StringRef RHS) const {
for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
@@ -37,6 +41,30 @@ int StringRef::compare_lower(StringRef RHS) const {
return Length < RHS.Length ? -1 : 1;
}
+/// compare_numeric - Compare strings, handle embedded numbers.
+int StringRef::compare_numeric(StringRef RHS) const {
+ for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) {
+ if (Data[I] == RHS.Data[I])
+ continue;
+ if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
+ // The longer sequence of numbers is larger. This doesn't really handle
+ // leading zeros well.
+ for (size_t J = I+1; J != E+1; ++J) {
+ bool ld = J < Length && ascii_isdigit(Data[J]);
+ bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
+ if (ld != rd)
+ return rd ? -1 : 1;
+ if (!rd)
+ break;
+ }
+ }
+ return Data[I] < RHS.Data[I] ? -1 : 1;
+ }
+ if (Length == RHS.Length)
+ return 0;
+ return Length < RHS.Length ? -1 : 1;
+}
+
// Compute the edit distance between the two given strings.
unsigned StringRef::edit_distance(llvm::StringRef Other,
bool AllowReplacements) {
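compare_numeric makes embedded integers compare by magnitude rather than byte-by-byte, so a longer digit run beats a shorter one. A simplified standalone variant of the idea, not the LLVM routine itself (it compares whole digit runs instead of scanning ahead from a mismatch):

  #include <cassert>
  #include <cctype>
  #include <string>
  static int cmpNumeric(const std::string &A, const std::string &B) {
    size_t i = 0, j = 0;
    while (i < A.size() && j < B.size()) {
      if (std::isdigit((unsigned char)A[i]) && std::isdigit((unsigned char)B[j])) {
        size_t i2 = i, j2 = j;                  // find both digit runs
        while (i2 < A.size() && std::isdigit((unsigned char)A[i2])) ++i2;
        while (j2 < B.size() && std::isdigit((unsigned char)B[j2])) ++j2;
        if (i2 - i != j2 - j)                   // longer run is larger
          return i2 - i < j2 - j ? -1 : 1;
        int c = A.compare(i, i2 - i, B, j, j2 - j);
        if (c) return c < 0 ? -1 : 1;
        i = i2; j = j2;
      } else {
        if (A[i] != B[j]) return A[i] < B[j] ? -1 : 1;
        ++i; ++j;
      }
    }
    if (A.size() == B.size()) return 0;
    return A.size() < B.size() ? -1 : 1;
  }
  int main() {
    assert(cmpNumeric("v9", "v10") < 0);            // numeric: 9 < 10
    assert(std::string("v9") > std::string("v10")); // plain comparison disagrees
  }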
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 481f6ba..784b77c 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -61,6 +61,10 @@ raw_ostream *llvm::CreateInfoOutputFile() {
if (OutputFilename == "-")
return new raw_fd_ostream(1, false); // stdout.
+ // Append mode is used because the info output file is opened and closed
+ // each time -stats or -time-passes wants to print output to it. To
+ // compensate for this, the test-suite Makefiles have code to delete the
+ // info output file before running commands which write to it.
std::string Error;
raw_ostream *Result = new raw_fd_ostream(OutputFilename.c_str(),
Error, raw_fd_ostream::F_Append);
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index 21504e9..b3ea013 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -48,10 +48,10 @@ void Twine::printOneChild(raw_ostream &OS, const void *Ptr,
OS << *static_cast<const StringRef*>(Ptr);
break;
case Twine::DecUIKind:
- OS << *static_cast<const unsigned int*>(Ptr);
+ OS << (unsigned)(uintptr_t)Ptr;
break;
case Twine::DecIKind:
- OS << *static_cast<const int*>(Ptr);
+ OS << (int)(intptr_t)Ptr;
break;
case Twine::DecULKind:
OS << *static_cast<const unsigned long*>(Ptr);
@@ -95,10 +95,10 @@ void Twine::printOneChildRepr(raw_ostream &OS, const void *Ptr,
<< static_cast<const StringRef*>(Ptr) << "\"";
break;
case Twine::DecUIKind:
- OS << "decUI:\"" << *static_cast<const unsigned int*>(Ptr) << "\"";
+ OS << "decUI:\"" << (unsigned)(uintptr_t)Ptr << "\"";
break;
case Twine::DecIKind:
- OS << "decI:\"" << *static_cast<const int*>(Ptr) << "\"";
+ OS << "decI:\"" << (int)(intptr_t)Ptr << "\"";
break;
case Twine::DecULKind:
OS << "decUL:\"" << *static_cast<const unsigned long*>(Ptr) << "\"";
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 0b05c54..11cf0ec 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/STLExtras.h"
#include <cctype>
+#include <cerrno>
#include <sys/stat.h>
#include <sys/types.h>
@@ -399,37 +400,76 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
if (Flags & F_Excl)
OpenFlags |= O_EXCL;
- FD = open(Filename, OpenFlags, 0664);
- if (FD < 0) {
- ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
- ShouldClose = false;
- } else {
- ShouldClose = true;
+ while ((FD = open(Filename, OpenFlags, 0664)) < 0) {
+ if (errno != EINTR) {
+ ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
+ ShouldClose = false;
+ return;
+ }
}
+
+ // Ok, we successfully opened the file, so it'll need to be closed.
+ ShouldClose = true;
}
raw_fd_ostream::~raw_fd_ostream() {
if (FD < 0) return;
flush();
if (ShouldClose)
- if (::close(FD) != 0)
- error_detected();
+ while (::close(FD) != 0)
+ if (errno != EINTR) {
+ error_detected();
+ break;
+ }
}
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
- if (::write(FD, Ptr, Size) != (ssize_t) Size)
- error_detected();
+ ssize_t ret;
+
+ do {
+ ret = ::write(FD, Ptr, Size);
+
+ if (ret < 0) {
+ // If it's a recoverable error, swallow it and retry the write.
+ //
+ // Ideally we wouldn't ever see EAGAIN or EWOULDBLOCK here, since
+ // raw_ostream isn't designed to do non-blocking I/O. However, some
+ // programs, such as old versions of bjam, have mistakenly used
+ // O_NONBLOCK. For compatibility, emulate blocking semantics by
+ // spinning until the write succeeds. If you don't want spinning,
+ // don't use O_NONBLOCK file descriptors with raw_ostream.
+ if (errno == EINTR || errno == EAGAIN
+#ifdef EWOULDBLOCK
+ || errno == EWOULDBLOCK
+#endif
+ )
+ continue;
+
+ // Otherwise it's a non-recoverable error. Note it and quit.
+ error_detected();
+ break;
+ }
+
+ // The write may have written some or all of the data. Update the
+ // size and buffer pointer to reflect the remainder that needs
+ // to be written. If there are no bytes left, we're done.
+ Ptr += ret;
+ Size -= ret;
+ } while (Size > 0);
}
void raw_fd_ostream::close() {
assert(ShouldClose);
ShouldClose = false;
flush();
- if (::close(FD) != 0)
- error_detected();
+ while (::close(FD) != 0)
+ if (errno != EINTR) {
+ error_detected();
+ break;
+ }
FD = -1;
}
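All three raw_fd_ostream changes apply the same POSIX discipline: retry open/close on EINTR, and loop on write until every byte is out, treating EINTR/EAGAIN as retryable and advancing past partial writes. A self-contained helper capturing the pattern (hypothetical, not part of raw_ostream):

  #include <cerrno>
  #include <unistd.h>
  static bool write_all(int FD, const char *Ptr, size_t Size) {
    while (Size > 0) {
      ssize_t Ret = ::write(FD, Ptr, Size);
      if (Ret < 0) {
        if (errno == EINTR || errno == EAGAIN)
          continue;            // recoverable: retry the same write
        return false;          // real error: give up
      }
      Ptr += Ret;              // partial write: advance past what went out
      Size -= Ret;
    }
    return true;
  }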
diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc
index 56bf9e7..9548816 100644
--- a/lib/System/Unix/Signals.inc
+++ b/lib/System/Unix/Signals.inc
@@ -152,7 +152,9 @@ static RETSIGTYPE SignalHandler(int Sig) {
CallBacksToRun[i].first(CallBacksToRun[i].second);
}
-
+void llvm::sys::RunInterruptHandlers() {
+ SignalHandler(SIGINT);
+}
void llvm::sys::SetInterruptFunction(void (*IF)()) {
SignalsMutex.acquire();
diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc
index f2b72ca..a3a393c 100644
--- a/lib/System/Win32/Signals.inc
+++ b/lib/System/Win32/Signals.inc
@@ -189,6 +189,10 @@ static void Cleanup() {
LeaveCriticalSection(&CriticalSection);
}
+void llvm::sys::RunInterruptHandlers() {
+ Cleanup();
+}
+
static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) {
try {
Cleanup();
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index b08f942..ae7ae59 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -48,7 +48,7 @@ namespace ARMCC {
AL
};
- inline static CondCodes getOppositeCondition(CondCodes CC){
+ inline static CondCodes getOppositeCondition(CondCodes CC) {
switch (CC) {
default: llvm_unreachable("Unknown condition code");
case EQ: return NE;
@@ -67,7 +67,7 @@ namespace ARMCC {
case LE: return GT;
}
}
-}
+} // namespace ARMCC
inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
switch (CC) {
@@ -90,6 +90,10 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
}
}
+/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
+/// operations involving sub-registers.
+bool ModelWithRegSequence();
+
FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index b4dec0c..f1e6a9f 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -32,6 +32,8 @@ def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
"ARM v6t2">;
def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
"ARM v7A">;
+def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
+ "ARM v7M">;
def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
"Enable VFP2 instructions">;
def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
@@ -42,6 +44,10 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
"Enable Thumb2 instructions">;
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
+def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
+ "Enable divide instructions">;
+def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
+ "Enable Thumb2 extract and pack instructions">;
// Some processors have multiply-accumulate instructions that don't
// play nicely with other VFP instructions, and it's generally better
@@ -123,9 +129,11 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries,
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
- FeatureNEONForFP]>;
+ FeatureNEONForFP, FeatureT2ExtractPack]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON]>;
+ [ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
+def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
+def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
//===----------------------------------------------------------------------===//
// Register File Description
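For reference, the new CPUs and features plug into the usual subtarget selection, e.g. llc -march=arm -mcpu=cortex-m3 (which implies ArchV7M, Thumb2 and hardware divide), or llc -mattr=+t2xtpk,+hwdiv to toggle the features individually; the feature strings are exactly the SubtargetFeature names above.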
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index a193858..2528854 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
@@ -196,6 +197,42 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return NewMIs[0];
}
+bool
+ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty())
+ return false;
+
+ DebugLoc DL;
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ // Insert the spill into the stack frame; the register is killed at the spill.
+ storeRegToStackSlot(MBB, MI, Reg, isKill,
+ CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI);
+ }
+ return true;
+}
+
// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
@@ -481,6 +518,10 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// If this machine instr is a constant pool entry, its size is recorded as
// operand #2.
return MI->getOperand(2).getImm();
+ case ARM::Int_eh_sjlj_longjmp:
+ return 16;
+ case ARM::tInt_eh_sjlj_longjmp:
+ return 10;
case ARM::Int_eh_sjlj_setjmp:
case ARM::Int_eh_sjlj_setjmp_nofp:
return 24;
@@ -540,16 +581,17 @@ bool
ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
- SrcSubIdx = DstSubIdx = 0; // No sub-registers.
-
switch (MI.getOpcode()) {
default: break;
case ARM::VMOVS:
case ARM::VMOVD:
case ARM::VMOVDneon:
- case ARM::VMOVQ: {
+ case ARM::VMOVQ:
+ case ARM::VMOVQQ : {
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
+ SrcSubIdx = MI.getOperand(1).getSubReg();
+ DstSubIdx = MI.getOperand(0).getSubReg();
return true;
}
case ARM::MOVr:
@@ -564,6 +606,8 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
"Invalid ARM MOV instruction");
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
+ SrcSubIdx = MI.getOperand(1).getSubReg();
+ DstSubIdx = MI.getOperand(0).getSubReg();
return true;
}
}
@@ -654,10 +698,8 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
// tGPR is used sometimes in ARM instructions that need to avoid using
// certain registers. Just treat it as GPR here.
if (DestRC == ARM::tGPRRegisterClass)
@@ -679,6 +721,12 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
SrcRC == ARM::QPR_8RegisterClass)
SrcRC = ARM::QPRRegisterClass;
+ // Allow QQPR / QQPR_VFP2 cross-class copies.
+ if (DestRC == ARM::QQPR_VFP2RegisterClass)
+ DestRC = ARM::QQPRRegisterClass;
+ if (SrcRC == ARM::QQPR_VFP2RegisterClass)
+ SrcRC = ARM::QQPRRegisterClass;
+
// Disallow copies of unequal sizes.
if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize())
return false;
@@ -703,20 +751,36 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
Opc = ARM::VMOVDneon;
else if (DestRC == ARM::QPRRegisterClass)
Opc = ARM::VMOVQ;
+ else if (DestRC == ARM::QQPRRegisterClass)
+ Opc = ARM::VMOVQQ;
+ else if (DestRC == ARM::QQQQPRRegisterClass)
+ Opc = ARM::VMOVQQQQ;
else
return false;
- AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg)
- .addReg(SrcReg));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg));
}
return true;
}
+static const
+MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
+ unsigned Reg, unsigned SubIdx, unsigned State,
+ const TargetRegisterInfo *TRI) {
+ if (!SubIdx)
+ return MIB.addReg(Reg, State);
+
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
+ return MIB.addReg(Reg, State, SubIdx);
+}
+
void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
@@ -738,45 +802,82 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+ } else if (RC == ARM::SPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (RC == ARM::DPRRegisterClass ||
RC == ARM::DPR_VFP2RegisterClass ||
RC == ARM::DPR_8RegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
- .addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else {
- assert((RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
+ } else if (RC == ARM::QPRRegisterClass ||
+ RC == ARM::QPR_VFP2RegisterClass ||
+ RC == ARM::QPR_8RegisterClass) {
// FIXME: Neon instructions should support predicates
- if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
.addFrameIndex(FI).addImm(128)
- .addMemOperand(MMO)
- .addReg(SrcReg, getKillRegState(isKill)));
+ .addReg(SrcReg, getKillRegState(isKill))
+ .addMemOperand(MMO));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)).
- addReg(SrcReg, getKillRegState(isKill))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ))
+ .addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI)
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
.addMemOperand(MMO));
}
+ } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ // FIXME: It's possible to only store part of the QQ register if the
+ // spilled def has a sub-register index.
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32))
+ .addFrameIndex(FI).addImm(128);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ AddDefaultPred(MIB.addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ }
+ } else {
+ assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
+ MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
+ AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
}
}
void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
-
MachineMemOperand *MMO =
MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
MachineMemOperand::MOLoad, 0,
@@ -791,20 +892,18 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
+ } else if (RC == ARM::SPRRegisterClass) {
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (RC == ARM::DPRRegisterClass ||
RC == ARM::DPR_VFP2RegisterClass ||
RC == ARM::DPR_8RegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
- } else {
- assert((RC == ARM::QPRRegisterClass ||
- RC == ARM::QPR_VFP2RegisterClass ||
- RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
- if (Align >= 16
- && (getRegisterInfo().canRealignStack(MF))) {
+ } else if (RC == ARM::QPRRegisterClass ||
+ RC == ARM::QPR_VFP2RegisterClass ||
+ RC == ARM::QPR_8RegisterClass) {
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
.addFrameIndex(FI).addImm(128)
.addMemOperand(MMO));
@@ -814,6 +913,40 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
.addMemOperand(MMO));
}
+ } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){
+ if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
+ MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32));
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO));
+ } else {
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ }
+ } else {
+ assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!");
+ MachineInstrBuilder MIB =
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD))
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)))
+ .addMemOperand(MMO);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI);
+ MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI);
+ AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI);
}
}
@@ -930,8 +1063,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
}
- }
- else if (Opc == ARM::VMOVD) {
+ } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) {
unsigned Pred = MI->getOperand(2).getImm();
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
@@ -957,6 +1089,56 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
}
+ } else if (Opc == ARM::VMOVQ) {
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Pred = MI->getOperand(2).getImm();
+ unsigned PredReg = MI->getOperand(3).getReg();
+ if (OpNum == 0) { // move -> store
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned SrcSubReg = MI->getOperand(1).getSubReg();
+ bool isKill = MI->getOperand(1).isKill();
+ bool isUndef = MI->getOperand(1).isUndef();
+ if (MFI.getObjectAlignment(FI) >= 16 &&
+ getRegisterInfo().canRealignStack(MF)) {
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q))
+ .addFrameIndex(FI).addImm(128)
+ .addReg(SrcReg,
+ getKillRegState(isKill) | getUndefRegState(isUndef),
+ SrcSubReg)
+ .addImm(Pred).addReg(PredReg);
+ } else {
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ))
+ .addReg(SrcReg,
+ getKillRegState(isKill) | getUndefRegState(isUndef),
+ SrcSubReg)
+ .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(Pred).addReg(PredReg);
+ }
+ } else { // move -> load
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned DstSubReg = MI->getOperand(0).getSubReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isUndef = MI->getOperand(0).isUndef();
+ if (MFI.getObjectAlignment(FI) >= 16 &&
+ getRegisterInfo().canRealignStack(MF)) {
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q))
+ .addReg(DstReg,
+ RegState::Define |
+ getDeadRegState(isDead) |
+ getUndefRegState(isUndef),
+ DstSubReg)
+ .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg);
+ } else {
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ))
+ .addReg(DstReg,
+ RegState::Define |
+ getDeadRegState(isDead) |
+ getUndefRegState(isUndef),
+ DstSubReg)
+ .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addImm(Pred).addReg(PredReg);
+ }
+ }
}
return NewMI;
@@ -985,12 +1167,13 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
Opc == ARM::tMOVtgpr2gpr ||
Opc == ARM::tMOVgpr2tgpr) {
return true;
- } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) {
+ } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD ||
+ Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
return true;
- } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
- return false; // FIXME
}
+ // FIXME: VMOVQQ and VMOVQQQQ?
+
return false;
}
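A subtlety in the AddDReg helper introduced above: for a physical register it resolves the D sub-register immediately through TRI->getSubReg(), while for a virtual register it attaches the sub-register index to the operand and leaves resolution to the register allocator. A toy stand-in for that dispatch (schematic types, not the LLVM API; the sub-register arithmetic is invented for illustration):

  #include <cstdio>
  struct Operand { unsigned Reg; unsigned SubIdx; };
  static const unsigned FirstVirtReg = 1 << 16;  // toy physical/virtual boundary
  static unsigned getSubReg(unsigned Reg, unsigned SubIdx) {
    return Reg * 8 + SubIdx;                     // stands in for TRI->getSubReg()
  }
  static Operand addDReg(unsigned Reg, unsigned SubIdx) {
    if (Reg < FirstVirtReg)
      return Operand{getSubReg(Reg, SubIdx), 0}; // physical: resolve now
    return Operand{Reg, SubIdx};                 // virtual: defer to regalloc
  }
  int main() {
    Operand P = addDReg(3, 1), V = addDReg((1 << 16) + 5, 1);
    std::printf("phys -> reg %u, virt -> reg %u subidx %u\n", P.Reg, V.Reg, V.SubIdx);
  }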
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 7a5630e..b566271 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -200,6 +200,11 @@ public:
virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0;
const ARMSubtarget &getSubtarget() const { return Subtarget; }
+ bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
+
// Branch analysis.
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -257,17 +262,20 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index bc12187..82458d2 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -259,10 +259,10 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
unsigned SubIdx) const {
switch (SubIdx) {
default: return 0;
- case 1:
- case 2:
- case 3:
- case 4:
+ case ARM::ssub_0:
+ case ARM::ssub_1:
+ case ARM::ssub_2:
+ case ARM::ssub_3: {
// S sub-registers.
if (A->getSize() == 8) {
if (B == &ARM::SPR_8RegClass)
@@ -273,22 +273,201 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &ARM::DPR_VFP2RegClass;
}
- assert(A->getSize() == 16 && "Expecting a Q register class!");
- if (B == &ARM::SPR_8RegClass)
- return &ARM::QPR_8RegClass;
- return &ARM::QPR_VFP2RegClass;
- case 5:
- case 6:
- // D sub-registers.
- if (B == &ARM::DPR_VFP2RegClass)
+ if (A->getSize() == 16) {
+ if (B == &ARM::SPR_8RegClass)
+ return &ARM::QPR_8RegClass;
return &ARM::QPR_VFP2RegClass;
- if (B == &ARM::DPR_8RegClass)
- return &ARM::QPR_8RegClass;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::SPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return &ARM::QQPR_VFP2RegClass;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::dsub_0:
+ case ARM::dsub_1:
+ case ARM::dsub_2:
+ case ARM::dsub_3: {
+ // D sub-registers.
+ if (A->getSize() == 16) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ if (A->getSize() == 32) {
+ if (B == &ARM::DPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::DPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B != &ARM::DPRRegClass)
+ return 0; // Do not allow coalescing!
return A;
}
+ case ARM::dsub_4:
+ case ARM::dsub_5:
+ case ARM::dsub_6:
+ case ARM::dsub_7: {
+ // D sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::DPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+
+ case ARM::qsub_0:
+ case ARM::qsub_1: {
+ // Q sub-registers.
+ if (A->getSize() == 32) {
+ if (B == &ARM::QPR_VFP2RegClass)
+ return &ARM::QQPR_VFP2RegClass;
+ if (B == &ARM::QPR_8RegClass)
+ return 0; // Do not allow coalescing!
+ return A;
+ }
+
+ assert(A->getSize() == 64 && "Expecting a QQQQ register class!");
+ if (B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ case ARM::qsub_2:
+ case ARM::qsub_3: {
+ // Q sub-registers of QQQQ registers.
+ if (A->getSize() == 64 && B == &ARM::QPRRegClass)
+ return A;
+ return 0; // Do not allow coalescing!
+ }
+ }
return 0;
}
+bool
+ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const {
+
+ unsigned Size = RC->getSize() * 8;
+ if (Size < 6)
+ return 0;
+
+ NewSubIdx = 0; // Whole register.
+ unsigned NumRegs = SubIndices.size();
+ if (NumRegs == 8) {
+ // 8 D registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[0] == ARM::dsub_0 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3 &&
+ SubIndices[4] == ARM::dsub_4 &&
+ SubIndices[5] == ARM::dsub_5 &&
+ SubIndices[6] == ARM::dsub_6 &&
+ SubIndices[7] == ARM::dsub_7);
+ } else if (NumRegs == 4) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 4 Q registers -> 1 QQQQ register.
+ return (Size == 512 &&
+ SubIndices[1] == ARM::qsub_1 &&
+ SubIndices[2] == ARM::qsub_2 &&
+ SubIndices[3] == ARM::qsub_3);
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 4 D registers -> 1 QQ register.
+ if (Size >= 256 &&
+ SubIndices[1] == ARM::dsub_1 &&
+ SubIndices[2] == ARM::dsub_2 &&
+ SubIndices[3] == ARM::dsub_3) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 4 D registers -> 1 QQ register (2nd).
+ if (Size == 512 &&
+ SubIndices[1] == ARM::dsub_5 &&
+ SubIndices[2] == ARM::dsub_6 &&
+ SubIndices[3] == ARM::dsub_7) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 4 S registers -> 1 Q register.
+ if (Size >= 128 &&
+ SubIndices[1] == ARM::ssub_1 &&
+ SubIndices[2] == ARM::ssub_2 &&
+ SubIndices[3] == ARM::ssub_3) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ }
+ } else if (NumRegs == 2) {
+ if (SubIndices[0] == ARM::qsub_0) {
+ // 2 Q registers -> 1 QQ register.
+ if (Size >= 256 && SubIndices[1] == ARM::qsub_1) {
+ if (Size == 512)
+ NewSubIdx = ARM::qqsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::qsub_2) {
+ // 2 Q registers -> 1 QQ register (2nd).
+ if (Size == 512 && SubIndices[1] == ARM::qsub_3) {
+ NewSubIdx = ARM::qqsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_0) {
+ // 2 D registers -> 1 Q register.
+ if (Size >= 128 && SubIndices[1] == ARM::dsub_1) {
+ if (Size >= 256)
+ NewSubIdx = ARM::qsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_2) {
+ // 2 D registers -> 1 Q register (2nd).
+ if (Size >= 256 && SubIndices[1] == ARM::dsub_3) {
+ NewSubIdx = ARM::qsub_1;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_4) {
+ // 2 D registers -> 1 Q register (3rd).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_5) {
+ NewSubIdx = ARM::qsub_2;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::dsub_6) {
+ // 2 D registers -> 1 Q register (4th).
+ if (Size == 512 && SubIndices[1] == ARM::dsub_7) {
+ NewSubIdx = ARM::qsub_3;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_0) {
+ // 2 S registers -> 1 D register.
+ if (SubIndices[1] == ARM::ssub_1) {
+ if (Size >= 128)
+ NewSubIdx = ARM::dsub_0;
+ return true;
+ }
+ } else if (SubIndices[0] == ARM::ssub_2) {
+ // 2 S registers -> 1 D register (2nd).
+ if (Size >= 128 && SubIndices[1] == ARM::ssub_3) {
+ NewSubIdx = ARM::dsub_1;
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+
const TargetRegisterClass *
ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
return ARM::GPRRegisterClass;
@@ -481,7 +660,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
///
bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- return ((DisableFramePointerElim(MF) && MFI->hasCalls())||
+ return ((DisableFramePointerElim(MF) && MFI->adjustsStack())||
needsStackRealignment(MF) ||
MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken());
@@ -509,7 +688,7 @@ needsStackRealignment(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (DisableFramePointerElim(MF) && MFI->hasCalls())
+ if (DisableFramePointerElim(MF) && MFI->adjustsStack())
return true;
return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
|| needsStackRealignment(MF);
@@ -545,24 +724,25 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const {
I != E; ++I) {
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
if (!I->getOperand(i).isFI()) continue;
-
- const TargetInstrDesc &Desc = TII.get(I->getOpcode());
- unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
- if (AddrMode == ARMII::AddrMode3 ||
- AddrMode == ARMII::AddrModeT2_i8)
- return (1 << 8) - 1;
-
- if (AddrMode == ARMII::AddrMode5 ||
- AddrMode == ARMII::AddrModeT2_i8s4)
+ switch (I->getDesc().TSFlags & ARMII::AddrModeMask) {
+ case ARMII::AddrMode3:
+ case ARMII::AddrModeT2_i8:
+ Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode5:
+ case ARMII::AddrModeT2_i8s4:
Limit = std::min(Limit, ((1U << 8) - 1) * 4);
-
- if (AddrMode == ARMII::AddrModeT2_i12 && hasFP(MF))
- // When the stack offset is negative, we will end up using
- // the i8 instructions instead.
- return (1 << 8) - 1;
-
- if (AddrMode == ARMII::AddrMode6)
+ break;
+ case ARMII::AddrModeT2_i12:
+ if (hasFP(MF)) Limit = std::min(Limit, (1U << 8) - 1);
+ break;
+ case ARMII::AddrMode6:
+ // Addressing mode 6 (load/store) instructions can't encode an
+ // immediate offset for stack references.
return 0;
+ default:
+ break;
+ }
break; // At most one FI per instruction
}
}
@@ -750,7 +930,9 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
while (NumExtras && !UnspilledCS1GPRs.empty()) {
unsigned Reg = UnspilledCS1GPRs.back();
UnspilledCS1GPRs.pop_back();
- if (!isReservedReg(MF, Reg)) {
+ if (!isReservedReg(MF, Reg) &&
+ (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) ||
+ Reg == ARM::LR)) {
Extras.push_back(Reg);
NumExtras--;
}
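For concreteness, the ceilings the rewritten estimateRSStackSizeLimit switch folds into Limit: AddrMode3 and AddrModeT2_i8 allow (1 << 8) - 1 = 255 bytes; AddrMode5 and AddrModeT2_i8s4 allow 255 * 4 = 1020, since their 8-bit field counts 4-byte units; AddrModeT2_i12 also clamps to 255 when a frame pointer is present, because negative stack offsets fall back to the i8 form; and AddrMode6 supports no immediate offset at all, so a single such reference collapses the limit to 0. The final limit is the minimum over every frame-index reference in the function.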
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 456c392..2c9c82d 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -81,6 +81,15 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
+ /// canCombinedSubRegIndex - Given a register class and a list of sub-register
+ /// indices, return true if it's possible to combine the sub-register indices
+ /// into one that corresponds to a larger sub-register. Return the new sub-
+ /// register index by reference. Note the new index by be zero if the given
+ /// sub-registers combined to form the whole register.
+ virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
+ SmallVectorImpl<unsigned> &SubIndices,
+ unsigned &NewSubIdx) const;
+
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
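Worked examples of the new hook, read off the cases in ARMBaseRegisterInfo.cpp above: QPR (128-bit) with {dsub_0, dsub_1} yields true with NewSubIdx = 0 (the pair is the whole register); QQPR (256-bit) with {dsub_0, dsub_1} yields true with NewSubIdx = qsub_0; QQQQPR (512-bit) with {qsub_0 .. qsub_3} yields true with NewSubIdx = 0; and QQPR with a misaligned pair such as {dsub_1, dsub_2} yields false.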
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f84f85a..f2730fc 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -88,6 +88,7 @@ namespace {
void emitWordLE(unsigned Binary);
void emitDWordLE(uint64_t Binary);
void emitConstPoolInstruction(const MachineInstr &MI);
+ void emitMOVi32immInstruction(const MachineInstr &MI);
void emitMOVi2piecesInstruction(const MachineInstr &MI);
void emitLEApcrelJTInstruction(const MachineInstr &MI);
void emitPseudoMoveInstruction(const MachineInstr &MI);
@@ -145,6 +146,15 @@ namespace {
return getMachineOpValue(MI, MI.getOperand(OpIdx));
}
+ /// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+ /// machine operand requires relocation, record the relocation and return zero.
+ unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO,
+ unsigned Reloc);
+ unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx,
+ unsigned Reloc) {
+ return getMovi32Value(MI, MI.getOperand(OpIdx), Reloc);
+ }
+
/// getShiftOp - Return the shift opcode (bit[6:5]) of the immediate value.
///
unsigned getShiftOp(unsigned Imm) const ;
@@ -217,6 +227,31 @@ unsigned ARMCodeEmitter::getShiftOp(unsigned Imm) const {
return 0;
}
+/// getMovi32Value - Return binary encoding of operand for movw/movt. If the
+/// machine operand requires relocation, record the relocation and return zero.
+unsigned ARMCodeEmitter::getMovi32Value(const MachineInstr &MI,
+ const MachineOperand &MO,
+ unsigned Reloc) {
+ assert(((Reloc == ARM::reloc_arm_movt) || (Reloc == ARM::reloc_arm_movw))
+ && "Relocation to this function should be for movt or movw");
+
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+ else if (MO.isGlobal())
+ emitGlobalAddress(MO.getGlobal(), Reloc, true, false);
+ else if (MO.isSymbol())
+ emitExternalSymbolAddress(MO.getSymbolName(), Reloc);
+ else if (MO.isMBB())
+ emitMachineBasicBlock(MO.getMBB(), Reloc);
+ else {
+#ifndef NDEBUG
+ errs() << MO;
+#endif
+ llvm_unreachable("Unsupported operand type for movw/movt");
+ }
+ return 0;
+}
+
/// getMachineOpValue - Return binary encoding of operand. If the machine
/// operand requires relocation, record the relocation and return zero.
unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
@@ -438,6 +473,42 @@ void ARMCodeEmitter::emitConstPoolInstruction(const MachineInstr &MI) {
}
}
+void ARMCodeEmitter::emitMOVi32immInstruction(const MachineInstr &MI) {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ const MachineOperand &MO1 = MI.getOperand(1);
+
+ // Emit the 'movw' instruction.
+ unsigned Binary = 0x30 << 20; // mov: Insts{27-20} = 0b00110000
+
+ unsigned Lo16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movw) & 0xFFFF;
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12
+ Binary |= Lo16 & 0xFFF; // Insts{11-0} = imm12
+ Binary |= ((Lo16 >> 12) & 0xF) << 16; // Insts{19-16} = imm4
+ emitWordLE(Binary);
+
+ unsigned Hi16 = getMovi32Value(MI, MO1, ARM::reloc_arm_movt) >> 16;
+ // Emit the 'movt' instruction.
+ Binary = 0x34 << 20; // movt: Insts{27-20} = 0b00110100
+
+ // Set the conditional execution predicate.
+ Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+
+ // Encode Rd.
+ Binary |= getMachineOpValue(MI, MO0) << ARMII::RegRdShift;
+
+ // Encode imm16 as imm4:imm12, same as movw above.
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+}
+
void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
const MachineOperand &MO0 = MI.getOperand(0);
const MachineOperand &MO1 = MI.getOperand(1);
@@ -557,7 +628,6 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
switch (Opcode) {
default:
llvm_unreachable("ARMCodeEmitter::emitPseudoInstruction");
- // FIXME: Add support for MOVimm32.
case TargetOpcode::INLINEASM: {
// We allow inline assembler nodes with empty bodies - they can
// implicitly define registers, which is ok for JIT.
@@ -604,6 +674,11 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
emitMiscLoadStoreInstruction(MI, ARM::PC);
break;
}
+
+ case ARM::MOVi32imm:
+ emitMOVi32immInstruction(MI);
+ break;
+
case ARM::MOVi2pieces:
// Two instructions to materialize a constant.
emitMOVi2piecesInstruction(MI);
@@ -706,10 +781,6 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
unsigned ImplicitRn) {
const TargetInstrDesc &TID = MI.getDesc();
- if (TID.Opcode == ARM::BFC) {
- report_fatal_error("ARMv6t2 JIT is not yet supported.");
- }
-
// Part of binary is determined by TableGn.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -729,6 +800,45 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
Binary |= (ARMRegisterInfo::getRegisterNumbering(ImplicitRd)
<< ARMII::RegRdShift);
+ if (TID.Opcode == ARM::MOVi16) {
+ // Get immediate from MI.
+ unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movw);
+ // Encode imm which is the same as in emitMOVi32immInstruction().
+ Binary |= Lo16 & 0xFFF;
+ Binary |= ((Lo16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if(TID.Opcode == ARM::MOVTi16) {
+ unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
+ ARM::reloc_arm_movt) >> 16);
+ Binary |= Hi16 & 0xFFF;
+ Binary |= ((Hi16 >> 12) & 0xF) << 16;
+ emitWordLE(Binary);
+ return;
+ } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) {
+ uint32_t v = ~MI.getOperand(2).getImm();
+ int32_t lsb = CountTrailingZeros_32(v);
+ int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
+ // Instr{20-16} = msb, Instr{11-7} = lsb
+ Binary |= (msb & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) {
+ // Encode Rn in Instr{0-3}
+ Binary |= getMachineOpValue(MI, OpIdx++);
+
+ uint32_t lsb = MI.getOperand(OpIdx++).getImm();
+ uint32_t widthm1 = MI.getOperand(OpIdx++).getImm() - 1;
+
+ // Instr{20-16} = widthm1, Instr{11-7} = lsb
+ Binary |= (widthm1 & 0x1F) << 16;
+ Binary |= (lsb & 0x1F) << 7;
+ emitWordLE(Binary);
+ return;
+ }
+
// If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
++OpIdx;
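A portable model (not LLVM code; the bit range 8..15 is an arbitrary example) of the msb/lsb derivation that the BFC/BFI case above performs on the inverted-mask immediate:

#include <cassert>
#include <cstdint>

// Stand-ins for LLVM's CountTrailingZeros_32 / CountLeadingZeros_32;
// both assume V != 0, which holds for any non-empty bitfield.
static int ctz32(uint32_t V) { int N = 0; for (; !(V & 1u); V >>= 1) ++N; return N; }
static int clz32(uint32_t V) { int N = 0; for (; !(V & 0x80000000u); V <<= 1) ++N; return N; }

int main() {
  uint32_t Imm = ~0x0000FF00u;    // BFC operand: bits to clear are 0s
  uint32_t V = ~Imm;              // 0x0000FF00: 1s mark the field
  int Lsb = ctz32(V);             // 8  -> Instr{11-7}
  int Msb = (32 - clz32(V)) - 1;  // 15 -> Instr{20-16}
  assert(Lsb == 8 && Msb == 15);
  return 0;
}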
@@ -1366,18 +1476,66 @@ ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
break;
++NumRegs;
}
- Binary |= NumRegs * 2;
+ // Bit 8 is set if <list> holds consecutive 64-bit (D) registers, each of
+ // which occupies two words in the count field; it is 0 for 32-bit (S)
+ // registers, which occupy one word each.
+ if (Binary & 0x100)
+ Binary |= NumRegs * 2;
+ else
+ Binary |= NumRegs;
emitWordLE(Binary);
}
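The register-count field counts 32-bit words, which is why the D-register case doubles NumRegs. A tiny sanity sketch (the values are an arbitrary example):

#include <cassert>

int main() {
  unsigned Binary = 0x100;  // bit 8 set: the list holds 64-bit D registers
  unsigned NumRegs = 4;     // e.g. a D0-D3 list
  if (Binary & 0x100)
    Binary |= NumRegs * 2;  // 4 D registers span 8 words
  else
    Binary |= NumRegs;      // S registers span one word each
  assert((Binary & 0xFF) == 8);
  return 0;
}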
void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) {
+ unsigned Opcode = MI.getDesc().Opcode;
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
// Set the conditional execution predicate
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
+ switch (Opcode) {
+ default:
+ llvm_unreachable("ARMCodeEmitter::emitMiscInstruction");
+
+ case ARM::FMSTAT:
+ // No further encoding needed.
+ break;
+
+ case ARM::VMRS:
+ case ARM::VMSR: {
+ const MachineOperand &MO0 = MI.getOperand(0);
+ // Encode Rt.
+ Binary |= ARMRegisterInfo::getRegisterNumbering(MO0.getReg())
+ << ARMII::RegRdShift;
+ break;
+ }
+
+ case ARM::FCONSTD:
+ case ARM::FCONSTS: {
+ // Encode Dd / Sd.
+ Binary |= encodeVFPRd(MI, 0);
+
+ // Encode imm., Table A7-18 VFP modified immediate constants
+ const MachineOperand &MO1 = MI.getOperand(1);
+ unsigned Imm = static_cast<unsigned>(MO1.getFPImm()->getValueAPF()
+ .bitcastToAPInt().getHiBits(32).getLimitedValue());
+ unsigned ModifiedImm;
+
+ if (Opcode == ARM::FCONSTS)
+ ModifiedImm = (Imm & 0x80000000) >> 24 | // a
+ (Imm & 0x03F80000) >> 19; // bcdefgh
+ else // Opcode == ARM::FCONSTD
+ ModifiedImm = (Imm & 0x80000000) >> 24 | // a
+ (Imm & 0x007F0000) >> 16; // bcdefgh
+
+ // Insts{19-16} = abcd, Insts{3-0} = efgh
+ Binary |= ((ModifiedImm & 0xF0) >> 4) << 16;
+ Binary |= (ModifiedImm & 0xF);
+ break;
+ }
+ }
+
emitWordLE(Binary);
}
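A worked example of the FCONSTS encoding above, using 1.0f (the canonical Table A7-18 case); this sketch is portable C++ rather than emitter code:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float F = 1.0f;
  uint32_t Imm;
  std::memcpy(&Imm, &F, sizeof(Imm));                  // 0x3F800000
  unsigned ModifiedImm = ((Imm & 0x80000000u) >> 24)   // a (sign)
                       | ((Imm & 0x03F80000u) >> 19);  // bcdefgh
  assert(ModifiedImm == 0x70);                  // 1.0f -> abcdefgh = 01110000
  unsigned Abcd = (ModifiedImm & 0xF0) >> 4;    // Insts{19-16} = 0x7
  unsigned Efgh = ModifiedImm & 0xF;            // Insts{3-0}  = 0x0
  assert(Abcd == 0x7 && Efgh == 0x0);
  return 0;
}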
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 845d088..c87f5d7 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -29,6 +29,7 @@ namespace {
ARMExpandPseudo() : MachineFunctionPass(&ID) {}
const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -37,11 +38,31 @@ namespace {
}
private:
+ void TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
bool ExpandMBB(MachineBasicBlock &MBB);
};
char ARMExpandPseudo::ID = 0;
}
+/// TransferImpOps - Transfer implicit operands on the pseudo instruction to
+/// the instructions created from the expansion.
+void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
+ MachineInstrBuilder &UseMI,
+ MachineInstrBuilder &DefMI) {
+ const TargetInstrDesc &Desc = OldMI.getDesc();
+ for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = OldMI.getOperand(i);
+ assert(MO.isReg() && MO.getReg());
+ if (MO.isUse())
+ UseMI.addReg(MO.getReg(), getKillRegState(MO.isKill()));
+ else
+ DefMI.addReg(MO.getReg(),
+ getDefRegState(true) | getDeadRegState(MO.isDead()));
+ }
+}
+
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -58,52 +79,82 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
? ARM::tLDRpci : ARM::t2LDRpci;
unsigned DstReg = MI.getOperand(0).getReg();
- if (!MI.getOperand(0).isDead()) {
- MachineInstr *NewMI =
- AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(NewLdOpc), DstReg)
- .addOperand(MI.getOperand(1)));
- NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
- .addReg(DstReg, getDefRegState(true))
- .addReg(DstReg)
- .addOperand(MI.getOperand(2));
- }
+ bool DstIsDead = MI.getOperand(0).isDead();
+ MachineInstrBuilder MIB1 =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::tPICADD))
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ TransferImpOps(MI, MIB1, MIB2);
MI.eraseFromParent();
Modified = true;
break;
}
+
case ARM::t2MOVi32imm: {
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = llvm::getInstrPredicate(&MI, PredReg);
unsigned DstReg = MI.getOperand(0).getReg();
- if (!MI.getOperand(0).isDead()) {
- const MachineOperand &MO = MI.getOperand(1);
- MachineInstrBuilder LO16, HI16;
-
- LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
- DstReg);
- HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
- .addReg(DstReg, getDefRegState(true)).addReg(DstReg);
-
- if (MO.isImm()) {
- unsigned Imm = MO.getImm();
- unsigned Lo16 = Imm & 0xffff;
- unsigned Hi16 = (Imm >> 16) & 0xffff;
- LO16 = LO16.addImm(Lo16);
- HI16 = HI16.addImm(Hi16);
- } else {
- const GlobalValue *GV = MO.getGlobal();
- unsigned TF = MO.getTargetFlags();
- LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
- HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
- // FIXME: What's about memoperands?
- }
- AddDefaultPred(LO16);
- AddDefaultPred(HI16);
+ bool DstIsDead = MI.getOperand(0).isDead();
+ const MachineOperand &MO = MI.getOperand(1);
+ MachineInstrBuilder LO16, HI16;
+
+ LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16),
+ DstReg);
+ HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16))
+ .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(DstReg);
+
+ if (MO.isImm()) {
+ unsigned Imm = MO.getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ LO16 = LO16.addImm(Lo16);
+ HI16 = HI16.addImm(Hi16);
+ } else {
+ const GlobalValue *GV = MO.getGlobal();
+ unsigned TF = MO.getTargetFlags();
+ LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
+ HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
}
+ (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ LO16.addImm(Pred).addReg(PredReg);
+ HI16.addImm(Pred).addReg(PredReg);
+ TransferImpOps(MI, LO16, HI16);
+ MI.eraseFromParent();
+ Modified = true;
+ break;
+ }
+
+ case ARM::VMOVQQ: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ unsigned EvenDst = TRI->getSubReg(DstReg, ARM::qsub_0);
+ unsigned OddDst = TRI->getSubReg(DstReg, ARM::qsub_1);
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ unsigned EvenSrc = TRI->getSubReg(SrcReg, ARM::qsub_0);
+ unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1);
+ MachineInstrBuilder Even =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::VMOVQ))
+ .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill)));
+ MachineInstrBuilder Odd =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::VMOVQ))
+ .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill)));
+ TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
Modified = true;
}
- // FIXME: expand t2MOVi32imm
}
MBBI = NMBBI;
}
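To make the t2MOVi32imm expansion above concrete, a small sketch with an arbitrary constant; the pseudo-instruction becomes a movw/movt pair, with the movt half preserving the low 16 bits already written by movw:

// t2MOVi32imm r0, 0x12345678  ==>  t2MOVi16 r0, 0x5678 ; t2MOVTi16 r0, r0, 0x1234
#include <cassert>

int main() {
  unsigned Imm = 0x12345678;
  unsigned Lo16 = Imm & 0xffff;          // operand of t2MOVi16
  unsigned Hi16 = (Imm >> 16) & 0xffff;  // operand of t2MOVTi16
  assert(Lo16 == 0x5678 && Hi16 == 0x1234);
  return 0;
}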
@@ -113,6 +164,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
+ TRI = MF.getTarget().getRegisterInfo();
bool Modified = false;
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 616942c..9baef6b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -37,7 +37,8 @@ using namespace llvm;
static cl::opt<bool>
UseRegSeq("neon-reg-sequence", cl::Hidden,
- cl::desc("Use reg_sequence to model ld / st of multiple neon regs"));
+ cl::desc("Use reg_sequence to model ld / st of multiple neon regs"),
+ cl::init(true));
//===--------------------------------------------------------------------===//
/// ARMDAGToDAGISel - ARM specific code to select ARM machine
@@ -164,15 +165,34 @@ private:
ARMCC::CondCodes CCVal, SDValue CCR,
SDValue InFlag);
+ SDNode *SelectConcatVector(SDNode *N);
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
char ConstraintCode,
std::vector<SDValue> &OutOps);
- /// PairDRegs - Insert a pair of double registers into an implicit def to
- /// form a quad register.
+ /// PairDRegs - Form a quad register from a pair of D registers.
+ ///
SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1);
+
+ /// PairQRegs - Form a quad register pair from a pair of Q registers.
+ ///
+ SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1);
+
+ /// QuadDRegs - Form a quad register pair from a quad of D registers.
+ ///
+ SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+ /// QuadQRegs - Form 4 consecutive Q registers.
+ ///
+ SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
+
+ /// OctoDRegs - Form 8 consecutive D registers.
+ ///
+ SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3,
+ SDValue V4, SDValue V5, SDValue V6, SDValue V7);
};
}
@@ -940,13 +960,13 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
return NULL;
}
-/// PairDRegs - Insert a pair of double registers into an implicit def to
-/// form a quad register.
+/// PairDRegs - Form a quad register from a pair of D registers.
+///
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
- SDValue SubReg0 = CurDAG->getTargetConstant(ARM::DSUBREG_0, MVT::i32);
- SDValue SubReg1 = CurDAG->getTargetConstant(ARM::DSUBREG_1, MVT::i32);
- if (UseRegSeq) {
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ if (llvm::ModelWithRegSequence()) {
const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
}
@@ -958,6 +978,62 @@ SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
VT, SDValue(Pair, 0), V1, SubReg1);
}
+/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
+///
+SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
+/// QuadDRegs - Form 4 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// QuadQRegs - Form 4 consecutive Q registers.
+///
+SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+}
+
+/// OctoDRegs - Form 8 consecutive D registers.
+///
+SDNode *ARMDAGToDAGISel::OctoDRegs(EVT VT, SDValue V0, SDValue V1,
+ SDValue V2, SDValue V3,
+ SDValue V4, SDValue V5,
+ SDValue V6, SDValue V7) {
+ DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
+ SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
+ SDValue SubReg4 = CurDAG->getTargetConstant(ARM::dsub_4, MVT::i32);
+ SDValue SubReg5 = CurDAG->getTargetConstant(ARM::dsub_5, MVT::i32);
+ SDValue SubReg6 = CurDAG->getTargetConstant(ARM::dsub_6, MVT::i32);
+ SDValue SubReg7 = CurDAG->getTargetConstant(ARM::dsub_7, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3,
+ V4, SubReg4, V5, SubReg5, V6, SubReg6, V7, SubReg7 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 16);
+}
+
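Each helper above feeds REG_SEQUENCE one (value, subregister-index) pair per register, so an N-register sequence takes 2N node operands; that is where the trailing 4, 8, and 16 counts in the getMachineNode calls come from, and the single super-register result is what forces the allocator to place the parts in consecutive registers. A trivial sanity sketch:

#include <cassert>

int main() {
  auto NumOperands = [](unsigned NumRegs) { return 2 * NumRegs; };
  assert(NumOperands(2) == 4);   // PairDRegs / PairQRegs
  assert(NumOperands(4) == 8);   // QuadDRegs / QuadQRegs
  assert(NumOperands(8) == 16);  // OctoDRegs
  return 0;
}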
/// GetNEONSubregVT - Given a type for a 128-bit NEON vector, return the type
/// for a 64-bit subregister of the vector.
static EVT GetNEONSubregVT(EVT VT) {
@@ -1011,7 +1087,34 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
+ if (!llvm::ModelWithRegSequence() || NumVecs < 2)
+ return VLd;
+
+ SDValue RegSeq;
+ SDValue V0 = SDValue(VLd, 0);
+ SDValue V1 = SDValue(VLd, 1);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = SDValue(VLd, 2);
+ // If it's a vld3, form a quad D-register but discard the last part.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : SDValue(VLd, 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue D = CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec,
+ dl, VT, RegSeq);
+ ReplaceUses(SDValue(N, Vec), D);
+ }
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, NumVecs));
+ return NULL;
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1026,9 +1129,24 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
Chain = SDValue(VLd, 2 * NumVecs);
// Combine the even and odd subregs to produce the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ if (llvm::ModelWithRegSequence()) {
+ if (NumVecs == 1) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1));
+ ReplaceUses(SDValue(N, 0), SDValue(Q, 0));
+ } else {
+ SDValue QQ = SDValue(QuadDRegs(MVT::v4i64,
+ SDValue(VLd, 0), SDValue(VLd, 1),
+ SDValue(VLd, 2), SDValue(VLd, 3)), 0);
+ SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ);
+ SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ);
+ ReplaceUses(SDValue(N, 0), Q0);
+ ReplaceUses(SDValue(N, 1), Q1);
+ }
+ } else {
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1));
+ ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ }
}
} else {
// Otherwise, quad registers are loaded with two separate instructions,
@@ -1051,10 +1169,37 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6);
Chain = SDValue(VLdB, NumVecs+1);
- // Combine the even and odd subregs to produce the result.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
- ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ if (llvm::ModelWithRegSequence()) {
+ SDValue V0 = SDValue(VLdA, 0);
+ SDValue V1 = SDValue(VLdB, 0);
+ SDValue V2 = SDValue(VLdA, 1);
+ SDValue V3 = SDValue(VLdB, 1);
+ SDValue V4 = SDValue(VLdA, 2);
+ SDValue V5 = SDValue(VLdB, 2);
+ SDValue V6 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
+ 0)
+ : SDValue(VLdA, 3);
+ SDValue V7 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT),
+ 0)
+ : SDValue(VLdB, 3);
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3,
+ V4, V5, V6, V7), 0);
+
+ // Extract out the 3 / 4 Q registers.
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec,
+ dl, VT, RegSeq);
+ ReplaceUses(SDValue(N, Vec), Q);
+ }
+ } else {
+ // Combine the even and odd subregs to produce the result.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec));
+ ReplaceUses(SDValue(N, Vec), SDValue(Q, 0));
+ }
}
}
ReplaceUses(SDValue(N, NumVecs), Chain);
@@ -1102,12 +1247,43 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
Ops.push_back(Align);
if (is64BitVector) {
- unsigned Opc = DOpcodes[OpcodeIndex];
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ if (llvm::ModelWithRegSequence() && NumVecs >= 2) {
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+
+ // Form a REG_SEQUENCE to force register allocation.
+ if (NumVecs == 2)
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ else {
+ SDValue V2 = N->getOperand(2+3);
+ // If it's a vst3, form a quad D-register and leave the last part as
+ // an undef.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
+ RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
+ RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
+ RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
+ RegSeq));
+ } else {
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(N->getOperand(Vec+3));
+ }
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
+ unsigned Opc = DOpcodes[OpcodeIndex];
return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
}
@@ -1116,48 +1292,114 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
// Quad registers are directly supported for VST1 and VST2,
// storing pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(Vec+3)));
+ if (llvm::ModelWithRegSequence() && NumVecs == 2) {
+ // First extract the pair of Q registers.
+ SDValue Q0 = N->getOperand(3);
+ SDValue Q1 = N->getOperand(4);
+
+ // Form a QQ register.
+ SDValue QQ = SDValue(PairQRegs(MVT::v4i64, Q0, Q1), 0);
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ QQ));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ QQ));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, RegVT,
+ QQ));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, RegVT,
+ QQ));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 5 + 4);
+ } else {
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3)));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3)));
+ }
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+ 5 + 2 * NumVecs);
}
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
- 5 + 2 * NumVecs);
}
// Otherwise, quad registers are stored with two separate instructions,
// where one stores the even registers and the other stores the odd registers.
+ if (llvm::ModelWithRegSequence()) {
+ // Form the QQQQ REG_SEQUENCE.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3));
+ V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3));
+ }
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
- Ops.push_back(Reg0); // post-access address offset
+ SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
- // Store the even subregs.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
- N->getOperand(Vec+3)));
- Ops.push_back(Pred);
- Ops.push_back(Reg0); // predicate register
- Ops.push_back(Chain);
- unsigned Opc = QOpcodes0[OpcodeIndex];
- SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStA, 1);
-
- // Store the odd subregs.
- Ops[0] = SDValue(VStA, 0); // MemAddr
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
- N->getOperand(Vec+3));
- Ops[NumVecs+5] = Chain;
- Opc = QOpcodes1[OpcodeIndex];
- SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+6);
- Chain = SDValue(VStB, 1);
- ReplaceUses(SDValue(N, 0), Chain);
- return NULL;
+ // Store the even D registers.
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ Ops.push_back(Reg0); // post-access address offset
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl,
+ RegVT, RegSeq));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd D registers.
+ Ops[0] = SDValue(VStA, 0); // MemAddr
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl,
+ RegVT, RegSeq);
+ Ops[NumVecs+5] = Chain;
+ Opc = QOpcodes1[OpcodeIndex];
+ SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStB, 1);
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
+ } else {
+ Ops.push_back(Reg0); // post-access address offset
+
+ // Store the even subregs.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT,
+ N->getOperand(Vec+3)));
+ Ops.push_back(Pred);
+ Ops.push_back(Reg0); // predicate register
+ Ops.push_back(Chain);
+ unsigned Opc = QOpcodes0[OpcodeIndex];
+ SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStA, 1);
+
+ // Store the odd subregs.
+ Ops[0] = SDValue(VStA, 0); // MemAddr
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT,
+ N->getOperand(Vec+3));
+ Ops[NumVecs+5] = Chain;
+ Opc = QOpcodes1[OpcodeIndex];
+ SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
+ MVT::Other, Ops.data(), NumVecs+6);
+ Chain = SDValue(VStB, 1);
+ ReplaceUses(SDValue(N, 0), Chain);
+ return NULL;
+ }
}
SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
@@ -1180,11 +1422,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
// Quad registers are handled by load/store of subregs. Find the subreg info.
unsigned NumElts = 0;
int SubregIdx = 0;
+ bool Even = false;
EVT RegVT = VT;
if (!is64BitVector) {
RegVT = GetNEONSubregVT(VT);
NumElts = RegVT.getVectorNumElements();
- SubregIdx = (Lane < NumElts) ? ARM::DSUBREG_0 : ARM::DSUBREG_1;
+ SubregIdx = (Lane < NumElts) ? ARM::dsub_0 : ARM::dsub_1;
+ Even = Lane < NumElts;
}
unsigned OpcodeIndex;
@@ -1211,8 +1455,35 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
unsigned Opc = 0;
if (is64BitVector) {
Opc = DOpcodes[OpcodeIndex];
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(N->getOperand(Vec+3));
+ if (llvm::ModelWithRegSequence()) {
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ } else {
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+
+ // Now extract the D registers back out.
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT,
+ RegSeq));
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT,
+ RegSeq));
+ if (NumVecs > 2)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,
+ RegSeq));
+ if (NumVecs > 3)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,
+ RegSeq));
+ } else {
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(N->getOperand(Vec+3));
+ }
} else {
// Check if this is loading the even or odd subreg of a Q register.
if (Lane < NumElts) {
@@ -1221,10 +1492,32 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
Lane -= NumElts;
Opc = QOpcodes1[OpcodeIndex];
}
- // Extract the subregs of the input vector.
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
- N->getOperand(Vec+3)));
+
+ if (llvm::ModelWithRegSequence()) {
+ SDValue RegSeq;
+ SDValue V0 = N->getOperand(0+3);
+ SDValue V1 = N->getOperand(1+3);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0);
+ } else {
+ SDValue V2 = N->getOperand(2+3);
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : N->getOperand(3+3);
+ RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0);
+ }
+
+ // Extract the subregs of the input vector.
+ unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT,
+ RegSeq));
+ } else {
+ // Extract the subregs of the input vector.
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT,
+ N->getOperand(Vec+3)));
+ }
}
Ops.push_back(getI32Imm(Lane));
Ops.push_back(Pred);
@@ -1236,8 +1529,60 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
std::vector<EVT> ResTys(NumVecs, RegVT);
ResTys.push_back(MVT::Other);
- SDNode *VLdLn =
- CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6);
+ SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6);
+
+ if (llvm::ModelWithRegSequence()) {
+ // Form a REG_SEQUENCE to force register allocation.
+ SDValue RegSeq;
+ if (is64BitVector) {
+ SDValue V0 = SDValue(VLdLn, 0);
+ SDValue V1 = SDValue(VLdLn, 1);
+ if (NumVecs == 2) {
+ RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0);
+ } else {
+ SDValue V2 = SDValue(VLdLn, 2);
+ // If it's a vld3, form a quad D-register but discard the last part.
+ SDValue V3 = (NumVecs == 3)
+ ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
+ : SDValue(VLdLn, 3);
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0);
+ }
+ } else {
+ // For 128-bit vectors, take the 64-bit results of the load and insert them
+ // as subregs into the result.
+ SDValue V[8];
+ for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) {
+ if (Even) {
+ V[i] = SDValue(VLdLn, Vec);
+ V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+ } else {
+ V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+ V[i+1] = SDValue(VLdLn, Vec);
+ }
+ }
+ if (NumVecs == 3)
+ V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
+ dl, RegVT), 0);
+
+ if (NumVecs == 2)
+ RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0);
+ else
+ RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3],
+ V[4], V[5], V[6], V[7]), 0);
+ }
+
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
+ unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq));
+ ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs));
+ return NULL;
+ }
+
// For a 64-bit vector load to D registers, nothing more needs to be done.
if (is64BitVector)
return VLdLn;
@@ -1481,6 +1826,21 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) {
return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5);
}
+SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
+ // The only time a CONCAT_VECTORS operation can have legal types is when
+ // two 64-bit vectors are concatenated to a 128-bit vector.
+ EVT VT = N->getValueType(0);
+ if (!VT.is128BitVector() || N->getNumOperands() != 2)
+ llvm_unreachable("unexpected CONCAT_VECTORS");
+ DebugLoc dl = N->getDebugLoc();
+ SDValue V0 = N->getOperand(0);
+ SDValue V1 = N->getOperand(1);
+ SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
+ SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
+ const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+}
+
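A portable model (plain C++, not NEON intrinsics) of what SelectConcatVector exploits: concatenating two 64-bit halves into a 128-bit vector is pure data layout, dsub_0 followed by dsub_1, so REG_SEQUENCE can express it with no instructions at all:

#include <array>
#include <cassert>

using D = std::array<float, 2>;  // one D register's worth (64 bits)
using Q = std::array<float, 4>;  // one Q register's worth (128 bits)

// CONCAT_VECTORS as a layout operation: low half, then high half.
static Q concat(const D &Lo, const D &Hi) {
  return {Lo[0], Lo[1], Hi[0], Hi[1]};
}

int main() {
  Q V = concat({1.0f, 2.0f}, {3.0f, 4.0f});
  assert(V[0] == 1.0f && V[3] == 4.0f);
  return 0;
}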
SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
@@ -1695,8 +2055,12 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue Pred = getAL(CurDAG);
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
- return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other,
- Ops, 5);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ SDNode *Ret = CurDAG->getMachineNode(ARM::VLDMQ, dl,
+ MVT::v2f64, MVT::Other, Ops, 5);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ return Ret;
}
// Other cases are autogenerated.
break;
@@ -1712,7 +2076,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
AM5Opc, Pred, PredReg, Chain };
- return CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ SDNode *Ret = CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
+ cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
+ return Ret;
}
// Other cases are autogenerated.
break;
@@ -1971,7 +2339,11 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
}
+ break;
}
+
+ case ISD::CONCAT_VECTORS:
+ return SelectConcatVector(N);
}
return SelectCode(N);
@@ -1995,3 +2367,9 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new ARMDAGToDAGISel(TM, OptLevel);
}
+
+/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model
+/// operations involving sub-registers.
+bool llvm::ModelWithRegSequence() {
+ return UseRegSeq;
+}
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index d3842a6..b8126a3 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -94,7 +94,10 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
}
setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
+ if (llvm::ModelWithRegSequence())
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
+ else
+ setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
@@ -360,8 +363,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::BSWAP, MVT::i32, Expand);
// These are expanded into libcalls.
- setOperationAction(ISD::SDIV, MVT::i32, Expand);
- setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ if (!Subtarget->hasDivide()) {
+ // v7M has a hardware divider
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ }
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
@@ -373,6 +379,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
+ setOperationAction(ISD::TRAP, MVT::Other, Legal);
+
// Use the default implementation.
setOperationAction(ISD::VASTART, MVT::Other, Custom);
setOperationAction(ISD::VAARG, MVT::Other, Expand);
@@ -387,7 +395,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
- if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
+ // If the subtarget does not have extract instructions, sign_extend_inreg
+ // needs to be expanded. Extract is available in ARM mode on v6 and up,
+ // and on most Thumb2 implementations.
+ if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops())
+ || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
}
@@ -400,6 +412,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
+ setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
setOperationAction(ISD::SETCC, MVT::i32, Expand);
setOperationAction(ISD::SETCC, MVT::f32, Expand);
@@ -451,9 +465,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
+ setTargetDAGCombine(ISD::MUL);
setStackPointerRegisterToSaveRestore(ARM::SP);
- setSchedulingPreference(SchedulingForRegPressure);
+
+ if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
+ setSchedulingPreference(Sched::RegPressure);
+ else
+ setSchedulingPreference(Sched::Hybrid);
// FIXME: If-converter should use instruction latency to determine
// profitability rather than relying on fixed limits.
@@ -567,11 +586,35 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
}
}
+/// getRegClassFor - Return the register class that should be used for the
+/// specified value type.
+TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
+ // Map v4i64 to QQ registers but do not make the type legal. Similarly map
+ // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
+ // load / store 4 to 8 consecutive D registers.
+ if (Subtarget->hasNEON()) {
+ if (VT == MVT::v4i64)
+ return ARM::QQPRRegisterClass;
+ else if (VT == MVT::v8i64)
+ return ARM::QQQQPRRegisterClass;
+ }
+ return TargetLowering::getRegClassFor(VT);
+}
+
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 0 : 1;
}
+Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
+ for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT.isFloatingPoint() || VT.isVector())
+ return Sched::Latency;
+ }
+ return Sched::RegPressure;
+}
+
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//
@@ -1507,6 +1550,23 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
}
SDValue
+ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Val = Subtarget->isThumb() ?
+ DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
+ DAG.getConstant(0, MVT::i32);
+ return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
+ Op.getOperand(1), Val);
+}
+
+SDValue
+ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
+ Op.getOperand(1), DAG.getConstant(0, MVT::i32));
+}
+
+SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget)
const {
@@ -1545,12 +1605,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
return Result;
}
- case Intrinsic::eh_sjlj_setjmp:
- SDValue Val = Subtarget->isThumb() ?
- DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) :
- DAG.getConstant(0, MVT::i32);
- return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(1),
- Val);
}
}
@@ -1652,7 +1706,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
RC = ARM::GPRRegisterClass;
// Transform the arguments stored in physical registers into virtual ones.
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
SDValue ArgValue2;
@@ -2092,9 +2146,31 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
}
+SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ if (Depth) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset = DAG.getConstant(4, MVT::i32);
+ return DAG.getLoad(VT, dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
+ NULL, 0, false, false, 0);
+ }
+
+ // Return LR, which contains the return address. Mark it an implicit live-in.
+ unsigned Reg = MF.addLiveIn(ARM::LR, ARM::GPRRegisterClass);
+ return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
+}
+
SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
+
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -2107,116 +2183,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
return FrameAddr;
}
-SDValue
-ARMTargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const {
- // Do repeated 4-byte loads and stores. To be improved.
- // This requires 4-byte alignment.
- if ((Align & 3) != 0)
- return SDValue();
- // This requires the copy size to be a constant, preferrably
- // within a subtarget-specific limit.
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- if (!ConstantSize)
- return SDValue();
- uint64_t SizeVal = ConstantSize->getZExtValue();
- if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
- return SDValue();
-
- unsigned BytesLeft = SizeVal & 3;
- unsigned NumMemOps = SizeVal >> 2;
- unsigned EmittedNumMemOps = 0;
- EVT VT = MVT::i32;
- unsigned VTSize = 4;
- unsigned i = 0;
- const unsigned MAX_LOADS_IN_LDM = 6;
- SDValue TFOps[MAX_LOADS_IN_LDM];
- SDValue Loads[MAX_LOADS_IN_LDM];
- uint64_t SrcOff = 0, DstOff = 0;
-
- // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
- // same number of stores. The loads and stores will get combined into
- // ldm/stm later on.
- while (EmittedNumMemOps < NumMemOps) {
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- Loads[i] = DAG.getLoad(VT, dl, Chain,
- DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
- DAG.getConstant(SrcOff, MVT::i32)),
- SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
- TFOps[i] = Loads[i].getValue(1);
- SrcOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
- for (i = 0;
- i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
- TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, MVT::i32)),
- DstSV, DstSVOff + DstOff, isVolatile, false, 0);
- DstOff += VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
- EmittedNumMemOps += i;
- }
-
- if (BytesLeft == 0)
- return Chain;
-
- // Issue loads / stores for the trailing (1 - 3) bytes.
- unsigned BytesLeftSave = BytesLeft;
- i = 0;
- while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
- Loads[i] = DAG.getLoad(VT, dl, Chain,
- DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
- DAG.getConstant(SrcOff, MVT::i32)),
- SrcSV, SrcSVOff + SrcOff, false, false, 0);
- TFOps[i] = Loads[i].getValue(1);
- ++i;
- SrcOff += VTSize;
- BytesLeft -= VTSize;
- }
- Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-
- i = 0;
- BytesLeft = BytesLeftSave;
- while (BytesLeft) {
- if (BytesLeft >= 2) {
- VT = MVT::i16;
- VTSize = 2;
- } else {
- VT = MVT::i8;
- VTSize = 1;
- }
-
- TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
- DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
- DAG.getConstant(DstOff, MVT::i32)),
- DstSV, DstSVOff + DstOff, false, false, 0);
- ++i;
- DstOff += VTSize;
- BytesLeft -= VTSize;
- }
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
-}
-
/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
@@ -2434,9 +2400,9 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
// Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
- DAG.getConstant(0, MVT::i32));
+ DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
- DAG.getConstant(1, MVT::i32));
+ DAG.getConstant(1, MVT::i32));
// First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
// captures the result into a carry flag.
@@ -2879,21 +2845,60 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
}
- // If there are only 2 elements in a 128-bit vector, insert them into an
- // undef vector. This handles the common case for 128-bit vector argument
- // passing, where the insertions should be translated to subreg accesses
- // with no real instructions.
- if (VT.is128BitVector() && Op.getNumOperands() == 2) {
- SDValue Val = DAG.getUNDEF(VT);
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
- if (Op0.getOpcode() != ISD::UNDEF)
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op0,
- DAG.getIntPtrConstant(0));
- if (Op1.getOpcode() != ISD::UNDEF)
- Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, Op1,
- DAG.getIntPtrConstant(1));
- return Val;
+ // Scan through the operands to see if only one value is used.
+ unsigned NumElts = VT.getVectorNumElements();
+ bool isOnlyLowElement = true;
+ bool usesOnlyOneValue = true;
+ bool isConstant = true;
+ SDValue Value;
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue V = Op.getOperand(i);
+ if (V.getOpcode() == ISD::UNDEF)
+ continue;
+ if (i > 0)
+ isOnlyLowElement = false;
+ if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
+ isConstant = false;
+
+ if (!Value.getNode())
+ Value = V;
+ else if (V != Value)
+ usesOnlyOneValue = false;
+ }
+
+ if (!Value.getNode())
+ return DAG.getUNDEF(VT);
+
+ if (isOnlyLowElement)
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
+
+ // If all elements are constants, fall back to the default expansion, which
+ // will generate a load from the constant pool.
+ if (isConstant)
+ return SDValue();
+
+ // Use VDUP for non-constant splats.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (usesOnlyOneValue && EltSize <= 32)
+ return DAG.getNode(ARMISD::VDUP, dl, VT, Value);
+
+ // Vectors with 32- or 64-bit elements can be built by directly assigning
+ // the subregisters.
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ SDValue Val = DAG.getUNDEF(VecVT);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ SDValue Elt = Op.getOperand(i);
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+ Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt);
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt,
+ DAG.getConstant(i, MVT::i32));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
}
return SDValue();
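A portable mirror (illustrative names, not DAG code) of the decision order the new LowerBUILD_VECTOR logic follows: all-undef, only lane 0 defined, all-constant, one repeated value with small elements, then wide elements via subregister inserts:

#include <cassert>
#include <cstddef>
#include <optional>
#include <vector>

enum class Strategy { Undef, ScalarToVector, ConstantPool, VDup, SubregInserts, Default };

// Lanes holds one entry per element, std::nullopt meaning undef; AllConstant
// and EltBits stand in for the isConstant and element-size tests above.
static Strategy classify(const std::vector<std::optional<int>> &Lanes,
                         bool AllConstant, unsigned EltBits) {
  std::optional<int> Value;
  bool OnlyLowElement = true, OneValue = true;
  for (std::size_t i = 0; i < Lanes.size(); ++i) {
    if (!Lanes[i])
      continue;
    if (i > 0)
      OnlyLowElement = false;
    if (!Value)
      Value = Lanes[i];
    else if (*Lanes[i] != *Value)
      OneValue = false;
  }
  if (!Value) return Strategy::Undef;
  if (OnlyLowElement) return Strategy::ScalarToVector;
  if (AllConstant) return Strategy::ConstantPool;
  if (OneValue && EltBits <= 32) return Strategy::VDup;
  if (EltBits >= 32) return Strategy::SubregInserts;
  return Strategy::Default;
}

int main() {
  assert(classify({7, 7, 7, 7}, false, 32) == Strategy::VDup);
  assert(classify({5, std::nullopt, std::nullopt, std::nullopt}, false, 32)
         == Strategy::ScalarToVector);
  return 0;
}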
@@ -3083,8 +3088,8 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
- if (VT.getVectorNumElements() == 4 &&
- (VT.is128BitVector() || VT.is64BitVector())) {
+ unsigned NumElts = VT.getVectorNumElements();
+ if (NumElts == 4) {
unsigned PFIndexes[4];
for (unsigned i = 0; i != 4; ++i) {
if (ShuffleMask[i] < 0)
@@ -3096,7 +3101,6 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// Compute the index in the perfect shuffle table.
unsigned PFTableIndex =
PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
-
unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
unsigned Cost = (PFEntry >> 30);
@@ -3104,6 +3108,29 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
}
+ // Implement shuffles with 32- or 64-bit elements as subreg copies.
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ if (EltSize >= 32) {
+ // Do the expansion with floating-point types, since that is what the VFP
+ // registers are defined to use, and since i64 is not legal.
+ EVT EltVT = EVT::getFloatingPointVT(EltSize);
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1);
+ V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2);
+ SDValue Val = DAG.getUNDEF(VecVT);
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (ShuffleMask[i] < 0)
+ continue;
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
+ ShuffleMask[i] < (int)NumElts ? V1 : V2,
+ DAG.getConstant(ShuffleMask[i] & (NumElts-1),
+ MVT::i32));
+ Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val,
+ Elt, DAG.getConstant(i, MVT::i32));
+ }
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val);
+ }
+
return SDValue();
}
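A portable model of the wide-element shuffle fallback above: with 32- or 64-bit lanes, any mask can be honored by moving whole lanes, one extract/insert pair per result element (undef mask entries are simply skipped):

#include <array>
#include <cassert>

int main() {
  std::array<float, 4> V1 = {1, 2, 3, 4}, V2 = {5, 6, 7, 8};
  int Mask[4] = {0, 4, 1, 5};          // <0,4,1,5>: a vzip-like pattern
  std::array<float, 4> R{};
  for (int i = 0; i < 4; ++i) {
    int M = Mask[i];
    R[i] = M < 4 ? V1[M] : V2[M - 4];  // whole-lane copy, as in the loop above
  }
  assert(R[0] == 1 && R[1] == 5 && R[2] == 2 && R[3] == 6);
  return 0;
}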
@@ -3158,9 +3185,11 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
- case ISD::RETURNADDR: break;
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
+ case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
+ case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
Subtarget);
case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG);
@@ -3667,6 +3696,62 @@ static SDValue PerformSUBCombine(SDNode *N,
return SDValue();
}
+static SDValue PerformMULCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+
+ if (Subtarget->isThumb1Only())
+ return SDValue();
+
+ if (DAG.getMachineFunction().
+ getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ return SDValue();
+
+ if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return SDValue();
+
+ uint64_t MulAmt = C->getZExtValue();
+ unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
+ ShiftAmt = ShiftAmt & (32 - 1);
+ SDValue V = N->getOperand(0);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue Res;
+ MulAmt >>= ShiftAmt;
+ if (isPowerOf2_32(MulAmt - 1)) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ Res = DAG.getNode(ISD::ADD, DL, VT,
+ V, DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt-1),
+ MVT::i32)));
+ } else if (isPowerOf2_32(MulAmt + 1)) {
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ Res = DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::SHL, DL, VT,
+ V, DAG.getConstant(Log2_32(MulAmt+1),
+ MVT::i32)),
+ V);
+ } else
+ return SDValue();
+
+ if (ShiftAmt != 0)
+ Res = DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(ShiftAmt, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+ return SDValue();
+}
+
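A worked instance of the combine above for MulAmt = 10: CountTrailingZeros gives ShiftAmt = 1, MulAmt becomes 5 = 2^2 + 1, so the multiply turns into an add of a shifted value, shifted once more at the end:

#include <cassert>
#include <cstdint>

// (mul x, 10)  =>  (shl (add (shl x, 2), x), 1)
static uint32_t mulBy10(uint32_t X) { return (X + (X << 2)) << 1; }

int main() {
  for (uint32_t X : {0u, 1u, 7u, 123456u})
    assert(mulBy10(X) == X * 10);
  return 0;
}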
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4053,6 +4138,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
@@ -4432,9 +4518,11 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isSEXTLoad = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
+ Ptr = LD->getBasePtr();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
+ Ptr = ST->getBasePtr();
} else
return false;
@@ -4442,13 +4530,25 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isLegal = false;
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
- isInc, DAG);
+ isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
if (!isLegal)
return false;
+ if (Ptr != Base) {
+ // Swap base ptr and offset to catch more post-index load / store when
+ // it's legal. In Thumb2 mode, offset must be an immediate.
+ if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
+ !Subtarget->isThumb2())
+ std::swap(Base, Offset);
+
+ // Post-indexed load / store update the base pointer.
+ if (Ptr != Base)
+ return false;
+ }
+
AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}
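The target here is the post-indexed addressing form, where the pointer update folds into the access itself; the new swap handles the case where the DAG placed the pointer as the second operand of the add (only done outside Thumb2, since Thumb2 post-index offsets must be immediates). A plain C++ sketch of the shape that benefits, with a register stride:

#include <cassert>

static int sumStrided(const int *P, int N, int Stride) {
  int S = 0;
  for (int i = 0; i < N; ++i, P += Stride)
    S += *P;  // candidate for a post-indexed ldr with a register offset
  return S;
}

int main() {
  int A[6] = {1, 0, 2, 0, 3, 0};
  assert(sumStrided(A, 3, 2) == 6);
  return 0;
}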
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index d8a230f..9c7517c 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -236,13 +236,19 @@ namespace llvm {
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const;
- virtual const ARMSubtarget* getSubtarget() const {
+ const ARMSubtarget* getSubtarget() const {
return Subtarget;
}
+ /// getRegClassFor - Return the register class that should be used for the
+ /// specified value type.
+ virtual TargetRegisterClass *getRegClassFor(EVT VT) const;
+
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
+ Sched::Preference getSchedulingPreference(SDNode *N) const;
+
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@@ -281,7 +287,8 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG,
const CCValAssign &VA,
ISD::ArgFlagsTy Flags) const;
- SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *Subtarget) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -296,20 +303,12 @@ namespace llvm {
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
- SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const;
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index b466d0d..d487df1 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -228,7 +228,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin,
"", itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
}
@@ -240,7 +240,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, !strconcat("${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
@@ -252,7 +252,7 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = !strconcat(opc, asm);
+ let AsmString = !strconcat(opc, asm);
let Pattern = pattern;
let isPredicable = 0;
list<Predicate> Predicates = [IsARM];
@@ -268,7 +268,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
- let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
+ let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
@@ -280,7 +280,7 @@ class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
@@ -959,7 +959,7 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb];
}
@@ -995,7 +995,7 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb1Only];
}
@@ -1140,7 +1140,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p, cc_out:$s));
- let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
+ let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
}
@@ -1152,7 +1152,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb2];
}
@@ -1163,7 +1163,7 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [IsThumb1Only];
}
@@ -1280,7 +1280,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
- let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+ let AsmString = !strconcat(opc, !strconcat("${p}", asm));
let Pattern = pattern;
list<Predicate> Predicates = [HasVFP2];
}
@@ -1292,7 +1292,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
: InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
- let AsmString = asm;
+ let AsmString = asm;
let Pattern = pattern;
list<Predicate> Predicates = [HasVFP2];
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index ce5f2f8..f3156d9 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -46,6 +46,7 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>,
SDTCisInt<2>]>;
+def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>;
def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>;
def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
@@ -100,7 +101,10 @@ def ARMsra_flag : SDNode<"ARMISD::SRA_FLAG", SDTIntUnaryOp, [SDNPOutFlag]>;
def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>;
def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
-def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
+def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
+ SDT_ARMEH_SJLJ_Setjmp, [SDNPHasChain]>;
+def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
+ SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain]>;
def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
[SDNPHasChain]>;
@@ -128,6 +132,8 @@ def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
def HasVFP2 : Predicate<"Subtarget->hasVFP2()">;
def HasVFP3 : Predicate<"Subtarget->hasVFP3()">;
def HasNEON : Predicate<"Subtarget->hasNEON()">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">;
+def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
def IsThumb : Predicate<"Subtarget->isThumb()">;
@@ -654,12 +660,12 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKUP :
PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
- "@ ADJCALLSTACKUP $amt1",
+ "${:comment} ADJCALLSTACKUP $amt1",
[(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
def ADJCALLSTACKDOWN :
PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
- "@ ADJCALLSTACKDOWN $amt",
+ "${:comment} ADJCALLSTACKDOWN $amt",
[(ARMcallseq_start timm:$amt)]>;
}
@@ -789,8 +795,11 @@ def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
}
// A5.4 Permanently UNDEFINED instructions.
-def TRAP : AI<(outs), (ins), MiscFrm, NoItinerary, "trap", "",
- [/* For disassembly only; pattern left blank */]>,
+// FIXME: Temporarily emitted as raw bytes until this pseudo-op is added to
+// binutils.
+let isBarrier = 1, isTerminator = 1 in
+def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
+ ".long 0xe7ffdefe ${:comment} trap", [(trap)]>,
Requires<[IsARM]> {
let Inst{27-25} = 0b011;
let Inst{24-20} = 0b11111;
@@ -843,25 +852,19 @@ def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
+let neverHasSideEffects = 1 in {
+let isReMaterializable = 1 in
def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p),
Pseudo, IIC_iALUi,
- !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, ($label-(",
- "${:private}PCRELL${:uid}+8))\n"),
- !strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
- []>;
+ "adr$p\t$dst, #$label", []>;
def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
- Pseudo, IIC_iALUi,
- !strconcat(!strconcat(".set ${:private}PCRELV${:uid}, "
- "(${label}_${id}-(",
- "${:private}PCRELL${:uid}+8))\n"),
- !strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
- []> {
+ Pseudo, IIC_iALUi,
+ "adr$p\t$dst, #${label}_${id}", []> {
let Inst{25} = 1;
}
+} // neverHasSideEffects
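
The removed expansion computed the adr offset by hand; a sketch of the arithmetic it encoded, assuming the usual ARM-state PC bias of 8 bytes:

    // "add Rd, pc, #imm" reads pc as the instruction's own address plus 8,
    // so the old .set directive computed the immediate as
    //   imm = label - (PCRELL + 8)
    // which is exactly what the assembler's adr pseudo-op now does for us.
    unsigned adrImmediate(unsigned labelAddr, unsigned addAddr) {
      return labelAddr - (addAddr + 8);
    }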
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -1134,7 +1137,8 @@ def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
[(set GPR:$dst, (load addrmode2:$addr))]>;
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+ isReMaterializable = 1 in
def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", "\t$dst, $addr", []>;
@@ -1156,7 +1160,7 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
IIC_iLoadr, "ldrsb", "\t$dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
IIC_iLoadr, "ldrd", "\t$dst1, $addr",
@@ -1215,7 +1219,7 @@ def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
"ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>,
Requires<[IsARM, HasV5TE]>;
-}
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
@@ -1264,7 +1268,7 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
[(truncstorei8 GPR:$src, addrmode2:$addr)]>;
// Store doubleword
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
StMiscFrm, IIC_iStorer,
"strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
@@ -1356,7 +1360,7 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb),
// Load / store multiple Instructions.
//
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops),
IndexModeNone, LdStMulFrm, IIC_iLoadm,
@@ -1367,9 +1371,9 @@ def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
IndexModeUpd, LdStMulFrm, IIC_iLoadm,
"ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>;
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops),
IndexModeNone, LdStMulFrm, IIC_iStorem,
@@ -1380,7 +1384,7 @@ def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
IndexModeUpd, LdStMulFrm, IIC_iStorem,
"stm${addr:submode}${p}\t$addr!, $srcs",
"$addr.addr = $wb", []>;
-} // mayStore, hasExtraSrcRegAllocReq
+} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
//===----------------------------------------------------------------------===//
// Move Instructions.
@@ -2198,6 +2202,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
IIC_iCMOVr, "mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -2221,6 +2226,7 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
RegConstraint<"$false = $dst">, UnaryDP {
let Inst{25} = 1;
}
+} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
@@ -2528,12 +2534,12 @@ let Defs =
def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t"
+ "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
"add\t$val, pc, #8\n\t"
"str\t$val, [$src, #+4]\n\t"
"mov\tr0, #0\n\t"
"add\tpc, pc, #0\n\t"
- "mov\tr0, #1 @ eh_setjmp end", "",
+ "mov\tr0, #1 ${:comment} eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, HasVFP2]>;
}
@@ -2543,16 +2549,30 @@ let Defs =
def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val),
AddrModeNone, SizeSpecial, IndexModeNone,
Pseudo, NoItinerary,
- "str\tsp, [$src, #+8] @ eh_setjmp begin\n\t"
+ "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t"
"add\t$val, pc, #8\n\t"
"str\t$val, [$src, #+4]\n\t"
"mov\tr0, #0\n\t"
"add\tpc, pc, #0\n\t"
- "mov\tr0, #1 @ eh_setjmp end", "",
+ "mov\tr0, #1 ${:comment} eh_setjmp end", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>,
Requires<[IsARM, NoVFP]>;
}
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def Int_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+ AddrModeNone, SizeSpecial, IndexModeNone,
+ Pseudo, NoItinerary,
+ "ldr\tsp, [$src, #8]\n\t"
+ "ldr\t$scratch, [$src, #4]\n\t"
+ "ldr\tr7, [$src]\n\t"
+ "bx\t$scratch", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsARM, IsDarwin]>;
+}
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index d5ce2b8..197ec16 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -115,7 +115,7 @@ def h64imm : Operand<i64> {
// NEON load / store instructions
//===----------------------------------------------------------------------===//
-let mayLoad = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1 in {
// Use vldmia to load a Q register as a D register pair.
// This is equivalent to VLDMD except that it has a Q register operand
// instead of a pair of D registers.
@@ -123,11 +123,6 @@ def VLDMQ
: AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
IndexModeNone, IIC_fpLoadm,
"vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
-def VLDMQ_UPD
- : AXDI5<(outs QPR:$dst, GPR:$wb), (ins addrmode5:$addr, pred:$p),
- IndexModeUpd, IIC_fpLoadm,
- "vldm${addr:submode}${p}\t${addr:base}!, ${dst:dregpair}",
- "$addr.base = $wb", []>;
// Use vld1 to load a Q register as a D register pair.
// This alternative to VLDMQ allows an alignment to be specified.
@@ -135,13 +130,9 @@ def VLDMQ_UPD
def VLD1q
: NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
-def VLD1q_UPD
- : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64",
- "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
-} // mayLoad = 1
+} // mayLoad = 1, neverHasSideEffects = 1
-let mayStore = 1 in {
+let mayStore = 1, neverHasSideEffects = 1 in {
// Use vstmia to store a Q register as a D register pair.
// This is equivalent to VSTMD except that it has a Q register operand
// instead of a pair of D registers.
@@ -149,11 +140,6 @@ def VSTMQ
: AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
IndexModeNone, IIC_fpStorem,
"vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
-def VSTMQ_UPD
- : AXDI5<(outs GPR:$wb), (ins QPR:$src, addrmode5:$addr, pred:$p),
- IndexModeUpd, IIC_fpStorem,
- "vstm${addr:submode}${p}\t${addr:base}!, ${src:dregpair}",
- "$addr.base = $wb", []>;
// Use vst1 to store a Q register as a D register pair.
// This alternative to VSTMQ allows an alignment to be specified.
@@ -161,14 +147,9 @@ def VSTMQ_UPD
def VST1q
: NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
-def VST1q_UPD
- : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
- IIC_VST, "vst1", "64", "{$src:dregpair}, $addr$offset",
- "$addr.addr = $wb", []>;
-} // mayStore = 1
+} // mayStore = 1, neverHasSideEffects = 1
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// VLD1 : Vector Load (multiple single elements)
class VLD1D<bits<4> op7_4, string Dt>
@@ -492,9 +473,9 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
// VLD3DUP : Vector Load (single 3-element structure to all lanes)
// VLD4DUP : Vector Load (single 4-element structure to all lanes)
// FIXME: Not yet implemented.
-} // mayLoad = 1, hasExtraDefRegAllocReq = 1
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
// VST1 : Vector Store (multiple single elements)
class VST1D<bits<4> op7_4, string Dt>
@@ -807,7 +788,7 @@ def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
-} // mayStore = 1, hasExtraSrcRegAllocReq = 1
+} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1
//===----------------------------------------------------------------------===//
@@ -815,27 +796,32 @@ def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
//===----------------------------------------------------------------------===//
// Extract D sub-registers of Q registers.
-// (arm_dsubreg_0 is 5; arm_dsubreg_1 is 6)
def DSubReg_i8_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(5 + N->getZExtValue() / 8, MVT::i32);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/8, MVT::i32);
}]>;
def DSubReg_i16_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(5 + N->getZExtValue() / 4, MVT::i32);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/4, MVT::i32);
}]>;
def DSubReg_i32_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(5 + N->getZExtValue() / 2, MVT::i32);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue()/2, MVT::i32);
}]>;
def DSubReg_f64_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(5 + N->getZExtValue(), MVT::i32);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32);
}]>;
def DSubReg_f64_other_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(5 + (1 - N->getZExtValue()), MVT::i32);
+ assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()),
+ MVT::i32);
}]>;
// Extract S sub-registers of Q/D registers.
-// (arm_ssubreg_0 is 1; arm_ssubreg_1 is 2; etc.)
def SSubReg_f32_reg : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(1 + N->getZExtValue(), MVT::i32);
+ assert(ARM::ssub_3 == ARM::ssub_0+3 && "Unexpected subreg numbering");
+ return CurDAG->getTargetConstant(ARM::ssub_0 + N->getZExtValue(), MVT::i32);
}]>;
// Translate lane numbers from Q registers to D subregs.
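
A worked example of the index arithmetic above, assuming only what the asserts check, namely that the generated enum keeps dsub_0..dsub_7 (and ssub_0..ssub_3) consecutive; the enum values below are illustrative stand-ins, not the generated ones:

    #include <cassert>

    enum { dsub_0 = 5, dsub_1, dsub_2, dsub_3,
           dsub_4, dsub_5, dsub_6, dsub_7 };

    // Each D subregister of a Q register holds 8 byte lanes, so for a
    // v16i8 the containing subreg index is dsub_0 + lane/8.
    int dsubForByteLane(unsigned lane) {
      assert(lane < 16 && "lane out of range for v16i8");
      return dsub_0 + lane / 8;   // lanes 0-7 -> dsub_0, 8-15 -> dsub_1
    }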
@@ -2829,11 +2815,21 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
// VMOV : Vector Move (Register)
+let neverHasSideEffects = 1 in {
def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+// Pseudo vector move instructions for QQ and QQQQ registers. These should
+// be expanded after register allocation is completed.
+def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
+ NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+
+def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
+ NoItinerary, "${:comment} vmov\t$dst, $src", []>;
+} // neverHasSideEffects
+
// VMOV : Vector Move (Immediate)
// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
@@ -2871,6 +2867,7 @@ def vmovImm64 : PatLeaf<(build_vector), [{
// Note: Some of the cmode bits in the following VMOV instructions need to
// be encoded based on the immed values.
+let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
(ins h8imm:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$dst, $SIMM", "",
@@ -2906,6 +2903,7 @@ def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
(ins h64imm:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
+} // isReMaterializable
// VMOV : Vector Get Lane (move scalar to ARM core register)
@@ -3018,11 +3016,11 @@ def : Pat<(v2f64 (insertelt QPR:$src1, DPR:$src2, imm:$src3)),
(INSERT_SUBREG QPR:$src1, DPR:$src2, (DSubReg_f64_reg imm:$src3))>;
def : Pat<(v2f32 (scalar_to_vector SPR:$src)),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v2f64 (scalar_to_vector (f64 DPR:$src))),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, arm_dsubreg_0)>;
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), DPR:$src, dsub_0)>;
def : Pat<(v4f32 (scalar_to_vector SPR:$src)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, arm_ssubreg_0)>;
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), SPR:$src, ssub_0)>;
def : Pat<(v8i8 (scalar_to_vector GPR:$src)),
(VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0))>;
@@ -3034,15 +3032,15 @@ def : Pat<(v2i32 (scalar_to_vector GPR:$src)),
def : Pat<(v16i8 (scalar_to_vector GPR:$src)),
(INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
(VSETLNi8 (v8i8 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
- arm_dsubreg_0)>;
+ dsub_0)>;
def : Pat<(v8i16 (scalar_to_vector GPR:$src)),
(INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
(VSETLNi16 (v4i16 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
- arm_dsubreg_0)>;
+ dsub_0)>;
def : Pat<(v4i32 (scalar_to_vector GPR:$src)),
(INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
(VSETLNi32 (v2i32 (IMPLICIT_DEF)), GPR:$src, (i32 0)),
- arm_dsubreg_0)>;
+ dsub_0)>;
// VDUP : Vector Duplicate (from ARM core register to all elements)
@@ -3376,27 +3374,27 @@ def VTBX4
class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
: NEONFPPat<(ResTy (OpNode SPR:$a)),
(EXTRACT_SUBREG (OpTy (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0))),
- arm_ssubreg_0)>;
+ SPR:$a, ssub_0))),
+ ssub_0)>;
class N3VSPat<SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
(EXTRACT_SUBREG (v2f32
(Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0),
+ SPR:$a, ssub_0),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$b, arm_ssubreg_0))),
- arm_ssubreg_0)>;
+ SPR:$b, ssub_0))),
+ ssub_0)>;
class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
: NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
(EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$acc, arm_ssubreg_0),
+ SPR:$acc, ssub_0),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$a, arm_ssubreg_0),
+ SPR:$a, ssub_0),
(INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
- SPR:$b, arm_ssubreg_0)),
- arm_ssubreg_0)>;
+ SPR:$b, ssub_0)),
+ ssub_0)>;
// These need separate instructions because they must use DPR_VFP2 register
// class which have SPR sub-registers.
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index e3ca536..40f924b 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -127,12 +127,12 @@ def t_addrmode_sp : Operand<i32>,
let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def tADJCALLSTACKUP :
PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
- "@ tADJCALLSTACKUP $amt1",
+ "${:comment} tADJCALLSTACKUP $amt1",
[(ARMcallseq_end imm:$amt1, imm:$amt2)]>, Requires<[IsThumb1Only]>;
def tADJCALLSTACKDOWN :
PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
- "@ tADJCALLSTACKDOWN $amt",
+ "${:comment} tADJCALLSTACKDOWN $amt",
[(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>;
}
@@ -254,14 +254,14 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
// Pseudo instruction that will expand into a tSUBspi + a copy.
let usesCustomInserter = 1 in { // Expanded after instruction selection.
def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs),
- NoItinerary, "@ sub\t$dst, $rhs", []>;
+ NoItinerary, "${:comment} sub\t$dst, $rhs", []>;
def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
- NoItinerary, "@ add\t$dst, $rhs", []>;
+ NoItinerary, "${:comment} add\t$dst, $rhs", []>;
let Defs = [CPSR] in
def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
- NoItinerary, "@ and\t$dst, $rhs", []>;
+ NoItinerary, "${:comment} and\t$dst, $rhs", []>;
} // usesCustomInserter
//===----------------------------------------------------------------------===//
@@ -374,7 +374,7 @@ let isBranch = 1, isTerminator = 1 in {
// Far jump
let Defs = [LR] in
def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br,
- "bl\t$target\t@ far jump",[]>;
+ "bl\t$target\t${:comment} far jump",[]>;
def tBR_JTr : T1JTI<(outs),
(ins tGPR:$target, jtblock_operand:$jt, i32imm:$id),
@@ -417,9 +417,13 @@ def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>,
}
}
-// A8.6.16 B: Encoding T1 -- for disassembly only
+// A8.6.16 B: Encoding T1
// If Inst{11-8} == 0b1110 then UNDEFINED
-def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 {
+// FIXME: Temporarily emitted as raw bytes until this pseudo-op is added to
+// binutils.
+let isBarrier = 1, isTerminator = 1 in
+def tTRAP : TI<(outs), (ins), IIC_Br,
+ ".short 0xdefe ${:comment} trap", [(trap)]>, Encoding16 {
let Inst{15-12} = 0b1101;
let Inst{11-8} = 0b1110;
}
@@ -476,7 +480,7 @@ def tLDRspi : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
// Special instruction for restore. It cannot clobber condition register
// when it's expanded by eliminateCallFramePseudoInstr().
-let canFoldAsLoad = 1, mayLoad = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in
def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
"ldr", "\t$dst, $addr", []>,
T1LdStSP<{1,?,?}>;
@@ -490,7 +494,8 @@ def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1,
+ isReMaterializable = 1 in
def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
"ldr", "\t$dst, $addr", []>,
T1LdStSP<{1,?,?}>;
@@ -527,7 +532,7 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
[(store tGPR:$src, t_addrmode_sp:$addr)]>,
T1LdStSP<{0,?,?}>;
-let mayStore = 1 in {
+let mayStore = 1, neverHasSideEffects = 1 in {
// Special instruction for spill. It cannot clobber condition register
// when it's expanded by eliminateCallFramePseudoInstr().
def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
@@ -540,7 +545,7 @@ def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStorei,
//
// These require the base address to be written back or one of the loaded regs.
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def tLDM : T1I<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops),
IIC_iLoadm,
@@ -553,9 +558,9 @@ def tLDM_UPD : T1It<(outs tGPR:$wb),
"ldm${addr:submode}${p}\t$addr!, $dsts",
"$addr.addr = $wb", []>,
T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects = 1, hasExtraDefRegAllocReq
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def tSTM_UPD : T1It<(outs tGPR:$wb),
(ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops),
IIC_iStorem,
@@ -866,11 +871,12 @@ def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
PseudoInst<(outs tGPR:$dst), (ins tGPR:$false, tGPR:$true, pred:$cc),
- NoItinerary, "@ tMOVCCr $cc",
+ NoItinerary, "${:comment} tMOVCCr $cc",
[/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
// 16-bit movcc in IT blocks for Thumb2.
+let neverHasSideEffects = 1 in {
def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
"mov", "\t$dst, $rhs", []>,
T1Special<{1,0,?,?}>;
@@ -878,9 +884,12 @@ def tMOVCCr : T1pIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iCMOVr,
def tMOVCCi : T1pIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs), IIC_iCMOVi,
"mov", "\t$dst, $rhs", []>,
T1General<{1,0,0,?,?}>;
+} // neverHasSideEffects
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
+let neverHasSideEffects = 1 in {
+let isReMaterializable = 1 in
def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
"adr$p\t$dst, #$label", []>,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
@@ -889,6 +898,7 @@ def tLEApcrelJT : T1I<(outs tGPR:$dst),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>,
T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10
+} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// TLS Instructions
@@ -918,16 +928,32 @@ let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in {
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t@ begin eh.setjmp\n"
+ "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
"\tmov\t$val, pc\n"
- "\tadds\t$val, #9\n"
+ "\tadds\t$val, #7\n"
"\tstr\t$val, [$src, #4]\n"
"\tmovs\tr0, #0\n"
"\tb\t1f\n"
- "\tmovs\tr0, #1\t@ end eh.setjmp\n"
+ "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
}
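
The switch from #9 to #7 lines up with Thumb PC read semantics; a sketch of the arithmetic, assuming each instruction in the block is a 2-byte Thumb1 encoding (the same reasoning applies to the Thumb2 setjmp blocks later in this patch):

    // With "mov $val, pc" at address A, Thumb reads pc as A + 4. The resume
    // point "movs r0, #1" sits five instructions (10 bytes) past A, and the
    // stored address needs bit 0 set to stay in Thumb state:
    //   target = A + 10 + 1 = (A + 4) + 7
    unsigned resumeAddress(unsigned A) {
      unsigned val = A + 4;   // value read from pc
      return val + 7;         // movs r0, #1, with the Thumb bit set
    }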
+
+// FIXME: Non-Darwin version(s)
+let isBarrier = 1, hasSideEffects = 1, isTerminator = 1,
+ Defs = [ R7, LR, SP ] in {
+def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
+ AddrModeNone, SizeSpecial, IndexModeNone,
+ Pseudo, NoItinerary,
+ "ldr\t$scratch, [$src, #8]\n\t"
+ "mov\tsp, $scratch\n\t"
+ "ldr\t$scratch, [$src, #4]\n\t"
+ "ldr\tr7, [$src]\n\t"
+ "bx\t$scratch", "",
+ [(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
+ Requires<[IsThumb, IsDarwin]>;
+}
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -1011,7 +1037,7 @@ def : T1Pat<(i32 imm0_255_comp:$src),
// scheduling.
let isReMaterializable = 1 in
def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 742bd40..b91c089 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -185,8 +185,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15} = 0;
}
// register
- def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
- opc, ".w\t$dst, $src",
+ def r : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr,
+ opc, ".w\t$dst, $src",
[(set GPR:$dst, (opnode GPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -198,9 +198,9 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{5-4} = 0b00; // type
}
// shifted register
- def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
- opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode t2_so_reg:$src))]> {
+ def s : T2sI<(outs GPR:$dst), (ins t2_so_reg:$src), IIC_iMOVsi,
+ opc, ".w\t$dst, $src",
+ [(set GPR:$dst, (opnode t2_so_reg:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -210,7 +210,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
}
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
-// binary operation that produces a value. These are predicable and can be
+/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0, string wide =""> {
@@ -259,23 +259,23 @@ multiclass T2I_bin_w_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_bin_irs counterpart.
multiclass T2I_rbin_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
- opc, ".w\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ opc, ".w\t$dst, $rhs, $lhs",
+ [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
- let Inst{20} = 0; // The S bit.
+ let Inst{20} = ?; // The S bit.
let Inst{15} = 0;
}
// shifted register
- def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
- opc, "\t$dst, $rhs, $lhs",
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ opc, "\t$dst, $rhs, $lhs",
+ [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
- let Inst{20} = 0; // The S bit.
+ let Inst{20} = ?; // The S bit.
}
}
@@ -461,10 +461,9 @@ multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
let Defs = [CPSR] in {
multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
// shifted imm
- def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s),
- IIC_iALUi,
- !strconcat(opc, "${s}.w\t$dst, $rhs, $lhs"),
- [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
+ def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs), IIC_iALUi,
+ !strconcat(opc, "s"), ".w\t$dst, $rhs, $lhs",
+ [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -472,10 +471,9 @@ multiclass T2I_rbin_s_is<bits<4> opcod, string opc, PatFrag opnode> {
let Inst{15} = 0;
}
// shifted register
- def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s),
- IIC_iALUsi,
- !strconcat(opc, "${s}\t$dst, $rhs, $lhs"),
- [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
+ def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs), IIC_iALUsi,
+ !strconcat(opc, "s"), "\t$dst, $rhs, $lhs",
+ [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -639,7 +637,8 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, ".w\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set GPR:$dst, (opnode GPR:$src))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -650,7 +649,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, ".w\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -665,7 +665,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
opc, "\t$dst, $src",
- [(set GPR:$dst, (opnode GPR:$src))]> {
+ [(set GPR:$dst, (opnode GPR:$src))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -676,7 +677,8 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
}
def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
opc, "\t$dst, $src, ror $rot",
- [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
+ [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -717,7 +719,8 @@ multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
opc, "\t$dst, $LHS, $RHS",
- [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]> {
+ [(set GPR:$dst, (opnode GPR:$LHS, GPR:$RHS))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -728,7 +731,8 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot",
[(set GPR:$dst, (opnode GPR:$LHS,
- (rotr GPR:$RHS, rot_imm:$rot)))]> {
+ (rotr GPR:$RHS, rot_imm:$rot)))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -771,6 +775,8 @@ multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
// LEApcrel - Load a pc-relative address into a register without offending the
// assembler.
+let neverHasSideEffects = 1 in {
+let isReMaterializable = 1 in
def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi,
"adr$p.w\t$dst, #$label", []> {
let Inst{31-27} = 0b11110;
@@ -792,6 +798,7 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst),
let Inst{19-16} = 0b1111; // Rn
let Inst{15} = 0;
}
+} // neverHasSideEffects
// ADD r, sp, {so_imm|i12}
def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
@@ -856,9 +863,11 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
let Inst{15} = 0;
}
-// Signed and unsigned division, for disassembly only
+// Signed and unsigned division on v7-M
def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
- "sdiv", "\t$dst, $a, $b", []> {
+ "sdiv", "\t$dst, $a, $b",
+ [(set GPR:$dst, (sdiv GPR:$a, GPR:$b))]>,
+ Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011100;
let Inst{20} = 0b1;
@@ -867,7 +876,9 @@ def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
}
def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
- "udiv", "\t$dst, $a, $b", []> {
+ "udiv", "\t$dst, $a, $b",
+ [(set GPR:$dst, (udiv GPR:$a, GPR:$b))]>,
+ Requires<[HasDivide]> {
let Inst{31-27} = 0b11111;
let Inst{26-21} = 0b011101;
let Inst{20} = 0b1;
@@ -878,11 +889,11 @@ def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
let usesCustomInserter = 1 in { // Expanded after instruction selection.
def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
- NoItinerary, "@ sub.w\t$dst, $sp, $imm", []>;
+ NoItinerary, "${:comment} sub.w\t$dst, $sp, $imm", []>;
def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
- NoItinerary, "@ subw\t$dst, $sp, $imm", []>;
+ NoItinerary, "${:comment} subw\t$dst, $sp, $imm", []>;
def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
- NoItinerary, "@ sub\t$dst, $sp, $rhs", []>;
+ NoItinerary, "${:comment} sub\t$dst, $sp, $rhs", []>;
} // usesCustomInserter
@@ -902,7 +913,7 @@ defm t2LDRB : T2I_ld<0, 0b00, "ldrb", UnOpFrag<(zextloadi8 node:$Src)>>;
defm t2LDRSH : T2I_ld<1, 0b01, "ldrsh", UnOpFrag<(sextloadi16 node:$Src)>>;
defm t2LDRSB : T2I_ld<1, 0b00, "ldrsb", UnOpFrag<(sextloadi8 node:$Src)>>;
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def t2LDRDi8 : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
(ins t2addrmode_imm8s4:$addr),
@@ -912,7 +923,7 @@ def t2LDRDpci : T2Ii8s4<1, 0, 1, (outs GPR:$dst1, GPR:$dst2),
"ldrd", "\t$dst1, $addr", []> {
let Inst{19-16} = 0b1111; // Rn
}
-}
+} // mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1
// zextload i1 -> zextload i8
def : T2Pat<(zextloadi1 t2addrmode_imm12:$addr),
@@ -955,7 +966,7 @@ def : T2Pat<(extloadi16 (ARMWrapper tconstpool:$addr)),
(t2LDRHpci tconstpool:$addr)>;
// Indexed loads
-let mayLoad = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1 in {
def t2LDR_PRE : T2Iidxldst<0, 0b10, 1, 1, (outs GPR:$dst, GPR:$base_wb),
(ins t2addrmode_imm8:$addr),
AddrModeT2_i8, IndexModePre, IIC_iLoadiu,
@@ -1011,7 +1022,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
AddrModeT2_i8, IndexModePost, IIC_iLoadiu,
"ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb",
[]>;
-}
+} // mayLoad = 1, neverHasSideEffects = 1
// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are
// for disassembly only.
@@ -1041,7 +1052,7 @@ defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
-let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in
def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs),
(ins GPR:$src1, GPR:$src2, t2addrmode_imm8s4:$addr),
IIC_iStorer, "strd", "\t$src1, $addr", []>;
@@ -1204,7 +1215,7 @@ defm t2PLI : T2Ipl<1, 0, "pli">;
// Load / store multiple Instructions.
//
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops), IIC_iLoadm,
"ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", []> {
@@ -1227,9 +1238,9 @@ def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
let Inst{21} = 1; // The W bit.
let Inst{20} = 1; // Load
}
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p,
reglist:$srcs, variable_ops), IIC_iStorem,
"stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", []> {
@@ -1253,7 +1264,7 @@ def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
let Inst{21} = 1; // The W bit.
let Inst{20} = 0; // Store
}
-} // mayStore, hasExtraSrcRegAllocReq
+} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
//===----------------------------------------------------------------------===//
// Move Instructions.
@@ -1564,9 +1575,9 @@ def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
}
let Defs = [CPSR] in {
-def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- "lsrs.w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> {
+def t2MOVsrl_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ "lsrs", ".w\t$dst, $src, #1",
+ [(set GPR:$dst, (ARMsrl_flag GPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -1577,9 +1588,9 @@ def t2MOVsrl_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
let Inst{14-12} = 0b000;
let Inst{7-6} = 0b01;
}
-def t2MOVsra_flag : T2XI<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
- "asrs.w\t$dst, $src, #1",
- [(set GPR:$dst, (ARMsra_flag GPR:$src))]> {
+def t2MOVsra_flag : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi,
+ "asrs", ".w\t$dst, $src, #1",
+ [(set GPR:$dst, (ARMsra_flag GPR:$src))]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = 0b0010;
@@ -2058,7 +2069,8 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
(and (shl GPR:$src2, (i32 imm:$shamt)),
- 0xFFFF0000)))]> {
+ 0xFFFF0000)))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-20} = 0b01100;
@@ -2068,15 +2080,18 @@ def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
// Alternate cases for PKHBT where identities eliminate some nodes.
def : T2Pat<(or (and GPR:$src1, 0xFFFF), (and GPR:$src2, 0xFFFF0000)),
- (t2PKHBT GPR:$src1, GPR:$src2, 0)>;
+ (t2PKHBT GPR:$src1, GPR:$src2, 0)>,
+ Requires<[HasT2ExtractPack]>;
def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
- (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
+ (t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>,
+ Requires<[HasT2ExtractPack]>;
def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
(and (sra GPR:$src2, imm16_31:$shamt),
- 0xFFFF)))]> {
+ 0xFFFF)))]>,
+ Requires<[HasT2ExtractPack]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-20} = 0b01100;
@@ -2087,10 +2102,12 @@ def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
// Alternate cases for PKHTB where identities eliminate some nodes. Note that
// a shift amount of 0 is *not legal* here; it is PKHBT instead.
def : T2Pat<(or (and GPR:$src1, 0xFFFF0000), (srl GPR:$src2, (i32 16))),
- (t2PKHTB GPR:$src1, GPR:$src2, 16)>;
+ (t2PKHTB GPR:$src1, GPR:$src2, 16)>,
+ Requires<[HasT2ExtractPack]>;
def : T2Pat<(or (and GPR:$src1, 0xFFFF0000),
(and (srl GPR:$src2, imm1_15:$shamt), 0xFFFF)),
- (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>;
+ (t2PKHTB GPR:$src1, GPR:$src2, imm1_15:$shamt)>,
+ Requires<[HasT2ExtractPack]>;
//===----------------------------------------------------------------------===//
// Comparison Instructions...
@@ -2127,6 +2144,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
+let neverHasSideEffects = 1 in {
def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
"mov", ".w\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -2178,6 +2196,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs GPR:$dst),
(ins GPR:$false, GPR:$true, i32imm:$rhs),
IIC_iCMOVsi, "ror", ".w\t$dst, $true, $rhs", []>,
RegConstraint<"$false = $dst">;
+} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
@@ -2378,13 +2397,13 @@ let Defs =
D31 ] in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t@ begin eh.setjmp\n"
+ "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
"\tmov\t$val, pc\n"
- "\tadds\t$val, #9\n"
+ "\tadds\t$val, #7\n"
"\tstr\t$val, [$src, #4]\n"
"\tmovs\tr0, #0\n"
"\tb\t1f\n"
- "\tmovs\tr0, #1\t@ end eh.setjmp\n"
+ "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
@@ -2394,13 +2413,13 @@ let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val),
AddrModeNone, SizeSpecial, NoItinerary,
- "str\t$val, [$src, #8]\t@ begin eh.setjmp\n"
+ "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
"\tmov\t$val, pc\n"
- "\tadds\t$val, #9\n"
+ "\tadds\t$val, #7\n"
"\tstr\t$val, [$src, #4]\n"
"\tmovs\tr0, #0\n"
"\tb\t1f\n"
- "\tmovs\tr0, #1\t@ end eh.setjmp\n"
+ "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
"1:", "",
[(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
@@ -2672,7 +2691,7 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// scheduling.
let canFoldAsLoad = 1, isReMaterializable = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
- NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 36fcaa1..54474cf 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -76,7 +76,7 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
// Load / store multiple Instructions.
//
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts,
variable_ops), IndexModeNone, IIC_fpLoadm,
"vldm${addr:submode}${p}\t${addr:base}, $dsts", "", []> {
@@ -104,9 +104,9 @@ def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
"$addr.base = $wb", []> {
let Inst{20} = 1;
}
-} // mayLoad, hasExtraDefRegAllocReq
+} // mayLoad, neverHasSideEffects, hasExtraDefRegAllocReq
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in {
def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs,
variable_ops), IndexModeNone, IIC_fpStorem,
"vstm${addr:submode}${p}\t${addr:base}, $srcs", "", []> {
@@ -134,7 +134,7 @@ def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p,
"$addr.base = $wb", []> {
let Inst{20} = 0;
}
-} // mayStore, hasExtraSrcRegAllocReq
+} // mayStore, neverHasSideEffects, hasExtraSrcRegAllocReq
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
@@ -313,6 +313,7 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
IIC_fpMOVIS, "vmov", "\t$dst, $src",
[(set SPR:$dst, (bitconvert GPR:$src))]>;
+let neverHasSideEffects = 1 in {
def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src",
@@ -326,6 +327,7 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010,
[/* For disassembly only; pattern left blank */]> {
let Inst{7-6} = 0b00;
}
+} // neverHasSideEffects
// FMDHR: GPR -> SPR
// FMDLR: GPR -> SPR
@@ -337,6 +339,7 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
let Inst{7-6} = 0b00;
}
+let neverHasSideEffects = 1 in
def VMOVSRR : AVConv5I<0b11000100, 0b1010,
(outs SPR:$dst1, SPR:$dst2), (ins GPR:$src1, GPR:$src2),
IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2",
@@ -606,6 +609,7 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0,
// FP Conditional moves.
//
+let neverHasSideEffects = 1 in {
def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
IIC_fpUNA64, "vmov", ".f64\t$dst, $true",
@@ -629,7 +633,7 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0,
IIC_fpUNA32, "vneg", ".f32\t$dst, $true",
[/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-
+} // neverHasSideEffects
//===----------------------------------------------------------------------===//
// Misc.
@@ -651,6 +655,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
// FPSCR <-> GPR (for disassembly only)
+let neverHasSideEffects = 1 in {
let Uses = [FPSCR] in {
def VMRS : VFPAI<(outs GPR:$dst), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
"\t$dst, fpscr",
@@ -674,6 +679,7 @@ def VMSR : VFPAI<(outs), (ins GPR:$src), VFPMiscFrm, IIC_fpSTAT, "vmsr",
let Inst{4} = 1;
}
}
+} // neverHasSideEffects
// Materialize FP immediates. VFP3 only.
let isReMaterializable = 1 in {
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index b31a4fa..5f6d7ee 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -318,6 +318,18 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
*((intptr_t*)RelocPos) |= ResultPtr;
break;
}
+ case ARM::reloc_arm_movw: {
+ ResultPtr = ResultPtr & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
+ case ARM::reloc_arm_movt: {
+ ResultPtr = (ResultPtr >> 16) & 0xFFFF;
+ *((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
+ *((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16;
+ break;
+ }
}
}
}
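
The two new relocation cases pack a 16-bit half of the target address into the MOVW/MOVT immediate fields, imm12 in bits 11:0 and imm4 in bits 19:16; the same packing as a standalone sketch:

    #include <cstdint>

    // Split a 16-bit immediate across the MOVW/MOVT encoding fields.
    uint32_t packMovImm16(uint32_t insn, uint32_t imm16) {
      insn |= imm16 & 0xFFF;                // imm12, bits 11:0
      insn |= ((imm16 >> 12) & 0xF) << 16;  // imm4, bits 19:16
      return insn;
    }

    // reloc_arm_movw takes the low half of the address, reloc_arm_movt
    // the high half:
    uint32_t applyMovw(uint32_t insn, uint32_t addr) {
      return packMovImm16(insn, addr & 0xFFFF);
    }
    uint32_t applyMovt(uint32_t insn, uint32_t addr) {
      return packMovImm16(insn, (addr >> 16) & 0xFFFF);
    }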
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index 041afd0..8edfb9a 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -23,16 +23,6 @@ namespace llvm {
class ARMBaseInstrInfo;
class Type;
-namespace ARM {
- /// SubregIndex - The index of various subregister classes. Note that
- /// these indices must be kept in sync with the class indices in the
- /// ARMRegisterInfo.td file.
- enum SubregIndex {
- SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4,
- DSUBREG_0 = 5, DSUBREG_1 = 6
- };
-}
-
struct ARMRegisterInfo : public ARMBaseRegisterInfo {
public:
ARMRegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI);
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 0d4200c..6beca8b 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -23,6 +23,44 @@ class ARMFReg<bits<6> num, string n> : Register<n> {
let Namespace = "ARM";
}
+// Subregister indices.
+let Namespace = "ARM" in {
+// Note: Code depends on these having consecutive numbers.
+def ssub_0 : SubRegIndex;
+def ssub_1 : SubRegIndex;
+def ssub_2 : SubRegIndex; // In a Q reg.
+def ssub_3 : SubRegIndex;
+def ssub_4 : SubRegIndex; // In a QQ reg.
+def ssub_5 : SubRegIndex;
+def ssub_6 : SubRegIndex;
+def ssub_7 : SubRegIndex;
+def ssub_8 : SubRegIndex; // In a QQQQ reg.
+def ssub_9 : SubRegIndex;
+def ssub_10 : SubRegIndex;
+def ssub_11 : SubRegIndex;
+def ssub_12 : SubRegIndex;
+def ssub_13 : SubRegIndex;
+def ssub_14 : SubRegIndex;
+def ssub_15 : SubRegIndex;
+
+def dsub_0 : SubRegIndex;
+def dsub_1 : SubRegIndex;
+def dsub_2 : SubRegIndex;
+def dsub_3 : SubRegIndex;
+def dsub_4 : SubRegIndex;
+def dsub_5 : SubRegIndex;
+def dsub_6 : SubRegIndex;
+def dsub_7 : SubRegIndex;
+
+def qsub_0 : SubRegIndex;
+def qsub_1 : SubRegIndex;
+def qsub_2 : SubRegIndex;
+def qsub_3 : SubRegIndex;
+
+def qqsub_0 : SubRegIndex;
+def qqsub_1 : SubRegIndex;
+}
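
The consecutive-numbering note above is the property the SDNodeXForm asserts in ARMInstrNEON.td rely on; in C++ terms it amounts to the following, where the enum values are a hypothetical mirror of the generated ones:

    // Illustrative values only; what matters is consecutiveness.
    enum SubRegIdx { ssub_0 = 1, ssub_1, ssub_2, ssub_3,
                     dsub_0 = 17, dsub_1, dsub_2, dsub_3,
                     dsub_4, dsub_5, dsub_6, dsub_7 };
    static_assert(ssub_3 == ssub_0 + 3, "ssub indices must be consecutive");
    static_assert(dsub_7 == dsub_0 + 7, "dsub indices must be consecutive");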
+
// Integer registers
def R0 : ARMReg< 0, "r0">, DwarfRegNum<[0]>;
def R1 : ARMReg< 1, "r1">, DwarfRegNum<[1]>;
@@ -58,9 +96,9 @@ def S24 : ARMFReg<24, "s24">; def S25 : ARMFReg<25, "s25">;
def S26 : ARMFReg<26, "s26">; def S27 : ARMFReg<27, "s27">;
def S28 : ARMFReg<28, "s28">; def S29 : ARMFReg<29, "s29">;
def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">;
-def SDummy : ARMFReg<63, "sINVALID">;
// Aliases of the F* registers used to hold 64-bit fp values (doubles)
+let SubRegIndices = [ssub_0, ssub_1] in {
def D0 : ARMReg< 0, "d0", [S0, S1]>;
def D1 : ARMReg< 1, "d1", [S2, S3]>;
def D2 : ARMReg< 2, "d2", [S4, S5]>;
@@ -77,6 +115,7 @@ def D12 : ARMReg<12, "d12", [S24, S25]>;
def D13 : ARMReg<13, "d13", [S26, S27]>;
def D14 : ARMReg<14, "d14", [S28, S29]>;
def D15 : ARMReg<15, "d15", [S30, S31]>;
+}
// VFP3 defines 16 additional double registers
def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">;
@@ -89,6 +128,9 @@ def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">;
def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">;
// Advanced SIMD (NEON) defines 16 quad-word aliases
+let SubRegIndices = [dsub_0, dsub_1],
+ CompositeIndices = [(ssub_2 dsub_1, ssub_0),
+ (ssub_3 dsub_1, ssub_1)] in {
def Q0 : ARMReg< 0, "q0", [D0, D1]>;
def Q1 : ARMReg< 1, "q1", [D2, D3]>;
def Q2 : ARMReg< 2, "q2", [D4, D5]>;
@@ -97,6 +139,8 @@ def Q4 : ARMReg< 4, "q4", [D8, D9]>;
def Q5 : ARMReg< 5, "q5", [D10, D11]>;
def Q6 : ARMReg< 6, "q6", [D12, D13]>;
def Q7 : ARMReg< 7, "q7", [D14, D15]>;
+}
+let SubRegIndices = [dsub_0, dsub_1] in {
def Q8 : ARMReg< 8, "q8", [D16, D17]>;
def Q9 : ARMReg< 9, "q9", [D18, D19]>;
def Q10 : ARMReg<10, "q10", [D20, D21]>;
@@ -105,6 +149,51 @@ def Q12 : ARMReg<12, "q12", [D24, D25]>;
def Q13 : ARMReg<13, "q13", [D26, D27]>;
def Q14 : ARMReg<14, "q14", [D28, D29]>;
def Q15 : ARMReg<15, "q15", [D30, D31]>;
+}
+
+// Pseudo 256-bit registers to represent pairs of Q registers. These should
+// never be present in the emitted code.
+// These are used for NEON load / store instructions, e.g. vld4, vst3.
+// NOTE: It's possible to define more QQ registers since technically the
+// starting D register number doesn't have to be a multiple of 4, e.g.
+// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register
+// bookkeeping very messy.
+let SubRegIndices = [qsub_0, qsub_1] in {
+let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1),
+ (ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1),
+ (ssub_6 qsub_1, ssub_2), (ssub_7 qsub_1, ssub_3)] in {
+def QQ0 : ARMReg<0, "qq0", [Q0, Q1]>;
+def QQ1 : ARMReg<1, "qq1", [Q2, Q3]>;
+def QQ2 : ARMReg<2, "qq2", [Q4, Q5]>;
+def QQ3 : ARMReg<3, "qq3", [Q6, Q7]>;
+}
+let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1)] in {
+def QQ4 : ARMReg<4, "qq4", [Q8, Q9]>;
+def QQ5 : ARMReg<5, "qq5", [Q10, Q11]>;
+def QQ6 : ARMReg<6, "qq6", [Q12, Q13]>;
+def QQ7 : ARMReg<7, "qq7", [Q14, Q15]>;
+}
+}
+
+// Pseudo 512-bit registers to represent four consecutive Q registers.
+let SubRegIndices = [qqsub_0, qqsub_1] in {
+let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
+ (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
+ (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3),
+ (ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1),
+ (ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3),
+ (ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5),
+ (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in {
+def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>;
+def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>;
+}
+let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1),
+ (dsub_4 qqsub_1, dsub_0), (dsub_5 qqsub_1, dsub_1),
+ (dsub_6 qqsub_1, dsub_2), (dsub_7 qqsub_1, dsub_3)] in {
+def QQQQ2 : ARMReg<2, "qqqq2", [QQ4, QQ5]>;
+def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>;
+}
+}
// Current Program Status Register.
def CPSR : ARMReg<0, "cpsr">;
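For reference, a minimal sketch of how the composite indices above resolve. This is an editor's illustration, not part of the patch; it assumes LLVM's TargetRegisterInfo plus the TableGen-generated ARM::* register enums, and the include path for those enums is an assumption.

#include <cassert>
#include "llvm/Target/TargetRegisterInfo.h"
#include "ARM.h"  // assumed location of the generated ARM::* register enums

// Editor's sketch: (dsub_2 qsub_1, dsub_0) above says that dsub_2 of a QQ
// register is dsub_0 of its second Q register, so dsub_2 of QQ0 is D2.
void checkCompositeIndex(const llvm::TargetRegisterInfo &TRI) {
  unsigned Q1 = TRI.getSubReg(ARM::QQ0, ARM::qsub_1);  // ARM::Q1
  unsigned Da = TRI.getSubReg(Q1, ARM::dsub_0);        // ARM::D2, two steps
  unsigned Db = TRI.getSubReg(ARM::QQ0, ARM::dsub_2);  // ARM::D2, one query
  assert(Da == Db && "composite index matches the two-step lookup");
}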
@@ -270,11 +359,6 @@ def SPR_8 : RegisterClass<"ARM", [f32], 32,
[S0, S1, S2, S3, S4, S5, S6, S7,
S8, S9, S10, S11, S12, S13, S14, S15]>;
-// Dummy f32 regclass to represent impossible subreg indices.
-def SPR_INVALID : RegisterClass<"ARM", [f32], 32, [SDummy]> {
- let CopyCost = -1;
-}
-
// Scalar double precision floating point / generic 64-bit vector register
// class.
// ARM requires only word alignment for double. It's more performant if it
@@ -284,7 +368,6 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
D8, D9, D10, D11, D12, D13, D14, D15,
D16, D17, D18, D19, D20, D21, D22, D23,
D24, D25, D26, D27, D28, D29, D30, D31]> {
- let SubRegClassList = [SPR_INVALID, SPR_INVALID];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -332,79 +415,68 @@ def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
[D0, D1, D2, D3, D4, D5, D6, D7,
D8, D9, D10, D11, D12, D13, D14, D15]> {
- let SubRegClassList = [SPR, SPR];
+ let SubRegClasses = [(SPR ssub_0, ssub_1)];
}
// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
[D0, D1, D2, D3, D4, D5, D6, D7]> {
- let SubRegClassList = [SPR_8, SPR_8];
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
}
// Generic 128-bit vector register class.
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> {
- let SubRegClassList = [SPR_INVALID, SPR_INVALID, SPR_INVALID, SPR_INVALID,
- DPR, DPR];
+ let SubRegClasses = [(DPR dsub_0, dsub_1)];
}
// Subset of QPR that have 32-bit SPR subregs.
def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
128,
[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> {
- let SubRegClassList = [SPR, SPR, SPR, SPR, DPR_VFP2, DPR_VFP2];
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1)];
}
// Subset of QPR that have DPR_8 and SPR_8 subregs.
def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
128,
[Q0, Q1, Q2, Q3]> {
- let SubRegClassList = [SPR_8, SPR_8, SPR_8, SPR_8, DPR_8, DPR_8];
+ let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_8 dsub_0, dsub_1)];
+}
+
+// Pseudo 256-bit vector register class to model pairs of Q registers
+// (4 consecutive D registers).
+def QQPR : RegisterClass<"ARM", [v4i64],
+ 256,
+ [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR qsub_0, qsub_1)];
+}
+
+// Subset of QQPR that have 32-bit SPR subregs.
+def QQPR_VFP2 : RegisterClass<"ARM", [v4i64],
+ 256,
+ [QQ0, QQ1, QQ2, QQ3]> {
+ let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
+ (DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
+ (QPR_VFP2 qsub_0, qsub_1)];
+}
+
+// Pseudo 512-bit vector register class to model 4 consecutive Q registers
+// (8 consecutive D registers).
+def QQQQPR : RegisterClass<"ARM", [v8i64],
+ 256,
+ [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> {
+ let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
+ dsub_4, dsub_5, dsub_6, dsub_7),
+ (QPR qsub_0, qsub_1, qsub_2, qsub_3)];
}
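The SubRegClasses lists above pair each sub-register index with the register class its result lives in. As a concrete reading aid (editor's sketch under the same assumptions as the earlier one, not part of the patch): D registers are even/odd S pairs, so ssub_0 and ssub_1 of D5 are S10 and S11.

// Editor's sketch: the even/odd S pairing encoded by (SPR ssub_0, ssub_1)
// on the D register classes above.
void checkDToSPairing(const llvm::TargetRegisterInfo &TRI) {
  assert(TRI.getSubReg(ARM::D5, ARM::ssub_0) == ARM::S10);
  assert(TRI.getSubReg(ARM::D5, ARM::ssub_1) == ARM::S11);
}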
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
-//===----------------------------------------------------------------------===//
-// Subregister Set Definitions... now that we have all of the pieces, define the
-// sub registers for each register.
-//
-
-def arm_ssubreg_0 : PatLeaf<(i32 1)>;
-def arm_ssubreg_1 : PatLeaf<(i32 2)>;
-def arm_ssubreg_2 : PatLeaf<(i32 3)>;
-def arm_ssubreg_3 : PatLeaf<(i32 4)>;
-def arm_dsubreg_0 : PatLeaf<(i32 5)>;
-def arm_dsubreg_1 : PatLeaf<(i32 6)>;
-
-// S sub-registers of D registers.
-def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15],
- [S0, S2, S4, S6, S8, S10, S12, S14,
- S16, S18, S20, S22, S24, S26, S28, S30]>;
-def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15],
- [S1, S3, S5, S7, S9, S11, S13, S15,
- S17, S19, S21, S23, S25, S27, S29, S31]>;
-
-// S sub-registers of Q registers.
-def : SubRegSet<1, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7],
- [S0, S4, S8, S12, S16, S20, S24, S28]>;
-def : SubRegSet<2, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7],
- [S1, S5, S9, S13, S17, S21, S25, S29]>;
-def : SubRegSet<3, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7],
- [S2, S6, S10, S14, S18, S22, S26, S30]>;
-def : SubRegSet<4, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7],
- [S3, S7, S11, S15, S19, S23, S27, S31]>;
-
-// D sub-registers of Q registers.
-def : SubRegSet<5, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
- Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
- [D0, D2, D4, D6, D8, D10, D12, D14,
- D16, D18, D20, D22, D24, D26, D28, D30]>;
-def : SubRegSet<6, [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
- Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15],
- [D1, D3, D5, D7, D9, D11, D13, D15,
- D17, D19, D21, D23, D25, D27, D29, D31]>;
diff --git a/lib/Target/ARM/ARMRelocations.h b/lib/Target/ARM/ARMRelocations.h
index 2cc2950..86e7206 100644
--- a/lib/Target/ARM/ARMRelocations.h
+++ b/lib/Target/ARM/ARMRelocations.h
@@ -47,7 +47,13 @@ namespace llvm {
reloc_arm_pic_jt,
// reloc_arm_branch - Branch address relocation.
- reloc_arm_branch
+ reloc_arm_branch,
+
+ // reloc_arm_movt - MOVT immediate relocation.
+ reloc_arm_movt,
+
+ // reloc_arm_movw - MOVW immediate relocation.
+ reloc_arm_movw
};
}
}
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index c04ee38..a289407 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -12,11 +12,123 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "arm-selectiondag-info"
-#include "ARMSelectionDAGInfo.h"
+#include "ARMTargetMachine.h"
using namespace llvm;
-ARMSelectionDAGInfo::ARMSelectionDAGInfo() {
+ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
}
ARMSelectionDAGInfo::~ARMSelectionDAGInfo() {
}
+
+SDValue
+ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const {
+ // Do repeated 4-byte loads and stores. To be improved.
+ // This requires 4-byte alignment.
+ if ((Align & 3) != 0)
+ return SDValue();
+  // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ unsigned BytesLeft = SizeVal & 3;
+ unsigned NumMemOps = SizeVal >> 2;
+ unsigned EmittedNumMemOps = 0;
+ EVT VT = MVT::i32;
+ unsigned VTSize = 4;
+ unsigned i = 0;
+ const unsigned MAX_LOADS_IN_LDM = 6;
+ SDValue TFOps[MAX_LOADS_IN_LDM];
+ SDValue Loads[MAX_LOADS_IN_LDM];
+ uint64_t SrcOff = 0, DstOff = 0;
+
+ // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
+ // same number of stores. The loads and stores will get combined into
+ // ldm/stm later on.
+ while (EmittedNumMemOps < NumMemOps) {
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcSV, SrcSVOff + SrcOff, isVolatile, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ SrcOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ for (i = 0;
+ i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstSV, DstSVOff + DstOff, isVolatile, false, 0);
+ DstOff += VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ EmittedNumMemOps += i;
+ }
+
+ if (BytesLeft == 0)
+ return Chain;
+
+ // Issue loads / stores for the trailing (1 - 3) bytes.
+ unsigned BytesLeftSave = BytesLeft;
+ i = 0;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ Loads[i] = DAG.getLoad(VT, dl, Chain,
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Src,
+ DAG.getConstant(SrcOff, MVT::i32)),
+ SrcSV, SrcSVOff + SrcOff, false, false, 0);
+ TFOps[i] = Loads[i].getValue(1);
+ ++i;
+ SrcOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+
+ i = 0;
+ BytesLeft = BytesLeftSave;
+ while (BytesLeft) {
+ if (BytesLeft >= 2) {
+ VT = MVT::i16;
+ VTSize = 2;
+ } else {
+ VT = MVT::i8;
+ VTSize = 1;
+ }
+
+ TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
+ DAG.getNode(ISD::ADD, dl, MVT::i32, Dst,
+ DAG.getConstant(DstOff, MVT::i32)),
+ DstSV, DstSVOff + DstOff, false, false, 0);
+ ++i;
+ DstOff += VTSize;
+ BytesLeft -= VTSize;
+ }
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
+}
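To see the shape this produces, here is the scalar equivalent of the expansion for an 11-byte, 4-byte-aligned copy (editor's sketch, not part of the patch): two word loads, a barrier, two word stores, then an i16 and an i8 for the 3 tail bytes.

#include <cstdint>
#include <cstring>

// Editor's sketch of what EmitTargetCodeForMemcpy models for
// memcpy(dst, src, 11) with Align >= 4. All loads are issued before the
// stores, mirroring the TokenFactor barrier that lets them merge into
// ldm/stm later.
void memcpy11(char *dst, const char *src) {
  uint32_t w0, w1;                  // NumMemOps = 11 >> 2 = 2 word copies
  std::memcpy(&w0, src + 0, 4);     // word load #1
  std::memcpy(&w1, src + 4, 4);     // word load #2
  std::memcpy(dst + 0, &w0, 4);     // word store #1
  std::memcpy(dst + 4, &w1, 4);     // word store #2
  uint16_t h;                       // BytesLeft = 11 & 3 = 3 tail bytes
  std::memcpy(&h, src + 8, 2);      // i16 load
  std::memcpy(dst + 8, &h, 2);      // i16 store
  dst[10] = src[10];                // i8 for the final byte
}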
diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h
index afe9a47..d7d00c2 100644
--- a/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -19,9 +19,24 @@
namespace llvm {
class ARMSelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const ARMSubtarget *Subtarget;
+
public:
- ARMSelectionDAGInfo();
+ explicit ARMSelectionDAGInfo(const TargetMachine &TM);
~ARMSelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const;
};
}
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index b11580a..10fd257 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -39,6 +39,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, IsR9Reserved(ReserveR9)
, UseMovt(UseMOVT)
, HasFP16(false)
+ , HasHardwareDivide(false)
+ , HasT2ExtractPack(false)
, stackAlignment(4)
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
@@ -73,6 +75,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
unsigned SubVer = TT[Idx];
if (SubVer >= '7' && SubVer <= '9') {
ARMArchVersion = V7A;
+ if (Len >= Idx+2 && TT[Idx+1] == 'm')
+ ARMArchVersion = V7M;
} else if (SubVer == '6') {
ARMArchVersion = V6;
if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
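The added check recognizes Cortex-M style triples such as armv7m. A minimal sketch of the parse, as an editor's illustration under the assumption that Idx already points past the "armv"/"thumbv" prefix:

#include <string>

// Editor's sketch, not part of the patch: for TT = "armv7m-elf" and Idx at
// the '7', the trailing 'm' selects V7M rather than V7A.
bool looksLikeV7M(const std::string &TT, size_t Idx) {
  char SubVer = TT[Idx];
  return SubVer >= '7' && SubVer <= '9' &&
         TT.size() >= Idx + 2 && TT[Idx + 1] == 'm';
}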
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 288a19a..8332bba 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -26,7 +26,7 @@ class GlobalValue;
class ARMSubtarget : public TargetSubtarget {
protected:
enum ARMArchEnum {
- V4, V4T, V5T, V5TE, V6, V6T2, V7A
+ V4, V4T, V5T, V5TE, V6, V6T2, V7A, V7M
};
enum ARMFPEnum {
@@ -39,7 +39,7 @@ protected:
};
/// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE,
- /// V6, V6T2, V7A.
+ /// V6, V6T2, V7A, V7M.
ARMArchEnum ARMArchVersion;
/// ARMFPUType - Floating Point Unit type.
@@ -74,6 +74,13 @@ protected:
/// only so far)
bool HasFP16;
+ /// HasHardwareDivide - True if subtarget supports [su]div
+ bool HasHardwareDivide;
+
+  /// HasT2ExtractPack - True if subtarget supports Thumb2 extract/pack
+ /// instructions.
+ bool HasT2ExtractPack;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -123,6 +130,8 @@ protected:
bool hasNEON() const { return ARMFPUType >= NEON; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
+ bool hasDivide() const { return HasHardwareDivide; }
+ bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
bool hasFP16() const { return HasFP16; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 662e61e..b4a9252 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -62,7 +62,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-n32")),
- TLInfo(*this) {
+ TLInfo(*this),
+ TSInfo(*this) {
}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
@@ -76,7 +77,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
"i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
"i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")),
- TLInfo(*this) {
+ TLInfo(*this),
+ TSInfo(*this) {
}
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 4e205df..a222e57 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -21,6 +21,7 @@
#include "ARMJITInfo.h"
#include "ARMSubtarget.h"
#include "ARMISelLowering.h"
+#include "ARMSelectionDAGInfo.h"
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/OwningPtr.h"
@@ -63,6 +64,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMInstrInfo InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
public:
ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
@@ -75,6 +77,10 @@ public:
return &TLInfo;
}
+ virtual const ARMSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual const ARMInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetData *getTargetData() const { return &DataLayout; }
};
@@ -88,6 +94,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
OwningPtr<ARMBaseInstrInfo> InstrInfo;
const TargetData DataLayout; // Calculates type size & alignment
ARMTargetLowering TLInfo;
+ ARMSelectionDAGInfo TSInfo;
public:
ThumbTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
@@ -101,6 +108,10 @@ public:
return &TLInfo;
}
+ virtual const ARMSelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
/// returns either Thumb1InstrInfo or Thumb2InstrInfo
virtual const ARMBaseInstrInfo *getInstrInfo() const {
return InstrInfo.get();
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 80a9d2d..d95efdb 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -319,16 +319,16 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned Reg = MO.getReg();
assert(TargetRegisterInfo::isPhysicalRegister(Reg));
if (Modifier && strcmp(Modifier, "dregpair") == 0) {
- unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, 5);// arm_dsubreg_0
- unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, 6);// arm_dsubreg_1
+ unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0);
+ unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1);
O << '{'
<< getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
<< '}';
} else if (Modifier && strcmp(Modifier, "lane") == 0) {
unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg);
unsigned DReg =
- TM.getRegisterInfo()->getMatchingSuperReg(Reg, RegNum & 1 ? 2 : 1,
- &ARM::DPR_VFP2RegClass);
+ TM.getRegisterInfo()->getMatchingSuperReg(Reg,
+ RegNum & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass);
O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
} else {
assert(!MO.getSubReg() && "Subregs should be eliminated!");
@@ -1375,13 +1375,32 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
case ARM::MOVi32imm: { // FIXME: Remove asmstring from td file.
// This is a hack that lowers as a two instruction sequence.
unsigned DstReg = MI->getOperand(0).getReg();
- unsigned ImmVal = (unsigned)MI->getOperand(1).getImm();
-
+ const MachineOperand &MO = MI->getOperand(1);
+ MCOperand V1, V2;
+ if (MO.isImm()) {
+ unsigned ImmVal = (unsigned)MI->getOperand(1).getImm();
+ V1 = MCOperand::CreateImm(ImmVal & 65535);
+ V2 = MCOperand::CreateImm(ImmVal >> 16);
+ } else if (MO.isGlobal()) {
+ MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO);
+ const MCSymbolRefExpr *SymRef1 =
+ MCSymbolRefExpr::Create(Symbol,
+ MCSymbolRefExpr::VK_ARM_LO16, OutContext);
+ const MCSymbolRefExpr *SymRef2 =
+ MCSymbolRefExpr::Create(Symbol,
+ MCSymbolRefExpr::VK_ARM_HI16, OutContext);
+ V1 = MCOperand::CreateExpr(SymRef1);
+ V2 = MCOperand::CreateExpr(SymRef2);
+ } else {
+ MI->dump();
+ llvm_unreachable("cannot handle this operand");
+ }
+
{
MCInst TmpInst;
TmpInst.setOpcode(ARM::MOVi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
- TmpInst.addOperand(MCOperand::CreateImm(ImmVal & 65535)); // lower16(imm)
+ TmpInst.addOperand(V1); // lower16(imm)
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
@@ -1395,7 +1414,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
TmpInst.setOpcode(ARM::MOVTi16);
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // dstreg
TmpInst.addOperand(MCOperand::CreateReg(DstReg)); // srcreg
- TmpInst.addOperand(MCOperand::CreateImm(ImmVal >> 16)); // upper16(imm)
+ TmpInst.addOperand(V2); // upper16(imm)
// Predicate.
TmpInst.addOperand(MCOperand::CreateImm(MI->getOperand(2).getImm()));
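A worked example of the split performed above (editor's note, not part of the patch): immediate 0xDEADBEEF becomes MOVW rd, #0xBEEF followed by MOVT rd, #0xDEAD; MOVT writes only the top half, so the register ends up holding the full constant.

#include <cassert>
#include <cstdint>

// Editor's sketch of the two immediates built for MOVi16/MOVTi16 above.
void splitImm32(uint32_t ImmVal) {
  uint32_t Lo = ImmVal & 65535;  // MOVW operand, e.g. 0xBEEF
  uint32_t Hi = ImmVal >> 16;    // MOVT operand, e.g. 0xDEAD
  assert(((Hi << 16) | Lo) == ImmVal && "halves reassemble the constant");
}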
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index ac6331f..2b94b76 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -195,8 +195,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
// FIXME: Breaks e.g. ARM/vmul.ll.
assert(0);
/*
- unsigned DRegLo = TRI->getSubReg(Reg, 5); // arm_dsubreg_0
- unsigned DRegHi = TRI->getSubReg(Reg, 6); // arm_dsubreg_1
+ unsigned DRegLo = TRI->getSubReg(Reg, ARM::dsub_0);
+ unsigned DRegHi = TRI->getSubReg(Reg, ARM::dsub_1);
O << '{'
<< getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi)
<< '}';*/
@@ -217,7 +217,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"));
O << '#' << Op.getImm();
} else {
- assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported");
+ if (Modifier && Modifier[0] != 0 && strcmp(Modifier, "call") != 0)
+ llvm_unreachable("Unsupported modifier");
assert(Op.isExpr() && "unknown operand kind in printOperand");
O << *Op.getExpr();
}
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
index 383d30d..b81a306 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
+++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.h
@@ -26,7 +26,7 @@ namespace llvm {
//class ARMSubtarget;
/// ARMMCInstLower - This class is used to lower an MachineInstr into an MCInst.
-class VISIBILITY_HIDDEN ARMMCInstLower {
+class LLVM_LIBRARY_VISIBILITY ARMMCInstLower {
MCContext &Ctx;
Mangler &Mang;
AsmPrinter &Printer;
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 3c0414d..0a4400c 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -118,7 +118,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) {
ARMFunctionInfo *AFI = Fn.getInfo<ARMFunctionInfo>();
const TargetMachine &TM = Fn.getTarget();
- if (AFI->isThumbFunction())
+ if (AFI->isThumb1OnlyFunction())
return false;
TRI = TM.getRegisterInfo();
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index ef6bf3a..a725898 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -33,7 +33,8 @@ namespace {
private:
bool FormsRegSequence(MachineInstr *MI,
- unsigned FirstOpnd, unsigned NumRegs);
+ unsigned FirstOpnd, unsigned NumRegs,
+ unsigned Offset, unsigned Stride) const;
bool PreAllocNEONRegisters(MachineBasicBlock &MBB);
};
@@ -338,24 +339,122 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
return false;
}
-bool NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
- unsigned FirstOpnd, unsigned NumRegs) {
- MachineInstr *RegSeq = 0;
+bool
+NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
+ unsigned FirstOpnd, unsigned NumRegs,
+ unsigned Offset, unsigned Stride) const {
+ MachineOperand &FMO = MI->getOperand(FirstOpnd);
+ assert(FMO.isReg() && FMO.getSubReg() == 0 && "unexpected operand");
+ unsigned VirtReg = FMO.getReg();
+ (void)VirtReg;
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "expected a virtual register");
+
+ unsigned LastSubIdx = 0;
+ if (FMO.isDef()) {
+ MachineInstr *RegSeq = 0;
+ for (unsigned R = 0; R < NumRegs; ++R) {
+ const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "expected a virtual register");
+ // Feeding into a REG_SEQUENCE.
+ if (!MRI->hasOneNonDBGUse(VirtReg))
+ return false;
+ MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
+ if (!UseMI->isRegSequence())
+ return false;
+ if (RegSeq && RegSeq != UseMI)
+ return false;
+ unsigned OpIdx = 1 + (Offset + R * Stride) * 2;
+ if (UseMI->getOperand(OpIdx).getReg() != VirtReg)
+ llvm_unreachable("Malformed REG_SEQUENCE instruction!");
+ unsigned SubIdx = UseMI->getOperand(OpIdx + 1).getImm();
+ if (LastSubIdx) {
+ if (LastSubIdx != SubIdx-Stride)
+ return false;
+ } else {
+ // Must start from dsub_0 or qsub_0.
+ if (SubIdx != (ARM::dsub_0+Offset) &&
+ SubIdx != (ARM::qsub_0+Offset))
+ return false;
+ }
+ RegSeq = UseMI;
+ LastSubIdx = SubIdx;
+ }
+
+ // In the case of vld3, etc., make sure the trailing operand of
+ // REG_SEQUENCE is an undef.
+ if (NumRegs == 3) {
+ unsigned OpIdx = 1 + (Offset + 3 * Stride) * 2;
+ const MachineOperand &MO = RegSeq->getOperand(OpIdx);
+ unsigned VirtReg = MO.getReg();
+ MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
+ if (!DefMI || !DefMI->isImplicitDef())
+ return false;
+ }
+ return true;
+ }
+
+ unsigned LastSrcReg = 0;
+ SmallVector<unsigned, 4> SubIds;
for (unsigned R = 0; R < NumRegs; ++R) {
- MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ const MachineOperand &MO = MI->getOperand(FirstOpnd + R);
assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
unsigned VirtReg = MO.getReg();
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
"expected a virtual register");
- if (!MRI->hasOneNonDBGUse(VirtReg))
+ // Extracting from a Q or QQ register.
+ MachineInstr *DefMI = MRI->getVRegDef(VirtReg);
+ if (!DefMI || !DefMI->isExtractSubreg())
return false;
- MachineInstr *UseMI = &*MRI->use_nodbg_begin(VirtReg);
- if (UseMI->getOpcode() != TargetOpcode::REG_SEQUENCE)
+ VirtReg = DefMI->getOperand(1).getReg();
+ if (LastSrcReg && LastSrcReg != VirtReg)
return false;
- if (RegSeq && RegSeq != UseMI)
+ LastSrcReg = VirtReg;
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
+ if (RC != ARM::QPRRegisterClass &&
+ RC != ARM::QQPRRegisterClass &&
+ RC != ARM::QQQQPRRegisterClass)
return false;
- RegSeq = UseMI;
+ unsigned SubIdx = DefMI->getOperand(2).getImm();
+ if (LastSubIdx) {
+ if (LastSubIdx != SubIdx-Stride)
+ return false;
+ } else {
+ // Must start from dsub_0 or qsub_0.
+ if (SubIdx != (ARM::dsub_0+Offset) &&
+ SubIdx != (ARM::qsub_0+Offset))
+ return false;
+ }
+ SubIds.push_back(SubIdx);
+ LastSubIdx = SubIdx;
}
+
+  // FIXME: Updating the uses of EXTRACT_SUBREG from REG_SEQUENCE is
+  // currently required for correctness, e.g.
+  // %reg1041<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
+  // %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
+  // %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
+  // VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,
+  // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5
+  // respectively.
+ // We need to change how we model uses of REG_SEQUENCE.
+ for (unsigned R = 0; R < NumRegs; ++R) {
+ MachineOperand &MO = MI->getOperand(FirstOpnd + R);
+ unsigned OldReg = MO.getReg();
+ MachineInstr *DefMI = MRI->getVRegDef(OldReg);
+ assert(DefMI->isExtractSubreg());
+ MO.setReg(LastSrcReg);
+ MO.setSubReg(SubIds[R]);
+ if (R != 0)
+ MO.setIsKill(false);
+ // Delete the EXTRACT_SUBREG if its result is now dead.
+ if (MRI->use_empty(OldReg))
+ DefMI->eraseFromParent();
+ }
+
return true;
}
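The index arithmetic above relies on the operand layout of REG_SEQUENCE: operand 0 is the def, followed by alternating (register, sub-register index) pairs. A small sketch (editor's illustration, not part of the patch):

// Editor's sketch: where the R-th source register of a REG_SEQUENCE sits,
// given the Offset/Stride used by FormsRegSequence.
unsigned regSeqOperandIdx(unsigned Offset, unsigned Stride, unsigned R) {
  return 1 + (Offset + R * Stride) * 2;  // its sub-index is at OpIdx + 1
}
// With Offset = 0 and Stride = 1 this visits operands 1, 3, 5, ...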
@@ -368,7 +467,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
unsigned FirstOpnd, NumRegs, Offset, Stride;
if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
continue;
- if (FormsRegSequence(MI, FirstOpnd, NumRegs))
+ if (llvm::ModelWithRegSequence() &&
+ FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride))
continue;
MachineBasicBlock::iterator NextI = llvm::next(MBBI);
@@ -390,7 +490,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
if (MO.isUse()) {
// Insert a copy from VirtReg.
TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg,
- ARM::DPRRegisterClass, ARM::DPRRegisterClass);
+ ARM::DPRRegisterClass, ARM::DPRRegisterClass,
+ DebugLoc());
if (MO.isKill()) {
MachineInstr *CopyMI = prior(MBBI);
CopyMI->findRegisterUseOperand(VirtReg)->setIsKill();
@@ -399,7 +500,8 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
} else if (MO.isDef() && !MO.isDead()) {
// Add a copy to VirtReg.
TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(),
- ARM::DPRRegisterClass, ARM::DPRRegisterClass);
+ ARM::DPRRegisterClass, ARM::DPRRegisterClass,
+ DebugLoc());
}
}
}
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index b10c3f7..fae84d4 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -17,6 +17,7 @@
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/ADT/SmallVector.h"
@@ -36,10 +37,8 @@ bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC == ARM::GPRRegisterClass) {
if (SrcRC == ARM::GPRRegisterClass) {
BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
@@ -97,10 +96,8 @@ canFoldMemoryOperand(const MachineInstr *MI,
void Thumb1InstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
assert((RC == ARM::tGPRRegisterClass ||
(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
isARMLowRegister(SrcReg))) && "Unknown regclass!");
@@ -108,6 +105,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == ARM::tGPRRegisterClass ||
(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
isARMLowRegister(SrcReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
@@ -124,10 +124,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void Thumb1InstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
assert((RC == ARM::tGPRRegisterClass ||
(TargetRegisterInfo::isPhysicalRegister(DestReg) &&
isARMLowRegister(DestReg))) && "Unknown regclass!");
@@ -135,6 +133,9 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == ARM::tGPRRegisterClass ||
(TargetRegisterInfo::isPhysicalRegister(DestReg) &&
isARMLowRegister(DestReg))) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
@@ -150,7 +151,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
bool Thumb1InstrInfo::
spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -161,9 +163,22 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
AddDefaultPred(MIB);
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- // Add the callee-saved register as live-in. It's killed at the spill.
- MBB.addLiveIn(Reg);
- MIB.addReg(Reg, RegState::Kill);
+ bool isKill = true;
+
+ // Add the callee-saved register as live-in unless it's LR and
+ // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
+ // then it's already added to the function and entry block live-in sets.
+ if (Reg == ARM::LR) {
+ MachineFunction &MF = *MBB.getParent();
+ if (MF.getFrameInfo()->isReturnAddressTaken() &&
+ MF.getRegInfo().isLiveIn(Reg))
+ isKill = false;
+ }
+
+ if (isKill) {
+ MBB.addLiveIn(Reg);
+ MIB.addReg(Reg, RegState::Kill);
+ }
}
return true;
}
@@ -171,7 +186,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool Thumb1InstrInfo::
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (CSI.empty())
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 516ddf1..c937296 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -39,25 +39,30 @@ public:
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
bool copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index b143bd9..531d5e9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -40,10 +40,8 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC == ARM::GPRRegisterClass) {
if (SrcRC == ARM::GPRRegisterClass) {
BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
@@ -63,17 +61,18 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
}
// Handle SPR, DPR, and QPR copies.
- return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC);
+ return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL);
}
void Thumb2InstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
@@ -87,17 +86,18 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
return;
}
- ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC);
+ ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
}
void Thumb2InstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+ DebugLoc DL;
+ if (I != MBB.end()) DL = I->getDebugLoc();
+
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
@@ -110,7 +110,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
return;
}
- ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC);
+ ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
}
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index a0f89a6..2948770 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -35,17 +35,20 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index ba403e2..3aba363 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -146,16 +146,14 @@ bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
//cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n";
if (DestRC != SrcRC) {
// Not yet supported!
return false;
}
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
if (DestRC == Alpha::GPRCRegisterClass) {
BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg)
.addReg(SrcReg)
@@ -180,7 +178,8 @@ void
AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
//cerr << "Trying to store " << getPrettyName(SrcReg) << " to "
// << FrameIdx << "\n";
//BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg);
@@ -208,7 +207,8 @@ void
AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
//cerr << "Trying to load " << getPrettyName(DestReg) << " to "
// << FrameIdx << "\n";
DebugLoc DL;
@@ -399,7 +399,6 @@ unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31)
.addReg(Alpha::R31)
.addReg(Alpha::R31);
@@ -430,7 +429,8 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass);
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ DebugLoc());
assert(Ok && "Couldn't assign to global base register!");
Ok = Ok; // Silence warning when assertions are turned off.
RegInfo.addLiveIn(Alpha::R29);
@@ -457,7 +457,8 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const {
GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass);
bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26,
- &Alpha::GPRCRegClass, &Alpha::GPRCRegClass);
+ &Alpha::GPRCRegClass, &Alpha::GPRCRegClass,
+ DebugLoc());
assert(Ok && "Couldn't assign to global return address register!");
Ok = Ok; // Silence warning when assertions are turned off.
RegInfo.addLiveIn(Alpha::R26);
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index c3b6044..7d7365b 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -48,16 +48,19 @@ public:
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index d5d5e02..a47a29b 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -836,7 +836,7 @@ class br_fcc<bits<6> opc, string asmstr>
!strconcat(asmstr, " $R,$dst"), s_fbr>;
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
-let Ra = 31 in
+let Ra = 31, isBarrier = 1 in
def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>;
def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst),
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
index 0eb7b8f..f1958fe 100644
--- a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
+++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "alpha-selectiondag-info"
-#include "AlphaSelectionDAGInfo.h"
+#include "AlphaTargetMachine.h"
using namespace llvm;
-AlphaSelectionDAGInfo::AlphaSelectionDAGInfo() {
+AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() {
diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h
index 70889ae..3405cc0 100644
--- a/lib/Target/Alpha/AlphaSelectionDAGInfo.h
+++ b/lib/Target/Alpha/AlphaSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class AlphaTargetMachine;
+
class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- AlphaSelectionDAGInfo();
+ explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM);
~AlphaSelectionDAGInfo();
};
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index 5169a01..fc9be03 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -32,7 +32,8 @@ AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
JITInfo(*this),
Subtarget(TT, FS),
- TLInfo(*this) {
+ TLInfo(*this),
+ TSInfo(*this) {
setRelocationModel(Reloc::PIC_);
}
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 0990f6d..153944e 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -20,6 +20,7 @@
#include "AlphaInstrInfo.h"
#include "AlphaJITInfo.h"
#include "AlphaISelLowering.h"
+#include "AlphaSelectionDAGInfo.h"
#include "AlphaSubtarget.h"
namespace llvm {
@@ -33,6 +34,7 @@ class AlphaTargetMachine : public LLVMTargetMachine {
AlphaJITInfo JITInfo;
AlphaSubtarget Subtarget;
AlphaTargetLowering TLInfo;
+ AlphaSelectionDAGInfo TSInfo;
public:
AlphaTargetMachine(const Target &T, const std::string &TT,
@@ -47,6 +49,9 @@ public:
virtual const AlphaTargetLowering* getTargetLowering() const {
return &TLInfo;
}
+ virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual AlphaJITInfo* getJITInfo() {
return &JITInfo;
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index cf1901b..73924b7 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -138,9 +138,8 @@ bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
unsigned DestReg,
unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
-
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (inClass(BF::ALLRegClass, DestReg, DestRC) &&
inClass(BF::ALLRegClass, SrcReg, SrcRC)) {
BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg);
@@ -196,7 +195,8 @@ BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned SrcReg,
bool isKill,
int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
if (inClass(BF::DPRegClass, SrcReg, RC)) {
@@ -242,7 +242,8 @@ BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg,
int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
if (inClass(BF::DPRegClass, DestReg, RC)) {
BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg)
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
index ea3429c..c1dcd58 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -50,13 +50,15 @@ namespace llvm {
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill,
int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void storeRegToAddr(MachineFunction &MF,
unsigned SrcReg, bool isKill,
@@ -67,7 +69,8 @@ namespace llvm {
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index 2471688..5cf350a 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -301,9 +301,9 @@ def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr),
def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>;
def : Pat<(i16 (extloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>;
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
def : Pat<(i16 (zextloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), bfin_subreg_lo16)>;
+ (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>;
def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
"$dst = b[$ptr + $off] (z);",
@@ -313,17 +313,17 @@ def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))),
(LOAD32p_imm16_8z P:$ptr, imm:$off)>;
def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))),
(EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
- bfin_subreg_lo16)>;
+ lo16)>;
def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))),
(EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off),
- bfin_subreg_lo16)>;
+ lo16)>;
def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr),
"$dst = b[$ptr] (x);",
[(set D:$dst, (sextloadi8 P:$ptr))]>;
def : Pat<(i16 (sextloadi8 P:$ptr)),
- (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), bfin_subreg_lo16)>;
+ (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>;
def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
"$dst = b[$ptr + $off] (x);",
@@ -331,7 +331,7 @@ def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off),
def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))),
(EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off),
- bfin_subreg_lo16)>;
+ lo16)>;
// Memory loads without patterns
let mayLoad = 1 in {
@@ -468,16 +468,16 @@ def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off),
def : Pat<(truncstorei16 D:$val, PI:$ptr),
(STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
- bfin_subreg_lo16), PI:$ptr)>;
+ lo16), PI:$ptr)>;
def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr),
(STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)),
- bfin_subreg_hi16), PI:$ptr)>;
+ hi16), PI:$ptr)>;
def : Pat<(truncstorei8 D16L:$val, P:$ptr),
(STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
(i16 (COPY_TO_REGCLASS D16L:$val, D16L)),
- bfin_subreg_lo16),
+ lo16),
P:$ptr)>;
//===----------------------------------------------------------------------===//
@@ -516,19 +516,19 @@ def : Pat<(sext_inreg D16L:$src, i8),
(EXTRACT_SUBREG (MOVEsext8
(INSERT_SUBREG (i32 (IMPLICIT_DEF)),
D16L:$src,
- bfin_subreg_lo16)),
- bfin_subreg_lo16)>;
+ lo16)),
+ lo16)>;
def : Pat<(sext_inreg D:$src, i16),
- (MOVEsext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>;
+ (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>;
def : Pat<(and D:$src, 0xffff),
- (MOVEzext (EXTRACT_SUBREG D:$src, bfin_subreg_lo16))>;
+ (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>;
def : Pat<(i32 (anyext D16L:$src)),
(INSERT_SUBREG (i32 (IMPLICIT_DEF)),
(i16 (COPY_TO_REGCLASS D16L:$src, D16L)),
- bfin_subreg_lo16)>;
+ lo16)>;
// TODO Dreg = Dreg_byte (X/Z)
@@ -859,4 +859,4 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
def : Pat<(BfinCall (i32 texternalsym:$dst)),
(CALLa texternalsym:$dst)>;
def : Pat<(i16 (trunc D:$src)),
- (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), bfin_subreg_lo16)>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 2512c9b..5153ace 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -111,7 +111,7 @@ BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const {
bool BlackfinRegisterInfo::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return DisableFramePointerElim(MF) ||
- MFI->hasCalls() || MFI->hasVarSizedObjects();
+ MFI->adjustsStack() || MFI->hasVarSizedObjects();
}
bool BlackfinRegisterInfo::
@@ -177,11 +177,11 @@ void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
// We must split into halves
BuildMI(MBB, I, DL,
- TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_hi16))
+ TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16))
.addImm((value >> 16) & 0xffff)
.addReg(Reg, RegState::ImplicitDefine);
BuildMI(MBB, I, DL,
- TII.get(BF::LOAD16i), getSubReg(Reg, bfin_subreg_lo16))
+ TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16))
.addImm(value & 0xffff)
.addReg(Reg, RegState::ImplicitKill)
.addReg(Reg, RegState::ImplicitDefine);
@@ -394,7 +394,7 @@ void BlackfinRegisterInfo::emitPrologue(MachineFunction &MF) const {
}
if (!hasFP(MF)) {
- assert(!MFI->hasCalls() &&
+ assert(!MFI->adjustsStack() &&
"FP elimination on a non-leaf function is not supported");
adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize);
return;
@@ -435,7 +435,7 @@ void BlackfinRegisterInfo::emitEpilogue(MachineFunction &MF,
assert(FrameSize%4 == 0 && "Misaligned frame size");
if (!hasFP(MF)) {
- assert(!MFI->hasCalls() &&
+ assert(!MFI->adjustsStack() &&
"FP elimination on a non-leaf function is not supported");
adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize);
return;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 7cfb120..03c5450 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -24,13 +24,6 @@ namespace llvm {
class TargetInstrInfo;
class Type;
- // Subregister indices, keep in sync with BlackfinRegisterInfo.td
- enum BfinSubregIdx {
- bfin_subreg_lo16 = 1,
- bfin_subreg_hi16 = 2,
- bfin_subreg_lo32 = 3
- };
-
struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo {
BlackfinSubtarget &Subtarget;
const TargetInstrInfo &TII;
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
index d396cc8..e1cfae9 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -11,8 +11,18 @@
// Declarations that describe the Blackfin register file
//===----------------------------------------------------------------------===//
-// Registers are identified with 3-bit group and 3-bit ID numbers.
+// Sub-register suffixes (the old numeric indices 1/2/3 are replaced by the
+// named indices below):
+// lo16: .L
+// hi16: .H
+// lo32: .W (32 low bits of the 40-bit accumulators)
+let Namespace = "BF" in {
+def lo16 : SubRegIndex;
+def hi16 : SubRegIndex;
+def lo32 : SubRegIndex;
+def hi32 : SubRegIndex;
+}
+// Registers are identified with 3-bit group and 3-bit ID numbers.
class BlackfinReg<string n> : Register<n> {
field bits<3> Group;
field bits<3> Num;
@@ -40,6 +50,7 @@ class Ri<bits<3> group, bits<3> num, string n> : BlackfinReg<n> {
// Ra 40-bit accumulator registers
class Ra<bits<3> num, string n, list<Register> subs> : BlackfinReg<n> {
let SubRegs = subs;
+ let SubRegIndices = [hi32, lo32];
let Group = 4;
let Num = num;
}
@@ -54,6 +65,7 @@ multiclass Rss<bits<3> group, bits<3> num, string n> {
class Rii<bits<3> group, bits<3> num, string n, list<Register> subs>
: BlackfinReg<n> {
let SubRegs = subs;
+ let SubRegIndices = [hi16, lo16];
let Group = group;
let Num = num;
}
@@ -164,7 +176,7 @@ def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>;
def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>;
def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> {
- let SubRegs = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
+ let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS];
}
def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>;
@@ -182,38 +194,6 @@ def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>;
def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
-// Subregs are:
-// 1: .L
-// 2: .H
-// 3: .W (32 low bits of 40-bit accu)
-// Keep in sync with enum in BlackfinRegisterInfo.h
-def bfin_subreg_lo16 : PatLeaf<(i32 1)>;
-def bfin_subreg_hi16 : PatLeaf<(i32 2)>;
-def bfin_subreg_32bit : PatLeaf<(i32 3)>;
-
-def : SubRegSet<1,
- [R0, R1, R2, R3, R4, R5, R6, R7,
- P0, P1, P2, P3, P4, P5, SP, FP,
- I0, I1, I2, I3, M0, M1, M2, M3,
- B0, B1, B2, B3, L0, L1, L2, L3],
- [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L,
- P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL,
- I0L, I1L, I2L, I3L, M0L, M1L, M2L, M3L,
- B0L, B1L, B2L, B3L, L0L, L1L, L2L, L3L]>;
-
-def : SubRegSet<2,
- [R0, R1, R2, R3, R4, R5, R6, R7,
- P0, P1, P2, P3, P4, P5, SP, FP,
- I0, I1, I2, I3, M0, M1, M2, M3,
- B0, B1, B2, B3, L0, L1, L2, L3],
- [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H,
- P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH,
- I0H, I1H, I2H, I3H, M0H, M1H, M2H, M3H,
- B0H, B1H, B2H, B3H, L0H, L1H, L2H, L3H]>;
-
-def : SubRegSet<1, [A0, A0W, A1, A1W], [A0L, A0L, A1L, A1L]>;
-def : SubRegSet<2, [A0, A0W, A1, A1W], [A0H, A0H, A1H, A1H]>;
-
// Register classes.
def D16 : RegisterClass<"BF", [i16], 16,
[R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
@@ -260,11 +240,11 @@ def GR16 : RegisterClass<"BF", [i16], 16,
L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L]>;
def D : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
- let SubRegClassList = [D16L, D16H];
+ let SubRegClasses = [(D16L lo16), (D16H hi16)];
}
def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> {
- let SubRegClassList = [P16L, P16H];
+ let SubRegClasses = [(P16L lo16), (P16H hi16)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -287,7 +267,7 @@ def L : RegisterClass<"BF", [i32], 32, [L0, L1, L2, L3]>;
def DP : RegisterClass<"BF", [i32], 32,
[R0, R1, R2, R3, R4, R5, R6, R7,
P0, P1, P2, P3, P4, P5, FP, SP]> {
- let SubRegClassList = [DP16L, DP16H];
+ let SubRegClasses = [(DP16L lo16), (DP16H hi16)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
index f4bb25f..a21f696 100644
--- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp
@@ -12,10 +12,12 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "blackfin-selectiondag-info"
-#include "BlackfinSelectionDAGInfo.h"
+#include "BlackfinTargetMachine.h"
using namespace llvm;
-BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo() {
+BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo(
+ const BlackfinTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() {
diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
index a620330..f1ce348 100644
--- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
+++ b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class BlackfinTargetMachine;
+
class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- BlackfinSelectionDAGInfo();
+ explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM);
~BlackfinSelectionDAGInfo();
};
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index 45d7c35..66a2f68 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -31,6 +31,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
DataLayout("e-p:32:32-i64:32-f64:32-n32"),
Subtarget(TT, FS),
TLInfo(*this),
+ TSInfo(*this),
InstrInfo(Subtarget),
FrameInfo(TargetFrameInfo::StackGrowsDown, 4, 0) {
}
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
index 07e7394..a63aa54 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -20,6 +20,7 @@
#include "BlackfinInstrInfo.h"
#include "BlackfinSubtarget.h"
#include "BlackfinISelLowering.h"
+#include "BlackfinSelectionDAGInfo.h"
#include "BlackfinIntrinsicInfo.h"
namespace llvm {
@@ -28,6 +29,7 @@ namespace llvm {
const TargetData DataLayout;
BlackfinSubtarget Subtarget;
BlackfinTargetLowering TLInfo;
+ BlackfinSelectionDAGInfo TSInfo;
BlackfinInstrInfo InstrInfo;
TargetFrameInfo FrameInfo;
BlackfinIntrinsicInfo IntrinsicInfo;
@@ -46,6 +48,9 @@ namespace llvm {
virtual const BlackfinTargetLowering* getTargetLowering() const {
return &TLInfo;
}
+ virtual const BlackfinSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
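
The Blackfin files above establish a pattern repeated for SPU, MBlaze, MSP430, Mips, and PIC16 below: TargetSelectionDAGInfo is now constructed from the owning TargetMachine, each target machine gains a TSInfo member initialized with *this, and a getSelectionDAGInfo() override exposes it. A minimal sketch of the shape, using a hypothetical Foo target:

    class FooTargetMachine;  // forward declaration, as in the real headers

    class FooSelectionDAGInfo : public TargetSelectionDAGInfo {
    public:
      explicit FooSelectionDAGInfo(const FooTargetMachine &TM);
      ~FooSelectionDAGInfo();
    };

    class FooTargetMachine : public LLVMTargetMachine {
      FooSelectionDAGInfo TSInfo;  // initialized as TSInfo(*this)
    public:
      virtual const FooSelectionDAGInfo *getSelectionDAGInfo() const {
        return &TSInfo;
      }
    };
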
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 67f513b..55b8aaa 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2165,6 +2165,9 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
case CallingConv::X86_FastCall:
Out << "__attribute__((fastcall)) ";
break;
+ case CallingConv::X86_ThisCall:
+ Out << "__attribute__((thiscall)) ";
+ break;
default:
break;
}
@@ -3554,11 +3557,11 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
// External Interface declaration
//===----------------------------------------------------------------------===//
-bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify) {
+bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(createGCLoweringPass());
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index d178e7f..6fed195 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -23,12 +23,11 @@ struct CTargetMachine : public TargetMachine {
CTargetMachine(const Target &T, const std::string &TT, const std::string &FS)
: TargetMachine(T) {}
- virtual bool WantsWholeFile() const { return true; }
- virtual bool addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
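
CBackend here, and CppBackend and MSILWriter below, lose the WantsWholeFile/addPassesToEmitWholeFile special case: whole-module backends now override the ordinary addPassesToEmitFile hook, taking a PassManagerBase and returning true for any file type they cannot produce. A sketch of the shared shape (FooWriter is a placeholder for the backend's module pass):

    bool FooTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
                                               formatted_raw_ostream &Out,
                                               CodeGenFileType FileType,
                                               CodeGenOpt::Level OptLevel,
                                               bool DisableVerify) {
      if (FileType != TargetMachine::CGFT_AssemblyFile)
        return true;                // true means "cannot emit this type"
      PM.add(new FooWriter(Out));   // single pass writes the whole module
      return false;
    }
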
diff --git a/lib/Target/CellSPU/README.txt b/lib/Target/CellSPU/README.txt
index 4783dd5..0e7ad35 100644
--- a/lib/Target/CellSPU/README.txt
+++ b/lib/Target/CellSPU/README.txt
@@ -10,6 +10,8 @@ Department in The Aerospace Corporation:
- Chandler Carruth (LLVM expertise)
- Nehal Desai (debugging, i32 operations, RoadRunner SPU expertise)
+Some minor fixes added by Kalle Raiskila.
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NONINFRINGEMENT, OR
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 5e04454..081e8d0 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -485,7 +485,7 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
// Set pre-RA register scheduler default to BURR, which produces slightly
// better code than the default (could also be TDRR, but TargetLowering.h
// needs a mod to support that model):
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
}
const char *
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 423da3b..4c53c98 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -255,16 +255,14 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const
{
// We support cross register class moves for our aliases, such as R3 in any
// reg class to any other reg class containing R3. This is required because
// we instruction select bitconvert i64 -> f64 as a noop for example, so our
// types have no specific meaning.
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
if (DestRC == SPU::R8CRegisterClass) {
BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg);
} else if (DestRC == SPU::R16CRegisterClass) {
@@ -291,9 +289,10 @@ bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
void
SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const
+ MachineBasicBlock::iterator MI,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
{
unsigned opc;
bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
@@ -325,9 +324,10 @@ SPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void
SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MI,
- unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
{
unsigned opc;
bool isValidFrameIdx = (FrameIdx < SPUFrameInfo::maxFrameOffset());
@@ -467,6 +467,9 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
// If there is only one terminator instruction, process it.
if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
if (isUncondBranch(LastInst)) {
+ // Check for jump tables
+ if (!LastInst->getOperand(0).isMBB())
+ return true;
TBB = LastInst->getOperand(0).getMBB();
return false;
} else if (isCondBranch(LastInst)) {
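
A note on the AnalyzeBranch guard just added above: by convention, returning true from AnalyzeBranch means "this branch cannot be analyzed", so an unconditional branch whose first operand is a jump-table index rather than a basic block must bail out before getMBB() is called on a non-MBB operand. Distilled to a sketch:

    if (isUncondBranch(LastInst)) {
      if (!LastInst->getOperand(0).isMBB())
        return true;                          // jump table: give up analysis
      TBB = LastInst->getOperand(0).getMBB(); // plain unconditional branch
      return false;
    }
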
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index 42677fc..6dabd7c 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -60,19 +60,22 @@ namespace llvm {
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
//! Store a register to a stack slot, based on its register class.
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
//! Load a register from a stack slot, based on its register class.
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
//! Return true if the specified load or store can be folded
virtual
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 6d1f87d..a7fb14c 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -655,7 +655,7 @@ def SFHvec:
def SFHr16:
RRForm<0b00010010000, (outs R16C:$rT), (ins R16C:$rA, R16C:$rB),
"sfh\t$rT, $rA, $rB", IntegerOp,
- [(set R16C:$rT, (sub R16C:$rA, R16C:$rB))]>;
+ [(set R16C:$rT, (sub R16C:$rB, R16C:$rA))]>;
def SFHIvec:
RI10Form<0b10110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
@@ -670,11 +670,11 @@ def SFHIr16 : RI10Form<0b10110000, (outs R16C:$rT), (ins R16C:$rA, s10imm:$val),
def SFvec : RRForm<0b00000010000, (outs VECREG:$rT),
(ins VECREG:$rA, VECREG:$rB),
"sf\t$rT, $rA, $rB", IntegerOp,
- [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>;
+ [(set (v4i32 VECREG:$rT), (sub (v4i32 VECREG:$rB), (v4i32 VECREG:$rA)))]>;
def SFr32 : RRForm<0b00000010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB),
"sf\t$rT, $rA, $rB", IntegerOp,
- [(set R32C:$rT, (sub R32C:$rA, R32C:$rB))]>;
+ [(set R32C:$rT, (sub R32C:$rB, R32C:$rA))]>;
def SFIvec:
RI10Form<0b00110000, (outs VECREG:$rT), (ins VECREG:$rA, s10imm:$val),
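
The pattern swaps above follow the SPU ISA's definition of sf ("subtract from"): the instruction computes $rT = $rB - $rA, not $rA - $rB. For example, with $rA = 1 and $rB = 5, sf writes 4, so the matching DAG pattern must be (sub $rB, $rA); the halfword, word, and vector forms are all corrected the same way.
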
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index fdbe10f..d8937ec 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -469,7 +469,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
&& "SPURegisterInfo::emitPrologue: FrameSize not aligned");
// the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->hasCalls()) {
+ if (FrameSize > 16 || MFI->adjustsStack()) {
FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
if (hasDebugInfo) {
// Mark effective beginning of when frame pointer becomes valid.
@@ -569,7 +569,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
&& "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
// the "empty" frame size is 16 - just the register scavenger spill slot
- if (FrameSize > 16 || MFI->hasCalls()) {
+ if (FrameSize > 16 || MFI->adjustsStack()) {
FrameSize = FrameSize + SPUFrameInfo::minStackSize();
if (isInt<10>(FrameSize + LinkSlotOffset)) {
// Reload $lr, adjust $sp by required amount
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
index ca2a4bf..5732fd4 100644
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
+++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "cellspu-selectiondag-info"
-#include "SPUSelectionDAGInfo.h"
+#include "SPUTargetMachine.h"
using namespace llvm;
-SPUSelectionDAGInfo::SPUSelectionDAGInfo() {
+SPUSelectionDAGInfo::SPUSelectionDAGInfo(const SPUTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
SPUSelectionDAGInfo::~SPUSelectionDAGInfo() {
diff --git a/lib/Target/CellSPU/SPUSelectionDAGInfo.h b/lib/Target/CellSPU/SPUSelectionDAGInfo.h
index 0a6b4c1..39257d9 100644
--- a/lib/Target/CellSPU/SPUSelectionDAGInfo.h
+++ b/lib/Target/CellSPU/SPUSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class SPUTargetMachine;
+
class SPUSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- SPUSelectionDAGInfo();
+ explicit SPUSelectionDAGInfo(const SPUTargetMachine &TM);
~SPUSelectionDAGInfo();
};
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 6500067..480ec3f 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -42,6 +42,7 @@ SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
InstrInfo(*this),
FrameInfo(*this),
TLInfo(*this),
+ TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
// For the time being, use static relocations, since there's really no
// support for PIC yet.
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 37e7cd2..7e02701 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -17,6 +17,7 @@
#include "SPUSubtarget.h"
#include "SPUInstrInfo.h"
#include "SPUISelLowering.h"
+#include "SPUSelectionDAGInfo.h"
#include "SPUFrameInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
@@ -34,6 +35,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUInstrInfo InstrInfo;
SPUFrameInfo FrameInfo;
SPUTargetLowering TLInfo;
+ SPUSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
public:
SPUTargetMachine(const Target &T, const std::string &TT,
@@ -61,6 +63,10 @@ public:
return &TLInfo;
}
+ virtual const SPUSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual const SPURegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index e739b26..45a0c84 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -1038,6 +1038,11 @@ namespace {
Out << ");";
nl(Out);
}
+ if (GV->isThreadLocal()) {
+ printCppName(GV);
+ Out << "->setThreadLocal(true);";
+ nl(Out);
+ }
if (is_inline) {
out(); Out << "}"; nl(Out);
}
@@ -2007,11 +2012,11 @@ char CppWriter::ID = 0;
// External Interface declaration
//===----------------------------------------------------------------------===//
-bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify) {
+bool CPPTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(new CppWriter(o));
return false;
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index b7aae91..e42166e 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -26,12 +26,11 @@ struct CPPTargetMachine : public TargetMachine {
const std::string &FS)
: TargetMachine(T) {}
- virtual bool WantsWholeFile() const { return true; }
- virtual bool addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index 04dfb0a..e42e9b3 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -155,7 +155,7 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
CPUBitmask |= (1 << MBlazeRegisterInfo::
getRegisterNumbering(RI.getFrameRegister(*MF)));
- if (MFI->hasCalls())
+ if (MFI->adjustsStack())
CPUBitmask |= (1 << MBlazeRegisterInfo::
getRegisterNumbering(RI.getRARegister()));
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 01f3174..4c4d86b 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -107,7 +107,6 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
void MBlazeInstrInfo::
insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
BuildMI(MBB, MI, DL, get(MBlaze::NOP));
}
@@ -115,8 +114,8 @@ bool MBlazeInstrInfo::
copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg)
.addReg(SrcReg).addReg(MBlaze::R0);
return true;
@@ -125,7 +124,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void MBlazeInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
BuildMI(MBB, I, DL, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
.addImm(0).addFrameIndex(FI);
@@ -134,7 +134,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void MBlazeInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
BuildMI(MBB, I, DL, get(MBlaze::LWI), DestReg)
.addImm(0).addFrameIndex(FI);
@@ -210,7 +211,8 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20,
MBlaze::CPURegsRegisterClass,
- MBlaze::CPURegsRegisterClass);
+ MBlaze::CPURegsRegisterClass,
+ DebugLoc());
assert(Ok && "Couldn't assign to global base register!");
Ok = Ok; // Silence warning when assertions are turned off.
RegInfo.addLiveIn(MBlaze::R20);
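
The MBlaze hunks repeat the signature changes running through every target in this patch: copyRegToReg now receives its DebugLoc from the caller instead of deriving one from the insertion point, and storeRegToStackSlot/loadRegFromStackSlot gain a trailing TargetRegisterInfo pointer. A sketch of a caller under the new signatures (the register class, frame index, and TII/TRI variables are placeholders):

    DebugLoc DL;
    if (MI != MBB.end())
      DL = MI->getDebugLoc();           // the caller now picks the location
    TII->copyRegToReg(MBB, MI, DestReg, SrcReg, RC, RC, DL);
    TII->storeRegToStackSlot(MBB, MI, SrcReg, /*isKill=*/true, FrameIdx,
                             RC, TRI);  // TRI is the TargetRegisterInfo
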
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index 4f79f1c..c9fdc88 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -203,16 +203,19 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index e15176e..f15eea9 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -220,7 +220,7 @@ void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const {
StackOffset += RegSize;
}
- if (MFI->hasCalls()) {
+ if (MFI->adjustsStack()) {
MBlazeFI->setRAStackOffset(0);
MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
StackOffset);
@@ -311,8 +311,8 @@ emitPrologue(MachineFunction &MF) const {
unsigned StackSize = MFI->getStackSize();
// No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->hasCalls()) return;
- if (StackSize < 28 && MFI->hasCalls()) StackSize = 28;
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
+ if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
int FPOffset = MBlazeFI->getFPStackOffset();
int RAOffset = MBlazeFI->getRAStackOffset();
@@ -323,7 +323,7 @@ emitPrologue(MachineFunction &MF) const {
// Save the return address only if the function isn't a leaf one.
// swi R15, R1, stack_loc
- if (MFI->hasCalls()) {
+ if (MFI->adjustsStack()) {
BuildMI(MBB, MBBI, DL, TII.get(MBlaze::SWI))
.addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1);
}
@@ -366,14 +366,14 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
// Restore the return address only if the function isn't a leaf one.
// lwi R15, R1, stack_loc
- if (MFI->hasCalls()) {
+ if (MFI->adjustsStack()) {
BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
.addImm(RAOffset).addReg(MBlaze::R1);
}
// Get the number of bytes from FrameInfo
int StackSize = (int) MFI->getStackSize();
- if (StackSize < 28 && MFI->hasCalls()) StackSize = 28;
+ if (StackSize < 28 && MFI->adjustsStack()) StackSize = 28;
// adjust stack.
// addi R1, R1, imm
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index 96a5c98..d0a1e75 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -17,21 +17,15 @@ class MBlazeReg<string n> : Register<n> {
let Namespace = "MBlaze";
}
-class MBlazeRegWithSubRegs<string n, list<Register> subregs>
- : RegisterWithSubRegs<n, subregs> {
- field bits<5> Num;
- let Namespace = "MBlaze";
-}
-
// MBlaze CPU Registers
class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
let Num = num;
}
// MBlaze 32-bit (aliased) FPU Registers
-class FPR<bits<5> num, string n, list<Register> subregs>
- : MBlazeRegWithSubRegs<n, subregs> {
+class FPR<bits<5> num, string n, list<Register> aliases> : MBlazeReg<n> {
let Num = num;
+ let Aliases = aliases;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
index 105e42a..6a115b2 100644
--- a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mblaze-selectiondag-info"
-#include "MBlazeSelectionDAGInfo.h"
+#include "MBlazeTargetMachine.h"
using namespace llvm;
-MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo() {
+MBlazeSelectionDAGInfo::MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
MBlazeSelectionDAGInfo::~MBlazeSelectionDAGInfo() {
diff --git a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
index 11e6879..9f8e2aa 100644
--- a/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
+++ b/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class MBlazeTargetMachine;
+
class MBlazeSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- MBlazeSelectionDAGInfo();
+ explicit MBlazeSelectionDAGInfo(const MBlazeTargetMachine &TM);
~MBlazeSelectionDAGInfo();
};
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index 9eba2b3..4252953 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -39,7 +39,7 @@ MBlazeTargetMachine(const Target &T, const std::string &TT,
"f64:32:32-v64:32:32-v128:32:32-n32"),
InstrInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
- TLInfo(*this) {
+ TLInfo(*this), TSInfo(*this) {
if (getRelocationModel() == Reloc::Default) {
setRelocationModel(Reloc::Static);
}
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 9bf9898..6a57e58 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -17,6 +17,7 @@
#include "MBlazeSubtarget.h"
#include "MBlazeInstrInfo.h"
#include "MBlazeISelLowering.h"
+#include "MBlazeSelectionDAGInfo.h"
#include "MBlazeIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
@@ -31,6 +32,7 @@ namespace llvm {
MBlazeInstrInfo InstrInfo;
TargetFrameInfo FrameInfo;
MBlazeTargetLowering TLInfo;
+ MBlazeSelectionDAGInfo TSInfo;
MBlazeIntrinsicInfo IntrinsicInfo;
public:
MBlazeTargetMachine(const Target &T, const std::string &TT,
@@ -54,6 +56,9 @@ namespace llvm {
virtual const MBlazeTargetLowering *getTargetLowering() const
{ return &TLInfo; }
+ virtual const MBlazeSelectionDAGInfo* getSelectionDAGInfo() const
+ { return &TSInfo; }
+
const TargetIntrinsicInfo *getIntrinsicInfo() const
{ return &IntrinsicInfo; }
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 15d16ec..3de173c 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -34,12 +34,11 @@ namespace llvm {
MSILTarget(const Target &T, const std::string &TT, const std::string &FS)
: TargetMachine(T) {}
- virtual bool WantsWholeFile() const { return true; }
- virtual bool addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &Out,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify);
+ virtual bool addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &Out,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
@@ -279,6 +278,8 @@ std::string MSILWriter::getConvModopt(CallingConv::ID CallingConvID) {
return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvFastcall) ";
case CallingConv::X86_StdCall:
return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvStdcall) ";
+ case CallingConv::X86_ThisCall:
+ return "modopt([mscorlib]System.Runtime.CompilerServices.CallConvThiscall) ";
default:
errs() << "CallingConvID = " << CallingConvID << '\n';
llvm_unreachable("Unsupported calling convention");
@@ -1686,11 +1687,11 @@ void MSILWriter::printExternals() {
// External Interface declaration
//===----------------------------------------------------------------------===//
-bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM,
- formatted_raw_ostream &o,
- CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel,
- bool DisableVerify)
+bool MSILTarget::addPassesToEmitFile(PassManagerBase &PM,
+ formatted_raw_ostream &o,
+ CodeGenFileType FileType,
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify)
{
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
MSILWriter* Writer = new MSILWriter(o);
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
index f9620e8..e937696 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
+++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.h
@@ -26,7 +26,7 @@ namespace llvm {
/// MSP430MCInstLower - This class is used to lower a MachineInstr
/// into an MCInst.
-class VISIBILITY_HIDDEN MSP430MCInstLower {
+class LLVM_LIBRARY_VISIBILITY MSP430MCInstLower {
MCContext &Ctx;
Mangler &Mang;
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index c3e2bdf7..403400e 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -83,7 +83,7 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setStackPointerRegisterToSaveRestore(MSP430::SPW);
setBooleanContents(ZeroOrOneBooleanContent);
- setSchedulingPreference(SchedulingForLatency);
+ setSchedulingPreference(Sched::Latency);
// We have post-incremented loads / stores.
setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
@@ -897,6 +897,9 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
@@ -920,6 +923,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
+
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
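
LowerRETURNADDR now mirrors LowerFRAMEADDR: before computing anything, it records on the MachineFrameInfo that the return address is observed, so later frame lowering keeps it retrievable. The two-line pattern, as seen above:

    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    MFI->setReturnAddressIsTaken(true);  // parallel to setFrameAddressIsTaken
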
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 2b09b3d..18226ab 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -32,7 +32,8 @@ MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
@@ -59,7 +60,8 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const{
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
@@ -85,10 +87,8 @@ bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC == SrcRC) {
unsigned Opc;
if (DestRC == &MSP430::GR16RegClass) {
@@ -130,7 +130,8 @@ MSP430InstrInfo::isMoveInstr(const MachineInstr& MI,
bool
MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -154,7 +155,8 @@ MSP430InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool
MSP430InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 6ef4b0a..842b4cb 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -52,7 +52,8 @@ public:
bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
bool isMoveInstr(const MachineInstr& MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -62,18 +63,22 @@ public:
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index 4078626..f8aec66 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -43,6 +43,9 @@ def R13B : MSP430Reg<13, "r13">;
def R14B : MSP430Reg<14, "r14">;
def R15B : MSP430Reg<15, "r15">;
+def subreg_8bit : SubRegIndex { let Namespace = "MSP430"; }
+
+let SubRegIndices = [subreg_8bit] in {
def PCW : MSP430RegWithSubregs<0, "r0", [PCB]>;
def SPW : MSP430RegWithSubregs<1, "r1", [SPB]>;
def SRW : MSP430RegWithSubregs<2, "r2", [SRB]>;
@@ -59,13 +62,7 @@ def R12W : MSP430RegWithSubregs<12, "r12", [R12B]>;
def R13W : MSP430RegWithSubregs<13, "r13", [R13B]>;
def R14W : MSP430RegWithSubregs<14, "r14", [R14B]>;
def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
-
-def : SubRegSet<1, [PCW, SPW, SRW, CGW, FPW,
- R5W, R6W, R7W, R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
- [PCB, SPB, SRB, CGB, FPB,
- R5B, R6B, R7B, R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def subreg_8bit : PatLeaf<(i32 1)>;
+}
def GR8 : RegisterClass<"MSP430", [i8], 8,
// Volatile registers
@@ -101,7 +98,7 @@ def GR16 : RegisterClass<"MSP430", [i16], 16,
// Volatile, but not allocable
PCW, SPW, SRW, CGW]>
{
- let SubRegClassList = [GR8];
+ let SubRegClasses = [(GR8 subreg_8bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
index a54c929..24f45fa 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "msp430-selectiondag-info"
-#include "MSP430SelectionDAGInfo.h"
+#include "MSP430TargetMachine.h"
using namespace llvm;
-MSP430SelectionDAGInfo::MSP430SelectionDAGInfo() {
+MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() {
diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
index c952ab7..fa81948 100644
--- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h
+++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class MSP430TargetMachine;
+
class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- MSP430SelectionDAGInfo();
+ explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM);
~MSP430SelectionDAGInfo();
};
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index a0dbac2..99877c8 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -33,7 +33,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
Subtarget(TT, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
- InstrInfo(*this), TLInfo(*this),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { }
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index 68bde9a..b93edfd 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -17,6 +17,7 @@
#include "MSP430InstrInfo.h"
#include "MSP430ISelLowering.h"
+#include "MSP430SelectionDAGInfo.h"
#include "MSP430RegisterInfo.h"
#include "MSP430Subtarget.h"
#include "llvm/Target/TargetData.h"
@@ -32,6 +33,7 @@ class MSP430TargetMachine : public LLVMTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
MSP430InstrInfo InstrInfo;
MSP430TargetLowering TLInfo;
+ MSP430SelectionDAGInfo TSInfo;
// MSP430 does not have any call stack frame, so there is no
// MSP430-specific FrameInfo class.
@@ -54,6 +56,10 @@ public:
return &TLInfo;
}
+ virtual const MSP430SelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
}; // MSP430TargetMachine.
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index d269153..4d7fe4c 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -145,7 +145,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
CPUBitmask |= (1 << MipsRegisterInfo::
getRegisterNumbering(RI.getFrameRegister(*MF)));
- if (MFI->hasCalls())
+ if (MFI->adjustsStack())
CPUBitmask |= (1 << MipsRegisterInfo::
getRegisterNumbering(RI.getRARegister()));
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index ee85a3f3..3888bbf 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -225,12 +225,12 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
MVT::Other, Offset0, Base, Chain);
SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, NVT), 0);
- SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl,
+ SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
MVT::f64, Undef, SDValue(LD0, 0));
SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
MVT::Other, Offset1, Base, SDValue(LD0, 1));
- SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl,
+ SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
MVT::f64, I0, SDValue(LD1, 0));
ReplaceUses(SDValue(N, 0), I1);
@@ -266,9 +266,9 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
// Get the even and odd parts from the f64 register
- SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPODD,
+ SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd,
dl, MVT::f32, N1);
- SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPEVEN,
+ SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven,
dl, MVT::f32, N1);
// The second store should start 4 bytes after the first.
@@ -438,9 +438,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
SDValue Undef = SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0);
SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero);
- SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl,
+ SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
MVT::f64, Undef, SDValue(MTC, 0));
- SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl,
+ SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
MVT::f64, I0, SDValue(MTC, 0));
ReplaceUses(SDValue(Node, 0), I1);
return I1.getNode();
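
The Mips selector above shows the new named sub-register indices in action: an f64 value is assembled from two f32 halves by inserting into an IMPLICIT_DEF with Mips::sub_fpeven and Mips::sub_fpodd, which replace the numeric SUBREG_FPEVEN/SUBREG_FPODD constants deleted from MipsRegisterInfo.h below. Distilled to a sketch (EvenHalf/OddHalf stand in for the two f32 SDValues):

    SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   dl, MVT::f64), 0);
    SDValue Lo   = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
                                                 MVT::f64, Undef, EvenHalf);
    SDValue Both = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
                                                 MVT::f64, Lo, OddHalf);
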
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index dbd3c24..4005e35 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -124,7 +124,6 @@ void MipsInstrInfo::
insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const
{
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
BuildMI(MBB, MI, DL, get(Mips::NOP));
}
@@ -132,10 +131,8 @@ bool MipsInstrInfo::
copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
-
- if (I != MBB.end()) DL = I->getDebugLoc();
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC != SrcRC) {
@@ -190,7 +187,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void MipsInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -223,7 +221,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void MipsInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -624,7 +623,8 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass);
bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Mips::GP,
Mips::CPURegsRegisterClass,
- Mips::CPURegsRegisterClass);
+ Mips::CPURegsRegisterClass,
+ DebugLoc());
assert(Ok && "Couldn't assign to global base register!");
Ok = Ok; // Silence warning when assertions are turned off.
RegInfo.addLiveIn(Mips::GP);
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index ab8dc59..7919d9a 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -209,16 +209,19 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index 478da84..5e719af 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -288,7 +288,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
// Stack locations for FP and RA. If only one of them is used,
// the space must be allocated for both, otherwise no space at all.
- if (hasFP(MF) || MFI->hasCalls()) {
+ if (hasFP(MF) || MFI->adjustsStack()) {
// FP stack location
MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
StackOffset);
@@ -302,7 +302,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
MipsFI->setRAStackOffset(StackOffset);
StackOffset += RegSize;
- if (MFI->hasCalls())
+ if (MFI->adjustsStack())
TopCPUSavedRegOff += RegSize;
}
@@ -407,7 +407,7 @@ emitPrologue(MachineFunction &MF) const
unsigned StackSize = MFI->getStackSize();
// No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->hasCalls()) return;
+ if (StackSize == 0 && !MFI->adjustsStack()) return;
int FPOffset = MipsFI->getFPStackOffset();
int RAOffset = MipsFI->getRAStackOffset();
@@ -425,7 +425,7 @@ emitPrologue(MachineFunction &MF) const
// Save the return address only if the function isn't a leaf one.
// sw $ra, stack_loc($sp)
- if (MFI->hasCalls()) {
+ if (MFI->adjustsStack()) {
BuildMI(MBB, MBBI, dl, TII.get(Mips::SW))
.addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP);
}
@@ -477,7 +477,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
// Restore the return address only if the function isn't a leaf one.
// lw $ra, stack_loc($sp)
- if (MFI->hasCalls()) {
+ if (MFI->adjustsStack()) {
BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA)
.addImm(RAOffset).addReg(Mips::SP);
}
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 9fd044c..bc857b8 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -23,15 +23,6 @@ class MipsSubtarget;
class TargetInstrInfo;
class Type;
-namespace Mips {
- /// SubregIndex - The index of various sized subregister classes. Note that
- /// these indices must be kept in sync with the class indices in the
- /// MipsRegisterInfo.td file.
- enum SubregIndex {
- SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
- };
-}
-
struct MipsRegisterInfo : public MipsGenRegisterInfo {
const MipsSubtarget &Subtarget;
const TargetInstrInfo &TII;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index 00e7723..be78a22 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -34,9 +34,14 @@ class FPR<bits<5> num, string n> : MipsReg<n> {
}
// Mips 64-bit (aliased) FPU Registers
-class AFPR<bits<5> num, string n, list<Register> subregs>
+let Namespace = "Mips" in {
+def sub_fpeven : SubRegIndex;
+def sub_fpodd : SubRegIndex;
+}
+class AFPR<bits<5> num, string n, list<Register> subregs>
: MipsRegWithSubRegs<n, subregs> {
let Num = num;
+ let SubRegIndices = [sub_fpeven, sub_fpodd];
}
//===----------------------------------------------------------------------===//
@@ -141,23 +146,6 @@ let Namespace = "Mips" in {
}
//===----------------------------------------------------------------------===//
-// Subregister Set Definitions
-//===----------------------------------------------------------------------===//
-
-def mips_subreg_fpeven : PatLeaf<(i32 1)>;
-def mips_subreg_fpodd : PatLeaf<(i32 2)>;
-
-def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15],
- [F0, F2, F4, F6, F8, F10, F12, F14,
- F16, F18, F20, F22, F24, F26, F28, F30]>;
-
-def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15],
- [F1, F3, F5, F7, F9, F11, F13, F15,
- F17, F19, F21, F23, F25, F27, F29, F31]>;
-
-//===----------------------------------------------------------------------===//
// Register Classes
//===----------------------------------------------------------------------===//
@@ -255,7 +243,7 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64,
// Reserved
D15]>
{
- let SubRegClassList = [FGR32, FGR32];
+ let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
index 72c149d..e4d70fc 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-selectiondag-info"
-#include "MipsSelectionDAGInfo.h"
+#include "MipsTargetMachine.h"
using namespace llvm;
-MipsSelectionDAGInfo::MipsSelectionDAGInfo() {
+MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
MipsSelectionDAGInfo::~MipsSelectionDAGInfo() {
diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h
index 6eaf0c9..6cafb55 100644
--- a/lib/Target/Mips/MipsSelectionDAGInfo.h
+++ b/lib/Target/Mips/MipsSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class MipsTargetMachine;
+
class MipsSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- MipsSelectionDAGInfo();
+ explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM);
~MipsSelectionDAGInfo();
};
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 4724ff7..ad3eb9e 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -42,7 +42,7 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")),
InstrInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
- TLInfo(*this) {
+ TLInfo(*this), TSInfo(*this) {
// Abicall enables PIC by default
if (getRelocationModel() == Reloc::Default) {
if (Subtarget.isABI_O32())
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index cd671cf..d63976f 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -17,6 +17,7 @@
#include "MipsSubtarget.h"
#include "MipsInstrInfo.h"
#include "MipsISelLowering.h"
+#include "MipsSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameInfo.h"
@@ -30,6 +31,7 @@ namespace llvm {
MipsInstrInfo InstrInfo;
TargetFrameInfo FrameInfo;
MipsTargetLowering TLInfo;
+ MipsSelectionDAGInfo TSInfo;
public:
MipsTargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool isLittle);
@@ -51,6 +53,10 @@ namespace llvm {
return &TLInfo;
}
+ virtual const MipsSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
index a424c27..aa2e1f4 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h
@@ -29,7 +29,7 @@
#include <string>
namespace llvm {
- class VISIBILITY_HIDDEN PIC16AsmPrinter : public AsmPrinter {
+ class LLVM_LIBRARY_VISIBILITY PIC16AsmPrinter : public AsmPrinter {
public:
explicit PIC16AsmPrinter(TargetMachine &TM, MCStreamer &Streamer);
private:
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index 5d86329..6a4d0d6 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -70,7 +70,7 @@ void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
// We also need to encode the information about the base type of
// pointer in TypeNo.
- DIType BaseType = DIDerivedType(Ty.getNode()).getTypeDerivedFrom();
+ DIType BaseType = DIDerivedType(Ty).getTypeDerivedFrom();
PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
}
@@ -79,7 +79,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
bool &HasAux, int Aux[],
std::string &TagName) {
- DICompositeType CTy = DICompositeType(Ty.getNode());
+ DICompositeType CTy = DICompositeType(Ty);
DIArray Elements = CTy.getTypeArray();
unsigned short size = 1;
unsigned short Dimension[4]={0,0,0,0};
@@ -88,7 +88,7 @@ void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
TypeNo = TypeNo | PIC16Dbg::DT_ARY;
- DISubrange SubRange = DISubrange(Element.getNode());
+ DISubrange SubRange = DISubrange(Element);
Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
// Each dimension is represented by 2 bytes starting at byte 9.
Aux[8+i*2+0] = Dimension[i];
@@ -111,7 +111,7 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
unsigned short &TypeNo,
bool &HasAux, int Aux[],
std::string &TagName) {
- DICompositeType CTy = DICompositeType(Ty.getNode());
+ DICompositeType CTy = DICompositeType(Ty);
TypeNo = TypeNo << PIC16Dbg::S_BASIC;
if (Ty.getTag() == dwarf::DW_TAG_structure_type)
TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
@@ -124,7 +124,7 @@ void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
// llvm.dbg.composite* global variable. Since we need to revisit
// the PIC16DebugInfo implementation anyway after the MDNode-based
// framework is done, let us continue with the way it is.
- std::string UniqueSuffix = "." + Ty.getNode()->getNameStr().substr(18);
+ std::string UniqueSuffix = "." + Ty->getNameStr().substr(18);
TagName += UniqueSuffix;
unsigned short size = CTy.getSizeInBits()/8;
// The 7th and 8th bytes represent the size.
@@ -303,7 +303,7 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
bool HasAux = false;
int ElementAux[PIC16Dbg::AuxSize] = { 0 };
std::string TagName = "";
- DIDerivedType DITy(Element.getNode());
+ DIDerivedType DITy(Element);
unsigned short ElementSize = DITy.getSizeInBits()/8;
// Get the mangled name for this structure/union element.
std::string MangMemName = DITy.getName().str() + SuffixNo;
@@ -336,7 +336,7 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
CTy.getTag() == dwarf::DW_TAG_structure_type ) {
// Get the number after llvm.dbg.composite and make UniqueSuffix from
// it.
- std::string DIVar = CTy.getNode()->getNameStr();
+ std::string DIVar = CTy->getNameStr();
std::string UniqueSuffix = "." + DIVar.substr(18);
std::string MangledCTyName = CTy.getName().str() + UniqueSuffix;
unsigned short size = CTy.getSizeInBits()/8;
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
index f1fcec5..ecaddd3 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -26,7 +26,7 @@ using namespace llvm;
namespace {
-class VISIBILITY_HIDDEN PIC16DAGToDAGISel : public SelectionDAGISel {
+class LLVM_LIBRARY_VISIBILITY PIC16DAGToDAGISel : public SelectionDAGISel {
/// TM - Keep a reference to PIC16TargetMachine.
const PIC16TargetMachine &TM;
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index 9e415e0..793dd9f 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -70,7 +70,8 @@ unsigned PIC16InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const PIC16TargetLowering *PTLI = TM.getTargetLowering();
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -112,7 +113,8 @@ void PIC16InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const PIC16TargetLowering *PTLI = TM.getTargetLowering();
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -153,9 +155,8 @@ bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC == PIC16::FSR16RegisterClass) {
BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg);
diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h
index 56f51f0..40a4cb4 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.h
+++ b/lib/Target/PIC16/PIC16InstrInfo.h
@@ -49,17 +49,20 @@ public:
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual bool copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual bool isMoveInstr(const MachineInstr &MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h
index 9039ca7..5b33b51 100644
--- a/lib/Target/PIC16/PIC16Section.h
+++ b/lib/Target/PIC16/PIC16Section.h
@@ -44,7 +44,8 @@ namespace llvm {
unsigned Size;
PIC16Section(StringRef name, SectionKind K, StringRef addr, int color)
- : MCSection(K), Name(name), Address(addr), Color(color), Size(0) {
+ : MCSection(SV_PIC16, K), Name(name), Address(addr),
+ Color(color), Size(0) {
}
public:
@@ -86,6 +87,11 @@ namespace llvm {
/// to a section.
virtual void PrintSwitchToSection(const MCAsmInfo &MAI,
raw_ostream &OS) const;
+
+ static bool classof(const MCSection *S) {
+ return S->getVariant() == SV_PIC16;
+ }
+ static bool classof(const PIC16Section *) { return true; }
};
} // end namespace llvm
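
PIC16Section now participates in LLVM-style RTTI: MCSection carries a variant tag (SV_PIC16 is passed to the base constructor above), and the classof overloads let isa<> and dyn_cast<> recover the subclass from a plain MCSection pointer. A sketch of a consumer, hedged since none appears in this hunk:

    if (const PIC16Section *PS = dyn_cast<PIC16Section>(S))
      PS->PrintSwitchToSection(MAI, OS);  // succeeds only for SV_PIC16
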
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
index 76c6c60..995955a 100644
--- a/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
+++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "pic16-selectiondag-info"
-#include "PIC16SelectionDAGInfo.h"
+#include "PIC16TargetMachine.h"
using namespace llvm;
-PIC16SelectionDAGInfo::PIC16SelectionDAGInfo() {
+PIC16SelectionDAGInfo::PIC16SelectionDAGInfo(const PIC16TargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
PIC16SelectionDAGInfo::~PIC16SelectionDAGInfo() {
diff --git a/lib/Target/PIC16/PIC16SelectionDAGInfo.h b/lib/Target/PIC16/PIC16SelectionDAGInfo.h
index 112480e5..c67fd8b 100644
--- a/lib/Target/PIC16/PIC16SelectionDAGInfo.h
+++ b/lib/Target/PIC16/PIC16SelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class PIC16TargetMachine;
+
class PIC16SelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- PIC16SelectionDAGInfo();
+ explicit PIC16SelectionDAGInfo(const PIC16TargetMachine &TM);
~PIC16SelectionDAGInfo();
};
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index e2acb85..82b69be 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -35,7 +35,7 @@ PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, Trad),
DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"),
- InstrInfo(*this), TLInfo(*this),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
diff --git a/lib/Target/PIC16/PIC16TargetMachine.h b/lib/Target/PIC16/PIC16TargetMachine.h
index 849845a..dae5d31 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.h
+++ b/lib/Target/PIC16/PIC16TargetMachine.h
@@ -17,6 +17,7 @@
#include "PIC16InstrInfo.h"
#include "PIC16ISelLowering.h"
+#include "PIC16SelectionDAGInfo.h"
#include "PIC16RegisterInfo.h"
#include "PIC16Subtarget.h"
#include "llvm/Target/TargetData.h"
@@ -32,6 +33,7 @@ class PIC16TargetMachine : public LLVMTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
PIC16InstrInfo InstrInfo;
PIC16TargetLowering TLInfo;
+ PIC16SelectionDAGInfo TSInfo;
// PIC16 does not have any call stack frame, therefore not having
// any PIC16 specific FrameInfo class.
@@ -54,6 +56,10 @@ public:
return &TLInfo;
}
+ virtual const PIC16SelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual bool addInstSelector(PassManagerBase &PM,
CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
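
A sketch of how generic code consumes the new hook, assuming the base TargetMachine grew a matching virtual getSelectionDAGInfo() in this update (returning null for targets without one):

    #include "llvm/Target/TargetMachine.h"

    static const llvm::TargetSelectionDAGInfo *
    dagInfoFor(const llvm::TargetMachine &TM) {
      // Per-target DAG info is now owned by the TargetMachine.
      return TM.getSelectionDAGInfo();
    }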
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3d9f8aa..00eebb8 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -712,8 +712,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
if (PPCSubTarget.isGigaProcessor() && OtherCondIdx == -1)
IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
CCReg), 0);
- else
- IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, CCReg), 0);
+ else
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ CR7Reg, CCReg), 0);
SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
getI32Imm(31), getI32Imm(31) };
@@ -848,7 +849,8 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
N->getOperand(0), InFlag);
else
- return CurDAG->getMachineNode(PPC::MFCR, dl, MVT::i32, InFlag);
+ return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
+ N->getOperand(0), InFlag);
}
case ISD::SDIV: {
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 6f11953..10b516a 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5496,12 +5496,15 @@ bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
DebugLoc dl = Op.getDebugLoc();
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
// Make sure the function does not optimize away the store of the RA to
// the stack.
- MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setLRStoreRequired();
bool isPPC64 = PPCSubTarget.isPPC64();
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 1d05f3d..6dcaf1e 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -111,9 +111,10 @@ namespace llvm {
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
- /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCR/MFOCRF instructions.
- /// This copies the bits corresponding to the specified CRREG into the
- /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
+ /// instructions. This copies the bits corresponding to the specified
+ /// CRREG into the resultant GPR. Bits corresponding to other CR regs
+ /// are undefined.
MFCR,
/// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index ae1fbd8..1b7a778 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -203,8 +203,6 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
BuildMI(MBB, MI, DL, get(PPC::NOP));
}
@@ -347,15 +345,13 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC != SrcRC) {
// Not yet supported!
return false;
}
- DebugLoc DL;
- if (MI != MBB.end()) DL = MI->getDebugLoc();
-
if (DestRC == PPC::GPRCRegisterClass) {
BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg);
} else if (DestRC == PPC::G8RCRegisterClass) {
@@ -446,7 +442,8 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
// issue a MFCR to save all of the CRBits.
unsigned ScratchReg = TM.getSubtargetImpl()->isDarwinABI() ?
PPC::R2 : PPC::R0;
- NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCR), ScratchReg));
+ NewMIs.push_back(BuildMI(MF, DL, get(PPC::MFCRpseud), ScratchReg)
+ .addReg(SrcReg, getKillRegState(isKill)));
// If the saved register wasn't CR0, shift the bits left so that they are
// in CR0's slot.
@@ -520,7 +517,8 @@ void
PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
@@ -635,7 +633,8 @@ void
PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
SmallVector<MachineInstr*, 4> NewMIs;
DebugLoc DL;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index 9fb6e7d..7a9e11b 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -114,17 +114,20 @@ public:
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
int FrameIx,
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 532a3ec..63b4581 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -662,7 +662,7 @@ def STWCX : XForm_1<31, 150, (outs), (ins GPRC:$rS, memrr:$dst),
[(PPCstcx GPRC:$rS, xoaddr:$dst)]>,
isDOT;
-let isBarrier = 1, hasCtrlDep = 1 in
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStGeneral, [(trap)]>;
//===----------------------------------------------------------------------===//
@@ -862,7 +862,6 @@ def STFDX : XForm_28<31, 727, (outs), (ins F8RC:$frS, memrr:$dst),
[(store F8RC:$frS, xaddr:$dst)]>;
}
-let isBarrier = 1 in
def SYNC : XForm_24_sync<31, 598, (outs), (ins),
"sync", LdStSync,
[(int_ppc_sync)]>;
@@ -1118,14 +1117,17 @@ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs GPRC:$rT), (ins),
def MTCRF : XFXForm_5<31, 144, (outs), (ins crbitm:$FXM, GPRC:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
-// FIXME: this Uses all the CR registers. Marking it as such is
-// necessary for DeadMachineInstructionElim to do the right thing.
-// However, marking it also exposes PR 2964, and causes crashes in
-// the Local RA because it doesn't like this sequence:
+
+// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters;
+// declaring that here gives the local register allocator problems with this:
// vreg = MCRF CR0
// MFCR <kill of whatever preg got assigned to vreg>
-// For now DeadMachineInstructionElim is turned off, so don't do the marking.
-def MFCR : XFXForm_3<31, 19, (outs GPRC:$rT), (ins), "mfcr $rT", SprMFCR>,
+// while not declaring it breaks DeadMachineInstructionElimination.
+// As it turns out, in all cases where we currently use this,
+// we're only interested in one subregister of it. Represent this in the
+// instruction to keep the register allocator from becoming confused.
+def MFCRpseud: XFXForm_3<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
+ "mfcr $rT ${:comment} $FXM", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
def MFOCRF: XFXForm_5a<31, 19, (outs GPRC:$rT), (ins crbitm:$FXM),
"mfcr $rT, $FXM", SprMFCR>,
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 5f1e04e..0ff852c 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -689,19 +689,15 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
+ unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
- // an MFCR to save all of the CRBits. Add an implicit kill of the CR.
- if (!MI.getOperand(0).isKill())
- BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg);
- else
- // Implicitly kill the CR register.
- BuildMI(MBB, II, dl, TII.get(PPC::MFCR), Reg)
- .addReg(MI.getOperand(0).getReg(), RegState::ImplicitKill);
+ // an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg.
+ BuildMI(MBB, II, dl, TII.get(PPC::MFCRpseud), Reg)
+ .addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0, shift the bits left so that they are in
// CR0's slot.
- unsigned SrcReg = MI.getOperand(0).getReg();
if (SrcReg != PPC::CR0)
// rlwinm rA, rA, ShiftBits, 0, 31.
BuildMI(MBB, II, dl, TII.get(PPC::RLWINM), Reg)
@@ -1009,7 +1005,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
if (!DisableRedZone &&
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->hasCalls() && // No calls.
+ !MFI->adjustsStack() && // No calls.
(!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
// No need for frame
MFI->setStackSize(0);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 1cb7340..8604f54 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -10,6 +10,15 @@
//
//===----------------------------------------------------------------------===//
+let Namespace = "PPC" in {
+def sub_lt : SubRegIndex;
+def sub_gt : SubRegIndex;
+def sub_eq : SubRegIndex;
+def sub_un : SubRegIndex;
+def sub_32 : SubRegIndex;
+}
+
+
class PPCReg<string n> : Register<n> {
let Namespace = "PPC";
}
@@ -25,6 +34,7 @@ class GPR<bits<5> num, string n> : PPCReg<n> {
class GP8<GPR SubReg, string n> : PPCReg<n> {
field bits<5> Num = SubReg.Num;
let SubRegs = [SubReg];
+ let SubRegIndices = [sub_32];
}
// SPR - One of the 32-bit special-purpose registers
@@ -225,6 +235,7 @@ def CR7EQ : CRBIT<30, "30">, DwarfRegNum<[0]>;
def CR7UN : CRBIT<31, "31">, DwarfRegNum<[0]>;
// Condition registers
+let SubRegIndices = [sub_lt, sub_gt, sub_eq, sub_un] in {
def CR0 : CR<0, "cr0", [CR0LT, CR0GT, CR0EQ, CR0UN]>, DwarfRegNum<[68]>;
def CR1 : CR<1, "cr1", [CR1LT, CR1GT, CR1EQ, CR1UN]>, DwarfRegNum<[69]>;
def CR2 : CR<2, "cr2", [CR2LT, CR2GT, CR2EQ, CR2UN]>, DwarfRegNum<[70]>;
@@ -233,15 +244,7 @@ def CR4 : CR<4, "cr4", [CR4LT, CR4GT, CR4EQ, CR4UN]>, DwarfRegNum<[72]>;
def CR5 : CR<5, "cr5", [CR5LT, CR5GT, CR5EQ, CR5UN]>, DwarfRegNum<[73]>;
def CR6 : CR<6, "cr6", [CR6LT, CR6GT, CR6EQ, CR6UN]>, DwarfRegNum<[74]>;
def CR7 : CR<7, "cr7", [CR7LT, CR7GT, CR7EQ, CR7UN]>, DwarfRegNum<[75]>;
-
-def : SubRegSet<1, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
- [CR0LT, CR1LT, CR2LT, CR3LT, CR4LT, CR5LT, CR6LT, CR7LT]>;
-def : SubRegSet<2, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
- [CR0GT, CR1GT, CR2GT, CR3GT, CR4GT, CR5GT, CR6GT, CR7GT]>;
-def : SubRegSet<3, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
- [CR0EQ, CR1EQ, CR2EQ, CR3EQ, CR4EQ, CR5EQ, CR6EQ, CR7EQ]>;
-def : SubRegSet<4, [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7],
- [CR0UN, CR1UN, CR2UN, CR3UN, CR4UN, CR5UN, CR6UN, CR7UN]>;
+}
// Link register
def LR : SPR<8, "lr">, DwarfRegNum<[65]>;
@@ -372,7 +375,7 @@ def CRBITRC : RegisterClass<"PPC", [i32], 32,
def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2,
CR3, CR4]>
{
- let SubRegClassList = [CRBITRC, CRBITRC, CRBITRC, CRBITRC];
+ let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)];
}
def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
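
With named indices replacing the numbered SubRegSets, subregister lookups go through the generated enum; a sketch assuming the regenerated PPC tables:

    #include "PPC.h"
    #include "llvm/Target/TargetRegisterInfo.h"

    static unsigned crLTBit(const llvm::TargetRegisterInfo *TRI) {
      // Expected to yield PPC::CR0LT under the SubRegIndices defined above.
      return TRI->getSubReg(PPC::CR0, PPC::sub_lt);
    }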
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
index c0004a9..d4258b4 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "powerpc-selectiondag-info"
-#include "PPCSelectionDAGInfo.h"
+#include "PPCTargetMachine.h"
using namespace llvm;
-PPCSelectionDAGInfo::PPCSelectionDAGInfo() {
+PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {
diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
index 3ad3418..341b69c 100644
--- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h
+++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class PPCTargetMachine;
+
class PPCSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- PPCSelectionDAGInfo();
+ explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM);
~PPCSelectionDAGInfo();
};
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index c4a7408..10cd10b 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -44,7 +44,8 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
- FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit), TLInfo(*this),
+ FrameInfo(*this, is64Bit), JITInfo(*this, is64Bit),
+ TLInfo(*this), TSInfo(*this),
InstrItins(Subtarget.getInstrItineraryData()) {
if (getRelocationModel() == Reloc::Default) {
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 35e33a2..626ddbb 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -19,6 +19,7 @@
#include "PPCJITInfo.h"
#include "PPCInstrInfo.h"
#include "PPCISelLowering.h"
+#include "PPCSelectionDAGInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetData.h"
@@ -35,6 +36,7 @@ class PPCTargetMachine : public LLVMTargetMachine {
PPCFrameInfo FrameInfo;
PPCJITInfo JITInfo;
PPCTargetLowering TLInfo;
+ PPCSelectionDAGInfo TSInfo;
InstrItineraryData InstrItins;
public:
@@ -47,6 +49,9 @@ public:
virtual const PPCTargetLowering *getTargetLowering() const {
return &TLInfo;
}
+ virtual const PPCSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
virtual const PPCRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 144bf5d..7fa73ed 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -1833,6 +1833,21 @@ entry:
We should use DSE + llvm.lifetime.end to delete dead vtable pointer updates.
See GCC PR34949
+Another interesting case is that something related could be used for variables
+that go const after their ctor has finished. In these cases, globalopt (which
+can statically run the constructor) could mark the global const (so it gets put
+in the readonly section). A testcase would be:
+
+#include <complex>
+using namespace std;
+const complex<char> should_be_in_rodata (42,-42);
+complex<char> should_be_in_data (42,-42);
+complex<char> should_be_in_bss;
+
+Currently we evaluate the ctors, but the globals don't become const because
+the optimizer doesn't know they "become const" after the ctor is done. See
+GCC PR4131 for more examples.
+
//===---------------------------------------------------------------------===//
In this code:
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index e494d7d..8e49eca 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -122,15 +122,13 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC != SrcRC) {
// Not yet supported!
return false;
}
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
-
if (DestRC == SP::IntRegsRegisterClass)
BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg);
else if (DestRC == SP::FPRegsRegisterClass)
@@ -148,7 +146,8 @@ bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
void SparcInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -169,7 +168,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
void SparcInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, int FI,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 345674b..a00ba39 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -74,17 +74,20 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 2b05c19..fede929 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -20,6 +20,11 @@ class SparcCtrlReg<string n>: Register<n> {
let Namespace = "SP";
}
+let Namespace = "SP" in {
+def sub_even : SubRegIndex;
+def sub_odd : SubRegIndex;
+}
+
// Registers are identified with 5-bit ID numbers.
// Ri - 32-bit integer registers
class Ri<bits<5> num, string n> : SparcReg<n> {
@@ -33,6 +38,7 @@ class Rf<bits<5> num, string n> : SparcReg<n> {
class Rd<bits<5> num, string n, list<Register> subregs> : SparcReg<n> {
let Num = num;
let SubRegs = subregs;
+ let SubRegIndices = [sub_even, sub_odd];
}
// Control Registers
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
index 4825aa9..190c575 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "sparc-selectiondag-info"
-#include "SparcSelectionDAGInfo.h"
+#include "SparcTargetMachine.h"
using namespace llvm;
-SparcSelectionDAGInfo::SparcSelectionDAGInfo() {
+SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
SparcSelectionDAGInfo::~SparcSelectionDAGInfo() {
diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h
index bc1b561..dcd4203 100644
--- a/lib/Target/Sparc/SparcSelectionDAGInfo.h
+++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class SparcTargetMachine;
+
class SparcSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- SparcSelectionDAGInfo();
+ explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM);
~SparcSelectionDAGInfo();
};
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index a676623..b58d6ba 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -34,7 +34,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
- TLInfo(*this), InstrInfo(Subtarget),
+ TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 1367a31..322c82a 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -20,6 +20,7 @@
#include "SparcInstrInfo.h"
#include "SparcSubtarget.h"
#include "SparcISelLowering.h"
+#include "SparcSelectionDAGInfo.h"
namespace llvm {
@@ -27,6 +28,7 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcSubtarget Subtarget;
const TargetData DataLayout; // Calculates type size & alignment
SparcTargetLowering TLInfo;
+ SparcSelectionDAGInfo TSInfo;
SparcInstrInfo InstrInfo;
TargetFrameInfo FrameInfo;
public:
@@ -42,6 +44,9 @@ public:
virtual const SparcTargetLowering* getTargetLowering() const {
return &TLInfo;
}
+ virtual const SparcSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
virtual const TargetData *getTargetData() const { return &DataLayout; }
// Pass Pipeline Configuration
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp
index 2094cc9..b35190a 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/Target/SubtargetFeature.cpp
@@ -359,29 +359,25 @@ void SubtargetFeatures::dump() const {
print(dbgs());
}
-/// getDefaultSubtargetFeatures - Return a string listing
-/// the features associated with the target triple.
+/// getDefaultSubtargetFeatures - Return a string listing the features
+/// associated with the target triple.
///
/// FIXME: This is an inelegant way of specifying the features of a
/// subtarget. It would be better if we could encode this information
/// into the IR. See <rdar://5972456>.
///
-std::string SubtargetFeatures::getDefaultSubtargetFeatures(
- const Triple& Triple) {
- switch (Triple.getVendor()) {
- case Triple::Apple:
- switch (Triple.getArch()) {
- case Triple::ppc: // powerpc-apple-*
- return std::string("altivec");
- case Triple::ppc64: // powerpc64-apple-*
- return std::string("64bit,altivec");
- default:
- break;
+void SubtargetFeatures::getDefaultSubtargetFeatures(const std::string &CPU,
+ const Triple& Triple) {
+ setCPU(CPU);
+
+ if (Triple.getVendor() == Triple::Apple) {
+ if (Triple.getArch() == Triple::ppc) {
+ // powerpc-apple-*
+ AddFeature("altivec");
+ } else if (Triple.getArch() == Triple::ppc64) {
+ // powerpc64-apple-*
+ AddFeature("64bit");
+ AddFeature("altivec");
}
- break;
- default:
- break;
- }
-
- return std::string("");
+ }
}
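
A usage sketch for the reshaped API, which now accumulates defaults into the object via AddFeature instead of returning a string (Triple spelling assumed):

    #include <string>
    #include "llvm/ADT/Triple.h"
    #include "llvm/Target/SubtargetFeature.h"

    static std::string defaultPPC64Features() {
      llvm::SubtargetFeatures F;
      F.getDefaultSubtargetFeatures("", llvm::Triple("powerpc64-apple-darwin"));
      return F.getString(); // reflects "64bit" and "altivec" per the code above
    }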
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
index 07cfb2c..90be222 100644
--- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
@@ -124,9 +124,9 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
unsigned Reg = MO.getReg();
if (Modifier && strncmp(Modifier, "subreg", 6) == 0) {
if (strncmp(Modifier + 7, "even", 4) == 0)
- Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::SUBREG_EVEN);
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_even32);
else if (strncmp(Modifier + 7, "odd", 3) == 0)
- Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::SUBREG_ODD);
+ Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32);
else
assert(0 && "Invalid subreg modifier");
}
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 75d563b..bb2952a 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -30,11 +30,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-static const unsigned subreg_even32 = 1;
-static const unsigned subreg_odd32 = 2;
-static const unsigned subreg_even = 3;
-static const unsigned subreg_odd = 4;
-
namespace {
/// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's
/// instead of register numbers for the leaves of the matched tree.
@@ -644,7 +639,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
Dividend =
CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
SDValue(Tmp, 0), SDValue(Dividend, 0),
- CurDAG->getTargetConstant(subreg_odd, MVT::i32));
+ CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32));
SDNode *Result;
SDValue DivVal = SDValue(Dividend, 0);
@@ -660,7 +655,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the division (odd subreg) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
@@ -673,7 +669,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the remainder (even subreg) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_even32 : SystemZ::subreg_even);
SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
@@ -718,7 +715,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, ResVT);
{
- unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
Dividend =
CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT,
SDValue(Tmp, 0), SDValue(Dividend, 0),
@@ -742,7 +740,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the division (odd subreg) result, if it is needed.
if (!SDValue(Node, 0).use_empty()) {
- unsigned SubRegIdx = (is32Bit ? subreg_odd32 : subreg_odd);
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_odd32 : SystemZ::subreg_odd);
SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
@@ -754,7 +753,8 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Copy the remainder (even subreg) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
- unsigned SubRegIdx = (is32Bit ? subreg_even32 : subreg_even);
+ unsigned SubRegIdx = (is32Bit ?
+ SystemZ::subreg_even32 : SystemZ::subreg_even);
SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
dl, NVT,
SDValue(Result, 0),
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index e98f18b..76f2901 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -81,7 +81,7 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
// LLVM's current latency-oriented scheduler can't handle physreg definitions
// such as SystemZ has with PSW, so set this to the register-pressure
// scheduler, because it can.
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
setBooleanContents(ZeroOrOneBooleanContent);
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index c92caa4..043686c 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -61,7 +61,8 @@ static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) {
void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -90,7 +91,8 @@ void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
@@ -119,9 +121,8 @@ bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
// Determine if DstRC and SrcRC have a common superclass.
const TargetRegisterClass *CommonRC = DestRC;
@@ -269,7 +270,8 @@ unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
bool
SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -333,7 +335,8 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetRegisterClass *RegClass = CSI[i].getRegClass();
if (RegClass == &SystemZ::FP64RegClass) {
MBB.addLiveIn(Reg);
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass,
+ &RI);
}
}
@@ -343,7 +346,8 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool
SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -359,7 +363,7 @@ SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
unsigned Reg = CSI[i].getReg();
const TargetRegisterClass *RegClass = CSI[i].getRegClass();
if (RegClass == &SystemZ::FP64RegClass)
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
}
// Restore GP registers
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index ef3b39e..a753f14 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -63,7 +63,8 @@ public:
bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
bool isMoveInstr(const MachineInstr& MI,
unsigned &SrcReg, unsigned &DstReg,
@@ -75,18 +76,22 @@ public:
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill,
int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index 99e396a..42aa5dd 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -1,4 +1,4 @@
-//===- SystemZRegisterInfo.h - SystemZ Register Information Impl ----*- C++ -*-===//
+//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,15 +19,6 @@
namespace llvm {
-namespace SystemZ {
- /// SubregIndex - The index of various sized subregister classes. Note that
- /// these indices must be kept in sync with the class indices in the
- /// SystemZRegisterInfo.td file.
- enum SubregIndex {
- SUBREG_32BIT = 1, SUBREG_EVEN = 1, SUBREG_ODD = 2
- };
-}
-
class SystemZSubtarget;
class SystemZInstrInfo;
class Type;
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 8795847..b561744 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -53,6 +53,14 @@ class FPRL<bits<4> num, string n, list<Register> subregs>
field bits<4> Num = num;
}
+let Namespace = "SystemZ" in {
+def subreg_32bit : SubRegIndex;
+def subreg_even32 : SubRegIndex;
+def subreg_odd32 : SubRegIndex;
+def subreg_even : SubRegIndex;
+def subreg_odd : SubRegIndex;
+}
+
// General-purpose registers
def R0W : GPR32< 0, "r0">, DwarfRegNum<[0]>;
def R1W : GPR32< 1, "r1">, DwarfRegNum<[1]>;
@@ -71,6 +79,7 @@ def R13W : GPR32<13, "r13">, DwarfRegNum<[13]>;
def R14W : GPR32<14, "r14">, DwarfRegNum<[14]>;
def R15W : GPR32<15, "r15">, DwarfRegNum<[15]>;
+let SubRegIndices = [subreg_32bit] in {
def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>;
def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>;
def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>;
@@ -87,8 +96,10 @@ def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>;
def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>;
def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>;
def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>;
+}
// Register pairs
+let SubRegIndices = [subreg_even32, subreg_odd32] in {
def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>;
def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>;
def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>;
@@ -97,7 +108,11 @@ def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>, DwarfRegNum<[8]>;
def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>, DwarfRegNum<[10]>;
def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>, DwarfRegNum<[12]>;
def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>;
+}
+let SubRegIndices = [subreg_even, subreg_odd],
+ CompositeIndices = [(subreg_even32 subreg_even, subreg_32bit),
+ (subreg_odd32 subreg_odd, subreg_32bit)] in {
def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>;
def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>;
def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>;
@@ -106,6 +121,7 @@ def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>, DwarfRegNum<[8]>;
def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>, DwarfRegNum<[10]>;
def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>, DwarfRegNum<[12]>;
def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>, DwarfRegNum<[14]>;
+}
// Floating-point registers
def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>;
@@ -125,6 +141,7 @@ def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>;
def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>;
def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>;
+let SubRegIndices = [subreg_32bit] in {
def F0L : FPRL< 0, "f0", [F0S]>, DwarfRegNum<[16]>;
def F1L : FPRL< 1, "f1", [F1S]>, DwarfRegNum<[17]>;
def F2L : FPRL< 2, "f2", [F2S]>, DwarfRegNum<[18]>;
@@ -141,39 +158,11 @@ def F12L : FPRL<12, "f12", [F12S]>, DwarfRegNum<[28]>;
def F13L : FPRL<13, "f13", [F13S]>, DwarfRegNum<[29]>;
def F14L : FPRL<14, "f14", [F14S]>, DwarfRegNum<[30]>;
def F15L : FPRL<15, "f15", [F15S]>, DwarfRegNum<[31]>;
+}
// Status register
def PSW : SystemZReg<"psw">;
-def subreg_32bit : PatLeaf<(i32 1)>;
-def subreg_even32 : PatLeaf<(i32 1)>;
-def subreg_odd32 : PatLeaf<(i32 2)>;
-def subreg_even : PatLeaf<(i32 3)>;
-def subreg_odd : PatLeaf<(i32 4)>;
-
-def : SubRegSet<1, [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
- [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<3, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
- [R0D, R2D, R4D, R6D, R8D, R10D, R12D, R14D]>;
-
-def : SubRegSet<4, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
- [R1D, R3D, R5D, R7D, R9D, R11D, R13D, R15D]>;
-
-def : SubRegSet<1, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P],
- [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>;
-
-def : SubRegSet<2, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P],
- [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>;
-
-def : SubRegSet<1, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
- [R0W, R2W, R4W, R6W, R8W, R10W, R12W, R14W]>;
-
-def : SubRegSet<2, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q],
- [R1W, R3W, R5W, R7W, R9W, R11W, R13W, R15W]>;
-
/// Register classes
def GR32 : RegisterClass<"SystemZ", [i32], 32,
// Volatile registers
@@ -276,7 +265,7 @@ def GR64 : RegisterClass<"SystemZ", [i64], 64,
// Volatile, but not allocable
R14D, R15D]>
{
- let SubRegClassList = [GR32];
+ let SubRegClasses = [(GR32 subreg_32bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -323,7 +312,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
// Volatile, but not allocable
R14D, R15D]>
{
- let SubRegClassList = [ADDR32];
+ let SubRegClasses = [(ADDR32 subreg_32bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -366,7 +355,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
[R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]>
{
- let SubRegClassList = [GR32, GR32];
+ let SubRegClasses = [(GR32 subreg_even32, subreg_odd32)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -402,7 +391,8 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
[R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]>
{
- let SubRegClassList = [GR32, GR32, GR64, GR64];
+ let SubRegClasses = [(GR32 subreg_even32, subreg_odd32),
+ (GR64 subreg_even, subreg_odd)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -462,7 +452,7 @@ def FP32 : RegisterClass<"SystemZ", [f32], 32,
def FP64 : RegisterClass<"SystemZ", [f64], 64,
[F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L,
F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> {
- let SubRegClassList = [FP32];
+ let SubRegClasses = [(FP32 subreg_32bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
index 87c831b..3eabcd2 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "systemz-selectiondag-info"
-#include "SystemZSelectionDAGInfo.h"
+#include "SystemZTargetMachine.h"
using namespace llvm;
-SystemZSelectionDAGInfo::SystemZSelectionDAGInfo() {
+SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() {
diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
index 5292de9..1450401 100644
--- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
+++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class SystemZTargetMachine;
+
class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- SystemZSelectionDAGInfo();
+ explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM);
~SystemZSelectionDAGInfo();
};
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index dfa26a1..f45827b 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -29,7 +29,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T,
Subtarget(TT, FS),
DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
"-f64:64:64-f128:128:128-a0:16:16-n32:64"),
- InstrInfo(*this), TLInfo(*this),
+ InstrInfo(*this), TLInfo(*this), TSInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) {
if (getRelocationModel() == Reloc::Default)
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index d3357cc..6af829b 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -17,6 +17,7 @@
#include "SystemZInstrInfo.h"
#include "SystemZISelLowering.h"
+#include "SystemZSelectionDAGInfo.h"
#include "SystemZRegisterInfo.h"
#include "SystemZSubtarget.h"
#include "llvm/Target/TargetData.h"
@@ -32,6 +33,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
const TargetData DataLayout; // Calculates type size & alignment
SystemZInstrInfo InstrInfo;
SystemZTargetLowering TLInfo;
+ SystemZSelectionDAGInfo TSInfo;
// SystemZ does not have any call stack frame, therefore not having
// any SystemZ specific FrameInfo class.
@@ -53,6 +55,10 @@ public:
return &TLInfo;
}
+ virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
}; // SystemZTargetMachine.
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index ac67c91..df52368 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -212,7 +212,8 @@ FunctionSections("ffunction-sections",
//
TargetMachine::TargetMachine(const Target &T)
- : TheTarget(T), AsmInfo(0) {
+ : TheTarget(T), AsmInfo(0),
+ MCRelaxAll(false) {
// Typically it will be subtargets that will adjust FloatABIType from Default
// to Soft or Hard.
if (UseSoftFloat)
@@ -273,13 +274,14 @@ namespace llvm {
/// DisableFramePointerElim - This returns true if frame pointer elimination
/// optimization should be disabled for the given machine function.
bool DisableFramePointerElim(const MachineFunction &MF) {
- if (NoFramePointerElim)
- return true;
- if (NoFramePointerElimNonLeaf) {
+ // Check to see if we should eliminate non-leaf frame pointers and then
+ // check to see if we should eliminate all frame pointers.
+ if (NoFramePointerElimNonLeaf && !NoFramePointerElim) {
const MachineFrameInfo *MFI = MF.getFrameInfo();
return MFI->hasCalls();
}
- return false;
+
+ return NoFramePointerElim;
}
/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option
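
The rewritten decision reduces to a small pure function; a sketch (names illustrative, not LLVM API):

    // NoFPElimNonLeaf only matters when full elimination isn't already
    // forced; otherwise NoFPElim alone decides.
    static bool disableFPElim(bool NoFPElim, bool NoFPElimNonLeaf,
                              bool FunctionHasCalls) {
      if (NoFPElimNonLeaf && !NoFPElim)
        return FunctionHasCalls;
      return NoFPElim;
    }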
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 52983ff..dcc5f61 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -22,14 +22,14 @@ using namespace llvm;
TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
regclass_iterator RCB, regclass_iterator RCE,
+ const char *const *subregindexnames,
int CFSO, int CFDO,
const unsigned* subregs, const unsigned subregsize,
- const unsigned* superregs, const unsigned superregsize,
const unsigned* aliases, const unsigned aliasessize)
: SubregHash(subregs), SubregHashSize(subregsize),
- SuperregHash(superregs), SuperregHashSize(superregsize),
AliasesHash(aliases), AliasesHashSize(aliasessize),
- Desc(D), NumRegs(NR), RegClassBegin(RCB), RegClassEnd(RCE) {
+ Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR),
+ RegClassBegin(RCB), RegClassEnd(RCE) {
assert(NumRegs < FirstVirtualRegister &&
"Target has too many physical registers!");
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6b403c1..40a6a7b 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -186,20 +186,73 @@ struct X86Operand : public MCParsedAsmOperand {
bool isImm() const { return Kind == Immediate; }
- bool isImmSExt8() const {
- // Accept immediates which fit in 8 bits when sign extended, and
- // non-absolute immediates.
+ bool isImmSExti16i8() const {
if (!isImm())
return false;
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm())) {
- int64_t Value = CE->getValue();
- return Value == (int64_t) (int8_t) Value;
- }
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
- return true;
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x000000000000FF80ULL <= Value && Value <= 0x000000000000FFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
}
-
+ bool isImmSExti32i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0x00000000FFFFFF80ULL <= Value && Value <= 0x00000000FFFFFFFFULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i8() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000000000007FULL)||
+ (0xFFFFFFFFFFFFFF80ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+ bool isImmSExti64i32() const {
+ if (!isImm())
+ return false;
+
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE)
+ return true;
+
+ // Otherwise, check the value is in a range that makes sense for this
+ // extension.
+ uint64_t Value = CE->getValue();
+ return (( Value <= 0x000000007FFFFFFFULL)||
+ (0xFFFFFFFF80000000ULL <= Value && Value <= 0xFFFFFFFFFFFFFFFFULL));
+ }
+
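
The four sign-extension checks above share one pattern; a worked sketch of the i64i32 case as a standalone predicate:

    #include <cstdint>

    static bool fitsSExti64i32(uint64_t V) {
      return V <= 0x000000007FFFFFFFULL ||  // non-negative i32 range
             V >= 0xFFFFFFFF80000000ULL;    // negative values, sign-extended
    }
    // fitsSExti64i32(0x7FFFFFFFULL) -> true
    // fitsSExti64i32(0x80000000ULL) -> false (doesn't sign-extend back)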
bool isMem() const { return Kind == Memory; }
bool isAbsMem() const {
@@ -231,12 +284,6 @@ struct X86Operand : public MCParsedAsmOperand {
addExpr(Inst, getImm());
}
- void addImmSExt8Operands(MCInst &Inst, unsigned N) const {
- // FIXME: Support user customization of the render method.
- assert(N == 1 && "Invalid number of operands!");
- addExpr(Inst, getImm());
- }
-
void addMemOperands(MCInst &Inst, unsigned N) const {
assert((N == 5) && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateReg(getMemBaseReg()));
@@ -535,6 +582,21 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
bool X86ATTAsmParser::
ParseInstruction(const StringRef &Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
+ // The various flavors of pushf and popf use Requires<In32BitMode> and
+ // Requires<In64BitMode>, but the assembler doesn't yet implement that.
+ // For now, just do a manual check to prevent silent misencoding.
+ if (Is64Bit) {
+ if (Name == "popfl")
+ return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
+ else if (Name == "pushfl")
+ return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
+ } else {
+ if (Name == "popfq")
+ return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
+ else if (Name == "pushfq")
+ return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode");
+ }
+
// FIXME: Hack to recognize "sal..." and "rep..." for now. We need a way to
// represent alternative syntaxes in the .td file, without requiring
// instruction duplication.
@@ -547,9 +609,66 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
.Case("repe", "rep")
.Case("repz", "rep")
.Case("repnz", "repne")
+ .Case("pushf", Is64Bit ? "pushfq" : "pushfl")
+ .Case("popf", Is64Bit ? "popfq" : "popfl")
+ .Case("retl", Is64Bit ? "retl" : "ret")
+ .Case("retq", Is64Bit ? "ret" : "retq")
+ .Case("setz", "sete")
+ .Case("setnz", "setne")
+ .Case("jz", "je")
+ .Case("jnz", "jne")
+ .Case("cmovcl", "cmovbl")
+ .Case("cmovcl", "cmovbl")
+ .Case("cmovnal", "cmovbel")
+ .Case("cmovnbl", "cmovael")
+ .Case("cmovnbel", "cmoval")
+ .Case("cmovncl", "cmovael")
+ .Case("cmovngl", "cmovlel")
+ .Case("cmovnl", "cmovgel")
+ .Case("cmovngl", "cmovlel")
+ .Case("cmovngel", "cmovll")
+ .Case("cmovnll", "cmovgel")
+ .Case("cmovnlel", "cmovgl")
+ .Case("cmovnzl", "cmovnel")
+ .Case("cmovzl", "cmovel")
.Default(Name);
+
+ // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
+ const MCExpr *ExtraImmOp = 0;
+ if (PatchedName.startswith("cmp") &&
+ (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
+ PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+ unsigned SSEComparisonCode = StringSwitch<unsigned>(
+ PatchedName.slice(3, PatchedName.size() - 2))
+ .Case("eq", 0)
+ .Case("lt", 1)
+ .Case("le", 2)
+ .Case("unord", 3)
+ .Case("neq", 4)
+ .Case("nlt", 5)
+ .Case("nle", 6)
+ .Case("ord", 7)
+ .Default(~0U);
+ if (SSEComparisonCode != ~0U) {
+ ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
+ getParser().getContext());
+ if (PatchedName.endswith("ss")) {
+ PatchedName = "cmpss";
+ } else if (PatchedName.endswith("sd")) {
+ PatchedName = "cmpsd";
+ } else if (PatchedName.endswith("ps")) {
+ PatchedName = "cmpps";
+ } else {
+ assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
+ PatchedName = "cmppd";
+ }
+ }
+ }
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
+ if (ExtraImmOp)
+ Operands.push_back(X86Operand::CreateImm(ExtraImmOp, NameLoc, NameLoc));
+
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Parse '*' modifier.
@@ -564,7 +683,7 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
Operands.push_back(Op);
else
return true;
-
+
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // Eat the comma.
@@ -587,6 +706,17 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 1);
}
+ // FIXME: Hack to handle "f{mul*,add*,sub*,div*} $op, st(0)" the same as
+ // "f{mul*,add*,sub*,div*} $op"
+ if ((Name.startswith("fmul") || Name.startswith("fadd") ||
+ Name.startswith("fsub") || Name.startswith("fdiv")) &&
+ Operands.size() == 3 &&
+ static_cast<X86Operand*>(Operands[2])->isReg() &&
+ static_cast<X86Operand*>(Operands[2])->getReg() == X86::ST0) {
+ delete Operands[2];
+ Operands.erase(Operands.begin() + 2);
+ }
+
return false;
}
@@ -622,6 +752,31 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
+/// LowerMOffset - Lower an 'moffset' form of an instruction, which just has an
+/// imm operand, to having "rm" or "mr" operands with the offset in the disp
+/// field.
+static void LowerMOffset(MCInst &Inst, unsigned Opc, unsigned RegNo,
+ bool isMR) {
+ MCOperand Disp = Inst.getOperand(0);
+
+ // Start over with an empty instruction.
+ Inst = MCInst();
+ Inst.setOpcode(Opc);
+
+ if (!isMR)
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+
+ // Add the mem operand.
+ Inst.addOperand(MCOperand::CreateReg(0)); // Segment
+ Inst.addOperand(MCOperand::CreateImm(1)); // Scale
+ Inst.addOperand(MCOperand::CreateReg(0)); // IndexReg
+ Inst.addOperand(Disp); // Displacement
+ Inst.addOperand(MCOperand::CreateReg(0)); // BaseReg
+
+ if (isMR)
+ Inst.addOperand(MCOperand::CreateReg(RegNo));
+}
+
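A usage sketch for the helper above (X86 opcode names as in this tree; the literal offset is illustrative):

    #include "llvm/MC/MCInst.h"
    using namespace llvm;

    static void lowerExample() {
      // "movb 0x1234, %al" parses to MOV8o8a with a bare offset operand.
      MCInst Inst;
      Inst.setOpcode(X86::MOV8o8a);
      Inst.addOperand(MCOperand::CreateImm(0x1234));
      LowerMOffset(Inst, X86::MOV8rm, X86::AL, /*isMR=*/false);
      // Operands now: AL, seg=0, scale=1, index=0, disp=0x1234, base=0,
      // exactly the order the helper builds them in.
    }
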
// FIXME: Custom X86 cleanup function to implement a temporary hack to handle
// matching INCL/DECL correctly for x86_64. This needs to be replaced by a
// proper mechanism for supporting (ambiguous) feature dependent instructions.
@@ -637,6 +792,14 @@ void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) {
case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break;
case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break;
case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break;
+
+ // moffset instructions are x86-32 only.
+ case X86::MOV8o8a: LowerMOffset(Inst, X86::MOV8rm , X86::AL , false); break;
+ case X86::MOV16o16a: LowerMOffset(Inst, X86::MOV16rm, X86::AX , false); break;
+ case X86::MOV32o32a: LowerMOffset(Inst, X86::MOV32rm, X86::EAX, false); break;
+ case X86::MOV8ao8: LowerMOffset(Inst, X86::MOV8mr , X86::AL , true); break;
+ case X86::MOV16ao16: LowerMOffset(Inst, X86::MOV16mr, X86::AX , true); break;
+ case X86::MOV32ao32: LowerMOffset(Inst, X86::MOV32mr, X86::EAX, true); break;
}
}
@@ -673,6 +836,8 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
bool MatchW = MatchInstructionImpl(Operands, Inst);
Tmp[Base.size()] = 'l';
bool MatchL = MatchInstructionImpl(Operands, Inst);
+ Tmp[Base.size()] = 'q';
+ bool MatchQ = MatchInstructionImpl(Operands, Inst);
// Restore the old token.
Op->setTokenValue(Base);
@@ -680,7 +845,7 @@ X86ATTAsmParser::MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*>
// If exactly one matched, then we treat that as a successful match (and the
// instruction will already have been filled in correctly, since the failing
// matches won't have modified it).
- if (MatchB + MatchW + MatchL == 2)
+ if (MatchB + MatchW + MatchL + MatchQ == 3)
return false;
// Otherwise, the match failed.
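The updated arithmetic is worth spelling out: MatchInstructionImpl returns true on failure, so with four candidate suffixes an unambiguous match leaves exactly three failures, hence the sum of 3. A minimal sketch of just that test:

#include <cassert>

int main() {
  // true == failed to match; only the 'l'-suffixed form matched here.
  bool MatchB = true, MatchW = true, MatchL = false, MatchQ = true;
  assert(MatchB + MatchW + MatchL + MatchQ == 3 && "exactly one match");
  return 0;
}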
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 8b0ed1c..183213d 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -58,12 +58,11 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
if (Subtarget->isTargetCOFF()) {
- const Function *F = MF.getFunction();
- OutStreamer.EmitRawText("\t.def\t " + Twine(CurrentFnSym->getName()) +
- ";\t.scl\t" +
- Twine(F->hasInternalLinkage() ? COFF::C_STAT : COFF::C_EXT) +
- ";\t.type\t" + Twine(COFF::DT_FCN << COFF::N_BTSHFT)
- + ";\t.endef");
+ bool Intrn = MF.getFunction()->hasInternalLinkage();
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT);
+ OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+ OutStreamer.EndCOFFSymbolDef();
}
// Have common code print out the function header with linkage info etc.
@@ -571,44 +570,55 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
MMI->getObjFileInfo<X86COFFMachineModuleInfo>();
// Emit type information for external functions
- for (X86COFFMachineModuleInfo::stub_iterator I = COFFMMI.stub_begin(),
- E = COFFMMI.stub_end(); I != E; ++I) {
- OutStreamer.EmitRawText("\t.def\t " + Twine(I->getKeyData()) +
- ";\t.scl\t" + Twine(COFF::C_EXT) +
- ";\t.type\t" +
- Twine(COFF::DT_FCN << COFF::N_BTSHFT) +
- ";\t.endef");
+ typedef X86COFFMachineModuleInfo::externals_iterator externals_iterator;
+ for (externals_iterator I = COFFMMI.externals_begin(),
+ E = COFFMMI.externals_end();
+ I != E; ++I) {
+ OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT);
+ OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+ OutStreamer.EndCOFFSymbolDef();
}
- if (Subtarget->isTargetCygMing()) {
- // Necessary for dllexport support
- std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
-
- const TargetLoweringObjectFileCOFF &TLOFCOFF =
- static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
-
- for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
- if (I->hasDLLExportLinkage())
- DLLExportedFns.push_back(Mang->getSymbol(I));
-
- for (Module::const_global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I)
- if (I->hasDLLExportLinkage())
- DLLExportedGlobals.push_back(Mang->getSymbol(I));
-
- // Output linker support code for dllexported globals on windows.
- if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
- OutStreamer.SwitchSection(TLOFCOFF.getCOFFSection(".section .drectve",
- true,
- SectionKind::getMetadata()));
- for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i)
- OutStreamer.EmitRawText("\t.ascii \" -export:" +
- Twine(DLLExportedGlobals[i]->getName()) +
- ",data\"");
-
- for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i)
- OutStreamer.EmitRawText("\t.ascii \" -export:" +
- Twine(DLLExportedFns[i]->getName()) + "\"");
+ // Necessary for dllexport support
+ std::vector<const MCSymbol*> DLLExportedFns, DLLExportedGlobals;
+
+ const TargetLoweringObjectFileCOFF &TLOFCOFF =
+ static_cast<const TargetLoweringObjectFileCOFF&>(getObjFileLowering());
+
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedFns.push_back(Mang->getSymbol(I));
+
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I)
+ if (I->hasDLLExportLinkage())
+ DLLExportedGlobals.push_back(Mang->getSymbol(I));
+
+ // Output linker support code for dllexported globals on windows.
+ if (!DLLExportedGlobals.empty() || !DLLExportedFns.empty()) {
+ OutStreamer.SwitchSection(TLOFCOFF.getDrectveSection());
+ SmallString<128> name;
+ for (unsigned i = 0, e = DLLExportedGlobals.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedGlobals[i]->getName();
+ if (Subtarget->isTargetWindows())
+ name += ",DATA";
+ else
+ name += ",data";
+ OutStreamer.EmitBytes(name, 0);
+ }
+
+ for (unsigned i = 0, e = DLLExportedFns.size(); i != e; ++i) {
+ if (Subtarget->isTargetWindows())
+ name = " /EXPORT:";
+ else
+ name = " -export:";
+ name += DLLExportedFns[i]->getName();
+ OutStreamer.EmitBytes(name, 0);
}
}
}
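For reference, the bytes emitted above become linker directives in the .drectve section. A hedged sketch of the string assembly (the symbol name is made up): MSVC's link.exe parses " /EXPORT:name,DATA", while GNU ld on Cygwin/MinGW expects " -export:name,data".

#include <cassert>
#include <string>

int main() {
  bool IsTargetWindows = true;  // the MSVC toolchain case from the code above
  std::string name = IsTargetWindows ? " /EXPORT:" : " -export:";
  name += "_my_global";         // hypothetical mangled symbol name
  name += IsTargetWindows ? ",DATA" : ",data";
  assert(name == " /EXPORT:_my_global,DATA");
  return 0;
}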
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
index 95984b2..b5a7f8d 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.h
@@ -31,7 +31,7 @@ class MCInst;
class MCStreamer;
class MCSymbol;
-class VISIBILITY_HIDDEN X86AsmPrinter : public AsmPrinter {
+class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
const X86Subtarget *Subtarget;
public:
explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index effc8ed..4edeca9 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -224,6 +224,60 @@ static void LowerUnaryToTwoAddr(MCInst &OutMI, unsigned NewOpc) {
OutMI.addOperand(OutMI.getOperand(0));
}
+/// \brief Simplify FOO $imm, %{al,ax,eax,rax} to FOO $imm, for instructions
+/// with a short fixed-register form.
+static void SimplifyShortImmForm(MCInst &Inst, unsigned Opcode) {
+ unsigned ImmOp = Inst.getNumOperands() - 1;
+ assert(Inst.getOperand(0).isReg() && Inst.getOperand(ImmOp).isImm() &&
+ ((Inst.getNumOperands() == 3 && Inst.getOperand(1).isReg() &&
+ Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg()) ||
+ Inst.getNumOperands() == 2) && "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(0).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(ImmOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
+
+/// \brief Simplify things like MOV32rm to MOV32o32a.
+static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) {
+ bool IsStore = Inst.getOperand(0).isReg() && Inst.getOperand(1).isReg();
+ unsigned AddrBase = IsStore;
+ unsigned RegOp = IsStore ? 0 : 5;
+ unsigned AddrOp = AddrBase + 3;
+ assert(Inst.getNumOperands() == 6 && Inst.getOperand(RegOp).isReg() &&
+ Inst.getOperand(AddrBase + 0).isReg() && // base
+ Inst.getOperand(AddrBase + 1).isImm() && // scale
+ Inst.getOperand(AddrBase + 2).isReg() && // index register
+ (Inst.getOperand(AddrOp).isExpr() || // address
+          Inst.getOperand(AddrOp).isImm()) &&
+ Inst.getOperand(AddrBase + 4).isReg() && // segment
+ "Unexpected instruction!");
+
+ // Check whether the destination register can be fixed.
+ unsigned Reg = Inst.getOperand(RegOp).getReg();
+ if (Reg != X86::AL && Reg != X86::AX && Reg != X86::EAX && Reg != X86::RAX)
+ return;
+
+ // Check whether this is an absolute address.
+ if (Inst.getOperand(AddrBase + 0).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 2).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 4).getReg() != 0 ||
+ Inst.getOperand(AddrBase + 1).getImm() != 1)
+ return;
+
+ // If so, rewrite the instruction.
+ MCOperand Saved = Inst.getOperand(AddrOp);
+ Inst = MCInst();
+ Inst.setOpcode(Opcode);
+ Inst.addOperand(Saved);
+}
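A standalone sketch of the pattern both helpers implement (toy types and made-up opcode numbers, not the MC classes): the short encodings exist only for the accumulator, and the rewrite keeps just the operand the short form still needs.

#include <cassert>
#include <vector>

enum Reg { NoReg = 0, AL, AX, EAX, RAX, EBX };

struct Inst {
  int Opcode;
  std::vector<long long> Ops;  // register ids and immediates, flattened
};

// "ADD32ri %eax, %eax, $imm" -> "ADD32i32 $imm", mirroring
// SimplifyShortImmForm above.
static void SimplifyImmSketch(Inst &I, int ShortOpc) {
  Reg Dst = (Reg)I.Ops[0];
  if (Dst != AL && Dst != AX && Dst != EAX && Dst != RAX)
    return;                      // only the accumulator has a short form
  long long Imm = I.Ops.back();  // the immediate is the last operand
  I = Inst{ShortOpc, {Imm}};     // start over: opcode plus immediate only
}

int main() {
  Inst Add = {/*ADD32ri*/ 100, {EAX, EAX, 42}};
  SimplifyImmSketch(Add, /*ADD32i32*/ 200);
  assert(Add.Opcode == 200 && Add.Ops.size() == 1 && Add.Ops[0] == 42);

  Inst Keep = {/*ADD32ri*/ 100, {EBX, EBX, 42}};
  SimplifyImmSketch(Keep, 200);
  assert(Keep.Opcode == 100);    // %ebx has no short form; left alone
  return 0;
}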
void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
OutMI.setOpcode(MI->getOpcode());
@@ -309,8 +363,32 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerSubReg32_Op0(OutMI, X86::MOV32r0); // MOV64r0 -> MOV32r0
LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr
break;
-
-
+
+ // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have
+ // register inputs modeled as normal uses instead of implicit uses. As such,
+ // truncate off all but the first operand (the callee). FIXME: Change isel.
+ case X86::TAILJMPr:
+ case X86::TAILJMPr64:
+ case X86::CALL64r:
+ case X86::CALL64pcrel32: {
+ unsigned Opcode = OutMI.getOpcode();
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(Opcode);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
+ // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
+ case X86::TAILJMPd:
+ case X86::TAILJMPd64: {
+ MCOperand Saved = OutMI.getOperand(0);
+ OutMI = MCInst();
+ OutMI.setOpcode(X86::TAILJMP_1);
+ OutMI.addOperand(Saved);
+ break;
+ }
+
// The assembler backend wants to see branches in their small form and relax
// them to their large form. The JIT can only handle the large form because
// it does not do relaxation. For now, translate the large form to the
@@ -332,6 +410,61 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case X86::JGE_4: OutMI.setOpcode(X86::JGE_1); break;
case X86::JLE_4: OutMI.setOpcode(X86::JLE_1); break;
case X86::JG_4: OutMI.setOpcode(X86::JG_1); break;
+
+ // We don't currently select the correct instruction form for instructions
+ // which have a short %eax, etc. form. Handle this by custom lowering, for
+ // now.
+ //
+ // Note, we are currently not handling the following instructions:
+ // MOV64ao8, MOV64o8a
+ // XCHG16ar, XCHG32ar, XCHG64ar
+ case X86::MOV8mr_NOREX:
+ case X86::MOV8mr: SimplifyShortMoveForm(OutMI, X86::MOV8ao8); break;
+ case X86::MOV8rm_NOREX:
+ case X86::MOV8rm: SimplifyShortMoveForm(OutMI, X86::MOV8o8a); break;
+ case X86::MOV16mr: SimplifyShortMoveForm(OutMI, X86::MOV16ao16); break;
+ case X86::MOV16rm: SimplifyShortMoveForm(OutMI, X86::MOV16o16a); break;
+ case X86::MOV32mr: SimplifyShortMoveForm(OutMI, X86::MOV32ao32); break;
+ case X86::MOV32rm: SimplifyShortMoveForm(OutMI, X86::MOV32o32a); break;
+ case X86::MOV64mr: SimplifyShortMoveForm(OutMI, X86::MOV64ao64); break;
+ case X86::MOV64rm: SimplifyShortMoveForm(OutMI, X86::MOV64o64a); break;
+
+ case X86::ADC8ri: SimplifyShortImmForm(OutMI, X86::ADC8i8); break;
+ case X86::ADC16ri: SimplifyShortImmForm(OutMI, X86::ADC16i16); break;
+ case X86::ADC32ri: SimplifyShortImmForm(OutMI, X86::ADC32i32); break;
+ case X86::ADC64ri32: SimplifyShortImmForm(OutMI, X86::ADC64i32); break;
+ case X86::ADD8ri: SimplifyShortImmForm(OutMI, X86::ADD8i8); break;
+ case X86::ADD16ri: SimplifyShortImmForm(OutMI, X86::ADD16i16); break;
+ case X86::ADD32ri: SimplifyShortImmForm(OutMI, X86::ADD32i32); break;
+ case X86::ADD64ri32: SimplifyShortImmForm(OutMI, X86::ADD64i32); break;
+ case X86::AND8ri: SimplifyShortImmForm(OutMI, X86::AND8i8); break;
+ case X86::AND16ri: SimplifyShortImmForm(OutMI, X86::AND16i16); break;
+ case X86::AND32ri: SimplifyShortImmForm(OutMI, X86::AND32i32); break;
+ case X86::AND64ri32: SimplifyShortImmForm(OutMI, X86::AND64i32); break;
+ case X86::CMP8ri: SimplifyShortImmForm(OutMI, X86::CMP8i8); break;
+ case X86::CMP16ri: SimplifyShortImmForm(OutMI, X86::CMP16i16); break;
+ case X86::CMP32ri: SimplifyShortImmForm(OutMI, X86::CMP32i32); break;
+ case X86::CMP64ri32: SimplifyShortImmForm(OutMI, X86::CMP64i32); break;
+ case X86::OR8ri: SimplifyShortImmForm(OutMI, X86::OR8i8); break;
+ case X86::OR16ri: SimplifyShortImmForm(OutMI, X86::OR16i16); break;
+ case X86::OR32ri: SimplifyShortImmForm(OutMI, X86::OR32i32); break;
+ case X86::OR64ri32: SimplifyShortImmForm(OutMI, X86::OR64i32); break;
+ case X86::SBB8ri: SimplifyShortImmForm(OutMI, X86::SBB8i8); break;
+ case X86::SBB16ri: SimplifyShortImmForm(OutMI, X86::SBB16i16); break;
+ case X86::SBB32ri: SimplifyShortImmForm(OutMI, X86::SBB32i32); break;
+ case X86::SBB64ri32: SimplifyShortImmForm(OutMI, X86::SBB64i32); break;
+ case X86::SUB8ri: SimplifyShortImmForm(OutMI, X86::SUB8i8); break;
+ case X86::SUB16ri: SimplifyShortImmForm(OutMI, X86::SUB16i16); break;
+ case X86::SUB32ri: SimplifyShortImmForm(OutMI, X86::SUB32i32); break;
+ case X86::SUB64ri32: SimplifyShortImmForm(OutMI, X86::SUB64i32); break;
+ case X86::TEST8ri: SimplifyShortImmForm(OutMI, X86::TEST8i8); break;
+ case X86::TEST16ri: SimplifyShortImmForm(OutMI, X86::TEST16i16); break;
+ case X86::TEST32ri: SimplifyShortImmForm(OutMI, X86::TEST32i32); break;
+ case X86::TEST64ri32: SimplifyShortImmForm(OutMI, X86::TEST64i32); break;
+ case X86::XOR8ri: SimplifyShortImmForm(OutMI, X86::XOR8i8); break;
+ case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break;
+ case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break;
+ case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break;
}
}
@@ -346,7 +479,7 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
// cast away const; DIetc do not take const operands for some reason.
DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
if (V.getContext().isSubprogram())
- O << DISubprogram(V.getContext().getNode()).getDisplayName() << ":";
+ O << DISubprogram(V.getContext()).getDisplayName() << ":";
O << V.getName();
O << " <- ";
// Frame address. Currently handles register +- offset only.
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
index ebd23f6..9e5474f 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h
@@ -25,7 +25,7 @@ namespace llvm {
class X86Subtarget;
/// X86MCInstLower - This class is used to lower a MachineInstr into an MCInst.
-class VISIBILITY_HIDDEN X86MCInstLower {
+class LLVM_LIBRARY_VISIBILITY X86MCInstLower {
MCContext &Ctx;
Mangler *Mang;
X86AsmPrinter &AsmPrinter;
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index da6bb91..1334820 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -39,7 +39,12 @@ set(sources
if( CMAKE_CL_64 )
enable_language(ASM_MASM)
- set(sources ${sources} X86CompilationCallback_Win64.asm)
+ ADD_CUSTOM_COMMAND(
+ OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj
+ COMMAND ${CMAKE_ASM_MASM_COMPILER} /Fo ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj /c ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
+ DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/X86CompilationCallback_Win64.asm
+ )
+ set(sources ${sources} ${CMAKE_CURRENT_BINARY_DIR}/X86CompilationCallback_Win64.obj)
endif()
add_llvm_target(X86CodeGen ${sources})
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 62e7357..8a5a630 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -155,7 +155,57 @@ static void translateRegister(MCInst &mcInst, Reg reg) {
///
/// @param mcInst - The MCInst to append to.
/// @param immediate - The immediate value to append.
-static void translateImmediate(MCInst &mcInst, uint64_t immediate) {
+/// @param operand - The operand, as stored in the descriptor table.
+/// @param insn - The internal instruction.
+static void translateImmediate(MCInst &mcInst,
+ uint64_t immediate,
+ OperandSpecifier &operand,
+ InternalInstruction &insn) {
+ // Sign-extend the immediate if necessary.
+
+ OperandType type = operand.type;
+
+ if (type == TYPE_RELv) {
+ switch (insn.displacementSize) {
+ default:
+ break;
+ case 8:
+ type = TYPE_MOFFS8;
+ break;
+ case 16:
+ type = TYPE_MOFFS16;
+ break;
+ case 32:
+ type = TYPE_MOFFS32;
+ break;
+ case 64:
+ type = TYPE_MOFFS64;
+ break;
+ }
+ }
+
+ switch (type) {
+ case TYPE_MOFFS8:
+ case TYPE_REL8:
+    if (immediate & 0x80)
+ immediate |= ~(0xffull);
+ break;
+ case TYPE_MOFFS16:
+    if (immediate & 0x8000)
+ immediate |= ~(0xffffull);
+ break;
+ case TYPE_MOFFS32:
+ case TYPE_REL32:
+ case TYPE_REL64:
+    if (immediate & 0x80000000)
+ immediate |= ~(0xffffffffull);
+ break;
+ case TYPE_MOFFS64:
+ default:
+ // operand is 64 bits wide. Do nothing.
+ break;
+ }
+
mcInst.addOperand(MCOperand::CreateImm(immediate));
}
@@ -370,8 +420,7 @@ static bool translateRM(MCInst &mcInst,
case TYPE_XMM64:
case TYPE_XMM128:
case TYPE_DEBUGREG:
- case TYPE_CR32:
- case TYPE_CR64:
+ case TYPE_CONTROLREG:
return translateRMRegister(mcInst, insn);
case TYPE_M:
case TYPE_M8:
@@ -447,8 +496,10 @@ static bool translateOperand(MCInst &mcInst,
case ENCODING_IO:
case ENCODING_Iv:
case ENCODING_Ia:
- translateImmediate(mcInst,
- insn.immediates[insn.numImmediatesTranslated++]);
+ translateImmediate(mcInst,
+ insn.immediates[insn.numImmediatesTranslated++],
+ operand,
+ insn);
return false;
case ENCODING_RB:
case ENCODING_RW:
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index 64f6b2d..6c3ff6b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -1034,14 +1034,10 @@ static int readModRM(struct InternalInstruction* insn) {
if (index > 7) \
*valid = 0; \
return prefix##_DR0 + index; \
- case TYPE_CR32: \
- if (index > 7) \
- *valid = 0; \
- return prefix##_ECR0 + index; \
- case TYPE_CR64: \
+ case TYPE_CONTROLREG: \
if (index > 8) \
*valid = 0; \
- return prefix##_RCR0 + index; \
+ return prefix##_CR0 + index; \
} \
}
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 462cf68..28ba86b 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -225,26 +225,16 @@ extern "C" {
ENTRY(DR6) \
ENTRY(DR7)
-#define REGS_CONTROL_32BIT \
- ENTRY(ECR0) \
- ENTRY(ECR1) \
- ENTRY(ECR2) \
- ENTRY(ECR3) \
- ENTRY(ECR4) \
- ENTRY(ECR5) \
- ENTRY(ECR6) \
- ENTRY(ECR7)
-
-#define REGS_CONTROL_64BIT \
- ENTRY(RCR0) \
- ENTRY(RCR1) \
- ENTRY(RCR2) \
- ENTRY(RCR3) \
- ENTRY(RCR4) \
- ENTRY(RCR5) \
- ENTRY(RCR6) \
- ENTRY(RCR7) \
- ENTRY(RCR8)
+#define REGS_CONTROL \
+ ENTRY(CR0) \
+ ENTRY(CR1) \
+ ENTRY(CR2) \
+ ENTRY(CR3) \
+ ENTRY(CR4) \
+ ENTRY(CR5) \
+ ENTRY(CR6) \
+ ENTRY(CR7) \
+ ENTRY(CR8)
#define ALL_EA_BASES \
EA_BASES_16BIT \
@@ -264,8 +254,7 @@ extern "C" {
REGS_XMM \
REGS_SEGMENT \
REGS_DEBUG \
- REGS_CONTROL_32BIT \
- REGS_CONTROL_64BIT \
+ REGS_CONTROL \
ENTRY(RIP)
/*
diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 4a7cd57..0f33f52 100644
--- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -280,8 +280,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
- ENUM_ENTRY(TYPE_CR32, "4-byte control register operand") \
- ENUM_ENTRY(TYPE_CR64, "8-byte") \
+ ENUM_ENTRY(TYPE_CONTROLREG, "Control register operand") \
\
ENUM_ENTRY(TYPE_Mv, "Memory operand of operand size") \
ENUM_ENTRY(TYPE_Rv, "Register operand of operand size") \
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
index 5e80845..dab070e 100644
--- a/lib/Target/X86/SSEDomainFix.cpp
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -155,9 +155,7 @@ char SSEDomainFixPass::ID = 0;
/// Translate TRI register number to an index into our smaller tables of
/// interesting registers. Return -1 for boring registers.
int SSEDomainFixPass::RegIndex(unsigned reg) {
- // Registers are sorted lexicographically.
- // We just need them to be consecutive, ordering doesn't matter.
- assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+ assert(X86::XMM15 == X86::XMM0+NumRegs-1 && "Unexpected sort");
reg -= X86::XMM0;
return reg < NumRegs ? (int) reg : -1;
}
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index ba9c1d0..151087f 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -12,6 +12,7 @@
#include "X86FixupKinds.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -43,20 +44,19 @@ public:
X86AsmBackend(const Target &T)
: TargetAsmBackend(T) {}
- void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF,
+ void ApplyFixup(const MCFixup &Fixup, MCDataFragment &DF,
uint64_t Value) const {
- unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind);
+ unsigned Size = 1 << getFixupKindLog2Size(Fixup.getKind());
- assert(Fixup.Offset + Size <= DF.getContents().size() &&
+ assert(Fixup.getOffset() + Size <= DF.getContents().size() &&
"Invalid fixup offset!");
for (unsigned i = 0; i != Size; ++i)
- DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8));
+ DF.getContents()[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
- bool MayNeedRelaxation(const MCInst &Inst,
- const SmallVectorImpl<MCAsmFixup> &Fixups) const;
+ bool MayNeedRelaxation(const MCInst &Inst) const;
- void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const;
+ void RelaxInstruction(const MCInst &Inst, MCInst &Res) const;
bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
};
@@ -75,6 +75,7 @@ static unsigned getRelaxedOpcode(unsigned Op) {
case X86::JG_1: return X86::JG_4;
case X86::JLE_1: return X86::JLE_4;
case X86::JL_1: return X86::JL_4;
+ case X86::TAILJMP_1:
case X86::JMP_1: return X86::JMP_4;
case X86::JNE_1: return X86::JNE_4;
case X86::JNO_1: return X86::JNO_4;
@@ -86,35 +87,33 @@ static unsigned getRelaxedOpcode(unsigned Op) {
}
}
-bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst,
- const SmallVectorImpl<MCAsmFixup> &Fixups) const {
- // Check for a 1byte pcrel fixup, and enforce that we would know how to relax
- // this instruction.
- for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
- if (unsigned(Fixups[i].Kind) == X86::reloc_pcrel_1byte) {
- assert(getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode());
- return true;
- }
- }
+bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
+ // Check if this instruction is ever relaxable.
+ if (getRelaxedOpcode(Inst.getOpcode()) == Inst.getOpcode())
+ return false;
- return false;
+  // Otherwise, assume it can be relaxed. Once we support relaxing more complex
+ // instructions we should check that the instruction actually has symbolic
+ // operands before doing this, but we need to be careful about things like
+ // PCrel.
+ return true;
}
// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
-void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF,
- MCInst &Res) const {
+void X86AsmBackend::RelaxInstruction(const MCInst &Inst, MCInst &Res) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
- unsigned RelaxedOp = getRelaxedOpcode(IF->getInst().getOpcode());
+ unsigned RelaxedOp = getRelaxedOpcode(Inst.getOpcode());
- if (RelaxedOp == IF->getInst().getOpcode()) {
+ if (RelaxedOp == Inst.getOpcode()) {
SmallString<256> Tmp;
raw_svector_ostream OS(Tmp);
- IF->getInst().dump_pretty(OS);
+ Inst.dump_pretty(OS);
+ OS << "\n";
report_fatal_error("unexpected instruction to relax: " + OS.str());
}
- Res = IF->getInst();
+ Res = Inst;
Res.setOpcode(RelaxedOp);
}
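The new MayNeedRelaxation leans entirely on getRelaxedOpcode mapping non-relaxable opcodes to themselves. A minimal standalone sketch of that contract (the enum values are stand-ins for the real opcode numbers):

#include <cassert>

enum Opc { JMP_1, JMP_4, JNE_1, JNE_4, MOV32rr };

// rel8 branches widen to their rel32 forms; everything else maps to itself.
static Opc relaxSketch(Opc Op) {
  switch (Op) {
  case JMP_1: return JMP_4;
  case JNE_1: return JNE_4;
  default:    return Op;
  }
}

static bool mayNeedRelaxationSketch(Opc Op) {
  return relaxSketch(Op) != Op;  // the check MayNeedRelaxation now performs
}

int main() {
  assert(mayNeedRelaxationSketch(JNE_1));
  assert(!mayNeedRelaxationSketch(MOV32rr));
  return 0;
}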
@@ -199,6 +198,18 @@ public:
}
};
+class ELFX86_32AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_32AsmBackend(const Target &T)
+ : ELFX86AsmBackend(T) {}
+};
+
+class ELFX86_64AsmBackend : public ELFX86AsmBackend {
+public:
+ ELFX86_64AsmBackend(const Target &T)
+ : ELFX86AsmBackend(T) {}
+};
+
class DarwinX86AsmBackend : public X86AsmBackend {
public:
DarwinX86AsmBackend(const Target &T)
@@ -210,7 +221,8 @@ public:
bool isVirtualSection(const MCSection &Section) const {
const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
return (SMO.getType() == MCSectionMachO::S_ZEROFILL ||
- SMO.getType() == MCSectionMachO::S_GB_ZEROFILL);
+ SMO.getType() == MCSectionMachO::S_GB_ZEROFILL ||
+ SMO.getType() == MCSectionMachO::S_THREAD_LOCAL_ZEROFILL);
}
};
@@ -247,6 +259,26 @@ public:
const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
return SMO.getType() == MCSectionMachO::S_CSTRING_LITERALS;
}
+
+ virtual bool isSectionAtomizable(const MCSection &Section) const {
+ const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section);
+    // Fixed-size data sections are uniqued; they cannot be diced into atoms.
+ switch (SMO.getType()) {
+ default:
+ return true;
+
+ case MCSectionMachO::S_4BYTE_LITERALS:
+ case MCSectionMachO::S_8BYTE_LITERALS:
+ case MCSectionMachO::S_16BYTE_LITERALS:
+ case MCSectionMachO::S_LITERAL_POINTERS:
+ case MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_LAZY_SYMBOL_POINTERS:
+ case MCSectionMachO::S_MOD_INIT_FUNC_POINTERS:
+ case MCSectionMachO::S_MOD_TERM_FUNC_POINTERS:
+ case MCSectionMachO::S_INTERPOSING:
+ return false;
+ }
+ }
};
}
@@ -257,7 +289,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
case Triple::Darwin:
return new DarwinX86_32AsmBackend(T);
default:
- return new ELFX86AsmBackend(T);
+ return new ELFX86_32AsmBackend(T);
}
}
@@ -267,6 +299,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
case Triple::Darwin:
return new DarwinX86_64AsmBackend(T);
default:
- return new ELFX86AsmBackend(T);
+ return new ELFX86_64AsmBackend(T);
}
}
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index eece462..98ab2a6 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -15,7 +15,7 @@
#define X86COFF_MACHINEMODULEINFO_H
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/DenseSet.h"
#include "X86MachineFunctionInfo.h"
namespace llvm {
@@ -25,18 +25,18 @@ namespace llvm {
/// X86COFFMachineModuleInfo - This is a MachineModuleInfoImpl implementation
/// for X86 COFF targets.
class X86COFFMachineModuleInfo : public MachineModuleInfoImpl {
- StringSet<> CygMingStubs;
+ DenseSet<MCSymbol const *> Externals;
public:
X86COFFMachineModuleInfo(const MachineModuleInfo &) {}
virtual ~X86COFFMachineModuleInfo();
- void addExternalFunction(StringRef Name) {
- CygMingStubs.insert(Name);
+ void addExternalFunction(MCSymbol* Symbol) {
+ Externals.insert(Symbol);
}
- typedef StringSet<>::const_iterator stub_iterator;
- stub_iterator stub_begin() const { return CygMingStubs.begin(); }
- stub_iterator stub_end() const { return CygMingStubs.end(); }
+ typedef DenseSet<MCSymbol const *>::const_iterator externals_iterator;
+ externals_iterator externals_begin() const { return Externals.begin(); }
+ externals_iterator externals_end() const { return Externals.end(); }
};
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index fd15efd..a5774e1 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -307,6 +307,20 @@ def CC_X86_32_FastCall : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;
+def CC_X86_32_ThisCall : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // The 'nest' parameter, if any, is passed in EAX.
+ CCIfNest<CCAssignToReg<[EAX]>>,
+
+ // The first integer argument is passed in ECX
+ CCIfType<[i32], CCAssignToReg<[ECX]>>,
+
+ // Otherwise, same as everything else.
+ CCDelegateTo<CC_X86_32_Common>
+]>;
+
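In source terms, the new convention models MSVC-style thiscall (a hedged illustration, not part of the patch): the implicit this pointer travels in ECX, remaining arguments go on the stack, and the callee pops them.

// On x86-32 under thiscall, &P is passed in ECX for the P.Sum() call.
struct Point {
  int X, Y;
  int Sum() const { return X + Y; }
};

int main() {
  Point P = {1, 2};
  return P.Sum();
}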
def CC_X86_32_FastCC : CallingConv<[
// Handles byval parameters. Note that we can't rely on the delegation
// to CC_X86_32_Common for this because that happens after code that
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index ff9208c..1bc5eb7 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -180,6 +180,8 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
+ else if (CC == CallingConv::X86_ThisCall)
+ return CC_X86_32_ThisCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
else if (CC == CallingConv::GHC)
@@ -324,7 +326,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
unsigned Src, EVT SrcVT,
unsigned &ResultReg) {
- unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);
+ unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
+ Src, /*TODO: Kill=*/false);
if (RR != 0) {
ResultReg = RR;
@@ -416,7 +419,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
(S == 1 || S == 2 || S == 4 || S == 8)) {
// Scaled-index addressing.
Scale = S;
- IndexReg = getRegForGEPIndex(Op);
+ IndexReg = getRegForGEPIndex(Op).first;
if (IndexReg == 0)
return false;
} else
@@ -802,7 +805,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
unsigned ResultReg = getRegForValue(I->getOperand(0));
if (ResultReg == 0) return false;
// Set the high bits to zero.
- ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
+ ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
if (ResultReg == 0) return false;
UpdateValueMap(I, ResultReg);
return true;
@@ -913,7 +916,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
const MachineInstr &MI = *RI;
- if (MI.modifiesRegister(Reg)) {
+ if (MI.definesRegister(Reg)) {
unsigned Src, Dst, SrcSR, DstSR;
if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
@@ -1019,14 +1022,14 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
- TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);
+ TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL);
// The shift instruction uses X86::CL. If we defined a super-register
// of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
// we're doing here.
if (CReg != X86::CL)
BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL)
- .addReg(CReg).addImm(X86::SUBREG_8BIT);
+ .addReg(CReg).addImm(X86::sub_8bit);
unsigned ResultReg = createResultReg(RC);
BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
@@ -1133,7 +1136,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
// Then issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
- CopyReg, X86::SUBREG_8BIT);
+ CopyReg, /*Kill=*/true,
+ X86::sub_8bit);
if (!ResultReg)
return false;
@@ -1436,7 +1440,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
}
case CCValAssign::BCvt: {
unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
- ISD::BIT_CONVERT, Arg);
+ ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
assert(BC != 0 && "Failed to emit a bitcast!");
Arg = BC;
ArgVT = VA.getLocVT();
@@ -1447,7 +1451,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (VA.isRegLoc()) {
TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
- Arg, RC, RC);
+ Arg, RC, RC, DL);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
RegArgs.push_back(VA.getLocReg());
@@ -1473,7 +1477,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (Subtarget->isPICStyleGOT()) {
TargetRegisterClass *RC = X86::GR32RegisterClass;
unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
- bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
+ bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC,
+ DL);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
}
@@ -1552,7 +1557,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
unsigned ResultReg = createResultReg(DstRC);
bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
- RVLocs[0].getLocReg(), DstRC, SrcRC);
+ RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
Emitted = true;
if (CopyVT != RVLocs[0].getValVT()) {
diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp
index 541083f..747683d 100644
--- a/lib/Target/X86/X86FloatingPointRegKill.cpp
+++ b/lib/Target/X86/X86FloatingPointRegKill.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
-#include "X86Subtarget.h"
#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -42,12 +41,70 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "X86 FP_REG_KILL inserter"; }
+ virtual const char *getPassName() const {
+ return "X86 FP_REG_KILL inserter";
+ }
};
char FPRegKiller::ID = 0;
}
-FunctionPass *llvm::createX87FPRegKillInserterPass() { return new FPRegKiller(); }
+FunctionPass *llvm::createX87FPRegKillInserterPass() {
+ return new FPRegKiller();
+}
+
+/// isFPStackVReg - Return true if the specified vreg is from an fp stack
+/// register class.
+static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) {
+ if (!TargetRegisterInfo::isVirtualRegister(RegNo))
+ return false;
+
+ switch (MRI.getRegClass(RegNo)->getID()) {
+ default: return false;
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ return true;
+ }
+}
+
+
+/// ContainsFPStackCode - Return true if the specified MBB has floating-point
+/// stack code, and thus needs an FP_REG_KILL.
+static bool ContainsFPStackCode(MachineBasicBlock *MBB,
+ const MachineRegisterInfo &MRI) {
+ // Scan the block, looking for instructions that define fp stack vregs.
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+ if (I->getNumOperands() == 0 || !I->getOperand(0).isReg())
+ continue;
+
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
+ if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef())
+ continue;
+
+ if (isFPStackVReg(I->getOperand(op).getReg(), MRI))
+ return true;
+ }
+ }
+
+  // Check PHI nodes in successor blocks. These PHIs will be lowered to have
+ // a copy of the input value in this block, which is a definition of the
+ // value.
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ E = MBB->succ_end(); SI != E; ++ SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator I = SuccBB->begin(), E = SuccBB->end();
+ I != E; ++I) {
+ // All PHI nodes are at the top of the block.
+ if (!I->isPHI()) break;
+
+ if (isFPStackVReg(I->getOperand(0).getReg(), MRI))
+ return true;
+ }
+ }
+
+ return false;
+}
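A compact sketch of the first half of that scan (toy types, not MachineInstr): a block "contains FP stack code" if any instruction defines a register whose class is one of the three RFP classes; the successor-PHI half of the check is omitted here.

#include <cassert>
#include <vector>

enum RegClass { GR32, RFP32, RFP64, RFP80 };

static bool isFPStackClass(RegClass RC) {
  return RC == RFP32 || RC == RFP64 || RC == RFP80;
}

struct Def { RegClass RC; };  // one register definition in a block

static bool containsFPStackCodeSketch(const std::vector<Def> &Block) {
  for (unsigned i = 0, e = Block.size(); i != e; ++i)  // the def-operand scan
    if (isFPStackClass(Block[i].RC))
      return true;
  return false;
}

int main() {
  assert(containsFPStackCodeSketch({{GR32}, {RFP80}}));
  assert(!containsFPStackCodeSketch({{GR32}}));
  return 0;
}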
bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
// If we are emitting FP stack code, scan the basic block to determine if this
@@ -65,14 +122,13 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
// Fast-path: If nothing is using the x87 registers, we don't need to do
// any scanning.
- MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
if (MRI.getRegClassVirtRegs(X86::RFP80RegisterClass).empty() &&
MRI.getRegClassVirtRegs(X86::RFP64RegisterClass).empty() &&
MRI.getRegClassVirtRegs(X86::RFP32RegisterClass).empty())
return false;
bool Changed = false;
- const X86Subtarget &Subtarget = MF.getTarget().getSubtarget<X86Subtarget>();
MachineFunction::iterator MBBI = MF.begin();
MachineFunction::iterator EndMBB = MF.end();
for (; MBBI != EndMBB; ++MBBI) {
@@ -87,48 +143,8 @@ bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- bool ContainsFPCode = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- !ContainsFPCode && I != E; ++I) {
- if (I->getNumOperands() != 0 && I->getOperand(0).isReg()) {
- const TargetRegisterClass *clas;
- for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) {
- if (I->getOperand(op).isReg() && I->getOperand(op).isDef() &&
- TargetRegisterInfo::isVirtualRegister(I->getOperand(op).getReg()) &&
- ((clas = MRI.getRegClass(I->getOperand(op).getReg())) ==
- X86::RFP32RegisterClass ||
- clas == X86::RFP64RegisterClass ||
- clas == X86::RFP80RegisterClass)) {
- ContainsFPCode = true;
- break;
- }
- }
- }
- }
- // Check PHI nodes in successor blocks. These PHI's will be lowered to have
- // a copy of the input value in this block. In SSE mode, we only care about
- // 80-bit values.
- if (!ContainsFPCode) {
- // Final check, check LLVM BB's that are successors to the LLVM BB
- // corresponding to BB for FP PHI nodes.
- const BasicBlock *LLVMBB = MBB->getBasicBlock();
- const PHINode *PN;
- for (succ_const_iterator SI = succ_begin(LLVMBB), E = succ_end(LLVMBB);
- !ContainsFPCode && SI != E; ++SI) {
- for (BasicBlock::const_iterator II = SI->begin();
- (PN = dyn_cast<PHINode>(II)); ++II) {
- if (PN->getType()==Type::getX86_FP80Ty(LLVMBB->getContext()) ||
- (!Subtarget.hasSSE1() && PN->getType()->isFloatingPointTy()) ||
- (!Subtarget.hasSSE2() &&
- PN->getType()==Type::getDoubleTy(LLVMBB->getContext()))) {
- ContainsFPCode = true;
- break;
- }
- }
- }
- }
- // Finally, if we found any FP code, emit the FP_REG_KILL instruction.
- if (ContainsFPCode) {
+ // If we find any FP stack code, emit the FP_REG_KILL instruction.
+ if (ContainsFPStackCode(MBB, MRI)) {
BuildMI(*MBB, MBBI->getFirstTerminator(), DebugLoc(),
MF.getTarget().getInstrInfo()->get(X86::FP_REG_KILL));
++NumFPKill;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index fd8bb1e..0f64383 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1693,7 +1693,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
Result,
CurDAG->getTargetConstant(8, MVT::i8)), 0);
// Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Result);
} else {
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1834,7 +1834,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
CurDAG->getTargetConstant(8, MVT::i8)),
0);
// Then truncate it down to i8.
- Result = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Result);
} else {
Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
@@ -1883,7 +1883,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
}
// Extract the l-register.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
MVT::i8, Reg);
// Emit a testb.
@@ -1912,7 +1912,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
Reg.getValueType(), Reg, RC), 0);
// Extract the h-register.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_8BIT_HI, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
MVT::i8, Reg);
// Emit a testb. No special NOREX tricks are needed since there's
@@ -1930,7 +1930,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Reg = N0.getNode()->getOperand(0);
// Extract the 16-bit subregister.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_16BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
MVT::i16, Reg);
// Emit a testw.
@@ -1946,7 +1946,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDValue Reg = N0.getNode()->getOperand(0);
// Extract the 32-bit subregister.
- SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::SUBREG_32BIT, dl,
+ SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
MVT::i32, Reg);
// Emit a testl.
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6ce9ab7..b02c33d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -94,7 +94,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setShiftAmountType(MVT::i8);
setBooleanContents(ZeroOrOneBooleanContent);
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
if (Subtarget->isTargetDarwin()) {
@@ -145,13 +145,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Expand);
} else if (!UseSoftFloat) {
- if (X86ScalarSSEf64) {
- // We have an impenetrably clever algorithm for ui64->double only.
- setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
- }
+    // We have an algorithm for i64->double with SSE2; for other targets we
+    // turn this into a 64-bit FILD followed by a conditional FADD.
+ setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD for other targets.
- setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
+ setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
}
// Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
@@ -215,9 +214,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
- if (!X86ScalarSSEf64) {
+ if (!X86ScalarSSEf64) {
setOperationAction(ISD::BIT_CONVERT , MVT::f32 , Expand);
setOperationAction(ISD::BIT_CONVERT , MVT::i32 , Expand);
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::BIT_CONVERT , MVT::f64 , Expand);
+ // Without SSE, i64->f64 goes through memory; i64->MMX is Legal.
+ if (Subtarget->hasMMX() && !DisableMMX)
+ setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Custom);
+ else
+ setOperationAction(ISD::BIT_CONVERT , MVT::i64 , Expand);
+ }
}
// Scalar integer divide and remainder are lowered to use operations that
@@ -679,6 +686,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSETCC, MVT::v8i8, Custom);
setOperationAction(ISD::VSETCC, MVT::v4i16, Custom);
setOperationAction(ISD::VSETCC, MVT::v2i32, Custom);
+
+ if (!X86ScalarSSEf64 && Subtarget->is64Bit()) {
+ setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom);
+ setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom);
+ }
}
if (!UseSoftFloat && Subtarget->hasSSE1()) {
@@ -1244,10 +1259,8 @@ X86TargetLowering::LowerReturn(SDValue Chain,
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
unsigned Reg = FuncInfo->getSRetReturnReg();
- if (!Reg) {
- Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64));
- FuncInfo->setSRetReturnReg(Reg);
- }
+ assert(Reg &&
+ "SRetReturnReg should have been set in LowerFormalArguments().");
SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy());
Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag);
@@ -1384,6 +1397,8 @@ bool X86TargetLowering::IsCalleePop(bool IsVarArg,
return !Subtarget->is64Bit();
case CallingConv::X86_FastCall:
return !Subtarget->is64Bit();
+ case CallingConv::X86_ThisCall:
+ return !Subtarget->is64Bit();
case CallingConv::Fast:
return GuaranteedTailCallOpt;
case CallingConv::GHC:
@@ -1405,6 +1420,8 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
if (CC == CallingConv::X86_FastCall)
return CC_X86_32_FastCall;
+ else if (CC == CallingConv::X86_ThisCall)
+ return CC_X86_32_ThisCall;
else if (CC == CallingConv::Fast)
return CC_X86_32_FastCC;
else if (CC == CallingConv::GHC)
@@ -1596,7 +1613,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
+ if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,
true, false));
}
@@ -1716,7 +1734,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
if (!Is64Bit) {
// RegSaveFrameIndex is X86-64 only.
FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
- if (CallConv == CallingConv::X86_FastCall)
+ if (CallConv == CallingConv::X86_FastCall ||
+ CallConv == CallingConv::X86_ThisCall)
      // fastcall and thiscall functions can't have varargs.
FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
}
@@ -5272,7 +5291,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
}
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
- MFI->setHasCalls(true);
+ MFI->setAdjustsStack(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
@@ -5462,7 +5481,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
}
SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
- SDValue StackSlot,
+ SDValue StackSlot,
SelectionDAG &DAG) const {
// Build the FILD
DebugLoc dl = Op.getDebugLoc();
@@ -5636,35 +5655,72 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDValue N0 = Op.getOperand(0);
DebugLoc dl = Op.getDebugLoc();
- // Now not UINT_TO_FP is legal (it's marked custom), dag combiner won't
+ // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
EVT SrcVT = N0.getValueType();
- if (SrcVT == MVT::i64) {
- // We only handle SSE2 f64 target here; caller can expand the rest.
- if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
- return SDValue();
-
+ EVT DstVT = Op.getValueType();
+ if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i64(Op, DAG);
- } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) {
+ else if (SrcVT == MVT::i32 && X86ScalarSSEf64)
return LowerUINT_TO_FP_i32(Op, DAG);
- }
-
- assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!");
// Make a 64-bit buffer, and use it to build an FILD.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
- SDValue WordOff = DAG.getConstant(4, getPointerTy());
- SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
- getPointerTy(), StackSlot, WordOff);
- SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ if (SrcVT == MVT::i32) {
+ SDValue WordOff = DAG.getConstant(4, getPointerTy());
+ SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
+ getPointerTy(), StackSlot, WordOff);
+ SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ StackSlot, NULL, 0, false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
+ OffsetSlot, NULL, 0, false, false, 0);
+ SDValue Fild = BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ return Fild;
+ }
+
+ assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot, NULL, 0, false, false, 0);
- SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
- OffsetSlot, NULL, 0, false, false, 0);
- return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
+ // For i64 source, we need to add the appropriate power of 2 if the input
+ // was negative. This is the same as the optimization in
+  // DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP, and for it to be safe here,
+ // we must be careful to do the computation in x87 extended precision, not
+ // in SSE. (The generic code can't know it's OK to do this, or how to.)
+ SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
+ SDValue Ops[] = { Store, StackSlot, DAG.getValueType(MVT::i64) };
+ SDValue Fild = DAG.getNode(X86ISD::FILD, dl, Tys, Ops, 3);
+
+ APInt FF(32, 0x5F800000ULL);
+
+ // Check whether the sign bit is set.
+ SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
+ Op.getOperand(0), DAG.getConstant(0, MVT::i64),
+ ISD::SETLT);
+
+ // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits.
+ SDValue FudgePtr = DAG.getConstantPool(
+ ConstantInt::get(*DAG.getContext(), FF.zext(64)),
+ getPointerTy());
+
+ // Get a pointer to FF if the sign bit was set, or to 0 otherwise.
+ SDValue Zero = DAG.getIntPtrConstant(0);
+ SDValue Four = DAG.getIntPtrConstant(4);
+ SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet,
+ Zero, Four);
+ FudgePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FudgePtr, Offset);
+
+ // Load the value out, extending it from f32 to f80.
+ // FIXME: Avoid the extend by constructing the right constant pool?
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(),
+ FudgePtr, PseudoSourceValue::getConstantPool(),
+ 0, MVT::f32, false, false, 4);
+ // Extend everything to 80 bits to force it to be done on x87.
+ SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
+ return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, DAG.getIntPtrConstant(0));
}
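The fudge constant merits a worked check: 0x5F800000 is the IEEE single-precision encoding of 2^64 (sign 0, exponent 191 giving 2^(191-127), zero mantissa). FILD reads the i64 bit pattern as signed, so adding 2^64 for negative inputs recovers the unsigned value. A standalone sketch, exact on x86 where long double is the 80-bit x87 format:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t bits = 0x5F800000;
  float fudge;
  std::memcpy(&fudge, &bits, sizeof(fudge));
  assert(fudge == 18446744073709551616.0f);  // exactly 2^64

  // FILD sees 0xFFFF...FF as the signed value -1; adding 2^64 at x87
  // precision yields 2^64 - 1 == UINT64_MAX exactly.
  long double asSigned = (long double)(int64_t)0xFFFFFFFFFFFFFFFFULL;
  assert(asSigned + (long double)fudge == 18446744073709551615.0L);
  return 0;
}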
std::pair<SDValue,SDValue> X86TargetLowering::
@@ -6593,221 +6649,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
return DAG.getMergeValues(Ops1, 2, dl);
}
-SDValue
-X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const {
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
-
- // If not DWORD aligned or size is more than the threshold, call the library.
- // The libc version is likely to be faster for these cases. It can use the
- // address value and run time information about the CPU.
- if ((Align & 3) != 0 ||
- !ConstantSize ||
- ConstantSize->getZExtValue() >
- getSubtarget()->getMaxInlineSizeThreshold()) {
- SDValue InFlag(0, 0);
-
- // Check to see if there is a specialized entry-point for memory zeroing.
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
-
- if (const char *bzeroEntry = V &&
- V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
- EVT IntPtr = getPointerTy();
- const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext());
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
- Entry.Node = Dst;
- Entry.Ty = IntPtrTy;
- Args.push_back(Entry);
- Entry.Node = Size;
- Args.push_back(Entry);
- std::pair<SDValue,SDValue> CallResult =
- LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
- false, false, false, false,
- 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
- DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
- return CallResult.second;
- }
-
- // Otherwise have the target-independent code call memset.
- return SDValue();
- }
-
- uint64_t SizeVal = ConstantSize->getZExtValue();
- SDValue InFlag(0, 0);
- EVT AVT;
- SDValue Count;
- ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
- unsigned BytesLeft = 0;
- bool TwoRepStos = false;
- if (ValC) {
- unsigned ValReg;
- uint64_t Val = ValC->getZExtValue() & 255;
-
- // If the value is a constant, then we can potentially use larger sets.
- switch (Align & 3) {
- case 2: // WORD aligned
- AVT = MVT::i16;
- ValReg = X86::AX;
- Val = (Val << 8) | Val;
- break;
- case 0: // DWORD aligned
- AVT = MVT::i32;
- ValReg = X86::EAX;
- Val = (Val << 8) | Val;
- Val = (Val << 16) | Val;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
- AVT = MVT::i64;
- ValReg = X86::RAX;
- Val = (Val << 32) | Val;
- }
- break;
- default: // Byte aligned
- AVT = MVT::i8;
- ValReg = X86::AL;
- Count = DAG.getIntPtrConstant(SizeVal);
- break;
- }
-
- if (AVT.bitsGT(MVT::i8)) {
- unsigned UBytes = AVT.getSizeInBits() / 8;
- Count = DAG.getIntPtrConstant(SizeVal / UBytes);
- BytesLeft = SizeVal % UBytes;
- }
-
- Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
- InFlag);
- InFlag = Chain.getValue(1);
- } else {
- AVT = MVT::i8;
- Count = DAG.getIntPtrConstant(SizeVal);
- Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
- InFlag = Chain.getValue(1);
- }
-
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
- X86::ECX,
- Count, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
- X86::EDI,
- Dst, InFlag);
- InFlag = Chain.getValue(1);
-
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
- Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
-
- if (TwoRepStos) {
- InFlag = Chain.getValue(1);
- Count = Size;
- EVT CVT = Count.getValueType();
- SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
- DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
- Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
- X86::ECX,
- Left, InFlag);
- InFlag = Chain.getValue(1);
- Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
- Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
- } else if (BytesLeft) {
- // Handle the last 1 - 7 bytes.
- unsigned Offset = SizeVal - BytesLeft;
- EVT AddrVT = Dst.getValueType();
- EVT SizeVT = Size.getValueType();
-
- Chain = DAG.getMemset(Chain, dl,
- DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
- DAG.getConstant(Offset, AddrVT)),
- Src,
- DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, DstSV, DstSVOff + Offset);
- }
-
- // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
- return Chain;
-}
-
-SDValue
-X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain, SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const {
-  // This requires the copy size to be a constant, preferably
- // within a subtarget-specific limit.
- ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
- if (!ConstantSize)
- return SDValue();
- uint64_t SizeVal = ConstantSize->getZExtValue();
- if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
- return SDValue();
-
- /// If not DWORD aligned, call the library.
- if ((Align & 3) != 0)
- return SDValue();
-
- // DWORD aligned
- EVT AVT = MVT::i32;
- if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
- AVT = MVT::i64;
-
- unsigned UBytes = AVT.getSizeInBits() / 8;
- unsigned CountVal = SizeVal / UBytes;
- SDValue Count = DAG.getIntPtrConstant(CountVal);
- unsigned BytesLeft = SizeVal % UBytes;
-
- SDValue InFlag(0, 0);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
- X86::ECX,
- Count, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
- X86::EDI,
- Dst, InFlag);
- InFlag = Chain.getValue(1);
- Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
- X86::ESI,
- Src, InFlag);
- InFlag = Chain.getValue(1);
-
- SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
- SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
- array_lengthof(Ops));
-
- SmallVector<SDValue, 4> Results;
- Results.push_back(RepMovs);
- if (BytesLeft) {
- // Handle the last 1 - 7 bytes.
- unsigned Offset = SizeVal - BytesLeft;
- EVT DstVT = Dst.getValueType();
- EVT SrcVT = Src.getValueType();
- EVT SizeVT = Size.getValueType();
- Results.push_back(DAG.getMemcpy(Chain, dl,
- DAG.getNode(ISD::ADD, dl, DstVT, Dst,
- DAG.getConstant(Offset, DstVT)),
- DAG.getNode(ISD::ADD, dl, SrcVT, Src,
- DAG.getConstant(Offset, SrcVT)),
- DAG.getConstant(BytesLeft, SizeVT),
- Align, isVolatile, AlwaysInline,
- DstSV, DstSVOff + Offset,
- SrcSV, SrcSVOff + Offset));
- }
-
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- &Results[0], Results.size());
-}
-
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
@@ -7138,6 +6979,9 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setReturnAddressIsTaken(true);
+
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
DebugLoc dl = Op.getDebugLoc();
@@ -7161,6 +7005,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
MFI->setFrameAddressIsTaken(true);
+
EVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -7298,6 +7143,7 @@ SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
break;
}
case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
case CallingConv::Fast:
// Pass 'nest' parameter in EAX.
// Must be kept in sync with X86CallingConv.td
@@ -7630,6 +7476,27 @@ SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
return DAG.getMergeValues(Ops, 2, dl);
}
+SDValue X86TargetLowering::LowerBIT_CONVERT(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ EVT DstVT = Op.getValueType();
+ assert((Subtarget->is64Bit() && !Subtarget->hasSSE2() &&
+ Subtarget->hasMMX() && !DisableMMX) &&
+ "Unexpected custom BIT_CONVERT");
+ assert((DstVT == MVT::i64 ||
+ (DstVT.isVector() && DstVT.getSizeInBits()==64)) &&
+ "Unexpected custom BIT_CONVERT");
+ // i64 <=> MMX conversions are Legal.
+ if (SrcVT==MVT::i64 && DstVT.isVector())
+ return Op;
+ if (DstVT==MVT::i64 && SrcVT.isVector())
+ return Op;
+ // MMX <=> MMX conversions are Legal.
+ if (SrcVT.isVector() && DstVT.isVector())
+ return Op;
+ // All other conversions need to be expanded.
+ return SDValue();
+}
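The new hook follows the usual custom-lowering contract: returning Op unchanged reports the node as fine as-is, while returning a null SDValue() asks the legalizer for its default expansion, which is what the closing comment means by "expanded". Condensed as a decision table (a reading aid, not code from the sources):

// i64        -> 64-bit MMX vector : kept (return Op)
// 64-bit MMX -> i64               : kept (return Op)
// MMX vector -> MMX vector        : kept (return Op)
// everything else                 : SDValue(), i.e. generic expansion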
SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const {
SDNode *Node = Op.getNode();
DebugLoc dl = Node->getDebugLoc();
@@ -7699,6 +7566,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SMULO:
case ISD::UMULO: return LowerXALUO(Op, DAG);
case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
+ case ISD::BIT_CONVERT: return LowerBIT_CONVERT(Op, DAG);
}
}
@@ -8203,9 +8071,15 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
MachineOperand& dest1Oper = bInstr->getOperand(0);
MachineOperand& dest2Oper = bInstr->getOperand(1);
MachineOperand* argOpers[2 + X86AddrNumOperands];
- for (int i=0; i < 2 + X86AddrNumOperands; ++i)
+ for (int i=0; i < 2 + X86AddrNumOperands; ++i) {
argOpers[i] = &bInstr->getOperand(i+2);
+ // We use some of the operands multiple times, so conservatively just
+ // clear any kill flags that might be present.
+ if (argOpers[i]->isReg() && argOpers[i]->isUse())
+ argOpers[i]->setIsKill(false);
+ }
+
// x86 address has 5 operands: base, index, scale, displacement, and segment.
int lastAddrIndx = X86AddrNumOperands - 1; // [0,3]
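Background for the new loop: a <kill> flag asserts the last use of a register in its block. This expansion re-emits the same address operands several times, so a kill flag carried onto the first re-use would falsely end the live range before the second use. In MIR-flavored pseudocode (illustrative):

// %addr = ...
// LOADL ... %addr<kill> ...   ; first expanded use: a stale <kill> here
// LOADH ... %addr ...         ; would make this second use read a register
//                             ; the verifier already considers dead
// Stripping IsKill from every register use up front sidesteps the problem.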
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 440601f..1ef1a7b 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -563,7 +563,7 @@ namespace llvm {
return !X86ScalarSSEf64 || VT == MVT::f80;
}
- virtual const X86Subtarget* getSubtarget() const {
+ const X86Subtarget* getSubtarget() const {
return Subtarget;
}
@@ -677,6 +677,7 @@ namespace llvm {
SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
SelectionDAG &DAG) const;
+ SDValue LowerBIT_CONVERT(SDValue op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
@@ -743,23 +744,6 @@ namespace llvm {
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp) const;
- SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile,
- const Value *DstSV,
- uint64_t DstSVOff) const;
- SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain,
- SDValue Dst, SDValue Src,
- SDValue Size, unsigned Align,
- bool isVolatile, bool AlwaysInline,
- const Value *DstSV,
- uint64_t DstSVOff,
- const Value *SrcSV,
- uint64_t SrcSVOff) const;
-
/// Utility function to emit string processing sse4.2 instructions
/// that return in xmm0.
/// This takes the instruction to expand, the associated machine basic
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index f5c3dbf..97eb17c 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -18,7 +18,9 @@
//
// 64-bits but only 32 bits are significant.
-def i64i32imm : Operand<i64>;
+def i64i32imm : Operand<i64> {
+ let ParserMatchClass = ImmSExti64i32AsmOperand;
+}
// 64-bits but only 32 bits are significant, and those bits are treated as being
// pc relative.
@@ -30,7 +32,7 @@ def i64i32imm_pcrel : Operand<i64> {
// 64-bits but only 8 bits are significant.
def i64i8imm : Operand<i64> {
- let ParserMatchClass = ImmSExt8AsmOperand;
+ let ParserMatchClass = ImmSExti64i8AsmOperand;
}
// Special i64mem for addresses of load folding tail calls. These are not
@@ -198,6 +200,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
variable_ops),
"#TC_RETURN $dst $offset", []>;
+ let mayLoad = 1 in
def TCRETURNmi64 : I<0, Pseudo, (outs),
(ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
@@ -208,6 +211,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
+ let mayLoad = 1 in
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
}
@@ -241,6 +245,7 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"popcnt{q}\t{$src, $dst|$dst, $src}", []>, XS;
@@ -267,14 +272,16 @@ def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
"push{q}\t$imm", []>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
"push{q}\t$imm", []>;
-def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i64i32imm:$imm),
"push{q}\t$imm", []>;
}
-let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in
-def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf{q}", []>, REX_W;
-let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in
-def PUSHFQ64 : I<0x9C, RawFrm, (outs), (ins), "pushf{q}", []>;
+let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1, neverHasSideEffects=1 in
+def POPF64 : I<0x9D, RawFrm, (outs), (ins), "popfq", []>,
+ Requires<[In64BitMode]>;
+let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
+def PUSHF64 : I<0x9C, RawFrm, (outs), (ins), "pushfq", []>,
+ Requires<[In64BitMode]>;
def LEA64_32r : I<0x8D, MRMSrcMem,
(outs GR32:$dst), (ins lea64_32mem:$src),
@@ -309,16 +316,22 @@ def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
} // Defs = [EFLAGS]
// Repeat string ops
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI] in
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
[(X86rep_movs i64)]>, REP;
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI] in
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
[(X86rep_stos i64)]>, REP;
-def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scas{q}", []>;
+let Defs = [EDI,ESI], Uses = [EDI,ESI,EFLAGS] in
+def MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "movsq", []>;
+
+let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+def STOSQ : RI<0xAB, RawFrm, (outs), (ins), "stosq", []>;
-def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmps{q}", []>;
+def SCAS64 : RI<0xAF, RawFrm, (outs), (ins), "scasq", []>;
+
+def CMPS64 : RI<0xA7, RawFrm, (outs), (ins), "cmpsq", []>;
// Fast system-call instructions
def SYSEXIT64 : RI<0x35, RawFrm,
@@ -341,8 +354,17 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
[(set GR64:$dst, i64immSExt32:$src)]>;
}
+// The assembler accepts movq of a 64-bit immediate as an alternate spelling of
+// movabsq.
+let isAsmParserOnly = 1 in {
+def MOV64ri_alt : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
+ "mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
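The isAsmParserOnly def above exists only so the assembler accepts the alternate spelling; codegen never selects it. Both of the following (standard AT&T syntax, shown for illustration) should now assemble to the same REX.W B8+rd io encoding:

// movabsq $0x1122334455667788, %rax   ; canonical spelling (MOV64ri)
// movq    $0x1122334455667788, %rax   ; alternate spelling (MOV64ri_alt)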
+
+let isCodeGenOnly = 1 in {
def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
+}
let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
@@ -398,9 +420,9 @@ def MOV64dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
// Moves to and from control registers
-def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG_64:$src),
+def MOV64rc : I<0x20, MRMDestReg, (outs GR64:$dst), (ins CONTROL_REG:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_64:$dst), (ins GR64:$src),
+def MOV64cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
// Sign/Zero extenders
@@ -478,7 +500,7 @@ def def32 : PatLeaf<(i32 GR32:$src), [{
// In the case of a 32-bit def that is known to implicitly zero-extend,
// we can use a SUBREG_TO_REG.
def : Pat<(i64 (zext def32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
let neverHasSideEffects = 1 in {
let Defs = [RAX], Uses = [EAX] in
@@ -496,7 +518,7 @@ let neverHasSideEffects = 1 in {
let Defs = [EFLAGS] in {
-def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i32imm:$src),
+def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src),
"add{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -555,7 +577,7 @@ def ADD64mi32 : RIi32<0x81, MRM0m, (outs), (ins i64mem:$dst, i64i32imm :$src2),
let Uses = [EFLAGS] in {
-def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i32imm:$src),
+def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src),
"adc{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -565,9 +587,11 @@ def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst),
"adc{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (adde GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def ADC64rr_REV : RI<0x13, MRMSrcReg , (outs GR32:$dst),
(ins GR64:$src1, GR64:$src2),
"adc{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def ADC64rm : RI<0x13, MRMSrcMem , (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
@@ -605,9 +629,11 @@ def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst),
[(set GR64:$dst, EFLAGS,
(X86sub_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
// Register-Memory Subtraction
def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst),
@@ -629,7 +655,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
(X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
-def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src),
+def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src),
"sub{q}\t{$src, %rax|%rax, $src}", []>;
// Memory-Register Subtraction
@@ -657,9 +683,11 @@ def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst),
"sbb{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, (sube GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def SBB64rr_REV : RI<0x1B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"sbb{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def SBB64rm : RI<0x1B, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
@@ -676,7 +704,7 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst),
[(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
-def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i32imm:$src),
+def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src),
"sbb{q}\t{$src, %rax|%rax, $src}", []>;
def SBB64mr : RI<0x19, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
@@ -1076,7 +1104,7 @@ def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst",
[(store (not (loadi64 addr:$dst)), addr:$dst)]>;
let Defs = [EFLAGS] in {
-def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i32imm:$src),
+def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src),
"and{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -1086,9 +1114,11 @@ def AND64rr : RI<0x21, MRMDestReg,
"and{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86and_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def AND64rm : RI<0x23, MRMSrcMem,
(outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
@@ -1129,9 +1159,11 @@ def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
"or{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86or_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
@@ -1162,7 +1194,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store (or (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
-def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i32imm:$src),
+def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src),
"or{q}\t{$src, %rax|%rax, $src}", []>;
let isTwoAddress = 1 in {
@@ -1172,9 +1204,11 @@ def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst),
"xor{q}\t{$src2, $dst|$dst, $src2}",
[(set GR64:$dst, EFLAGS,
(X86xor_flag GR64:$src1, GR64:$src2))]>;
+let isCodeGenOnly = 1 in {
def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}", []>;
+}
def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
@@ -1205,7 +1239,7 @@ def XOR64mi32 : RIi32<0x81, MRM6m, (outs), (ins i64mem:$dst, i64i32imm:$src),
[(store (xor (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst),
(implicit EFLAGS)]>;
-def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
+def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i64i32imm:$src),
"xor{q}\t{$src, %rax|%rax, $src}", []>;
} // Defs = [EFLAGS]
@@ -1216,7 +1250,7 @@ def XOR64i32 : RIi32<0x35, RawFrm, (outs), (ins i32imm:$src),
// Integer comparison
let Defs = [EFLAGS] in {
-def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src),
+def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i64i32imm:$src),
"test{q}\t{$src, %rax|%rax, $src}", []>;
let isCommutable = 1 in
def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2),
@@ -1238,7 +1272,7 @@ def TEST64mi32 : RIi32<0xF7, MRM0m, (outs),
i64immSExt32:$src2), 0))]>;
-def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src),
+def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i64i32imm:$src),
"cmp{q}\t{$src, %rax|%rax, $src}", []>;
def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2),
"cmp{q}\t{$src2, $src1|$src1, $src2}",
@@ -1293,7 +1327,8 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB;
+ [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB,
+ REX_W;
// Note that these instructions don't need FastBTMem because that
// only applies when the other operand is in a register. When it's
// an immediate, bt is still fast.
@@ -1714,11 +1749,13 @@ def XCHG64rr : RI<0x87, MRMSrcReg, (outs GR64:$dst), (ins GR64:$val,GR64:$src),
def XADD64rr : RI<0xC1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in
def XADD64rm : RI<0xC1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"xadd{q}\t{$src, $dst|$dst, $src}", []>, TB;
def CMPXCHG64rr : RI<0xB1, MRMDestReg, (outs GR64:$dst), (ins GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in
def CMPXCHG64rm : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
"cmpxchg{q}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -1730,7 +1767,7 @@ def XCHG64ar : RI<0x90, AddRegFrm, (outs), (ins GR64:$src),
"xchg{q}\t{$src, %rax|%rax, $src}", []>;
// Optimized codegen when the non-memory output is not used.
-let Defs = [EFLAGS] in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
def LOCK_ADD64mr : RI<0x03, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
"lock\n\t"
@@ -1982,14 +2019,14 @@ def : Pat<(extloadi64i16 addr:$src), (MOVZX64rm16 addr:$src)>;
// defined after an extload.
def : Pat<(extloadi64i32 addr:$src),
(SUBREG_TO_REG (i64 0), (MOV32rm addr:$src),
- x86_subreg_32bit)>;
+ sub_32bit)>;
// anyext. Define these to do an explicit zero-extend to
// avoid partial-register updates.
def : Pat<(i64 (anyext GR8 :$src)), (MOVZX64rr8 GR8 :$src)>;
def : Pat<(i64 (anyext GR16:$src)), (MOVZX64rr16 GR16 :$src)>;
def : Pat<(i64 (anyext GR32:$src)),
- (SUBREG_TO_REG (i64 0), GR32:$src, x86_subreg_32bit)>;
+ (SUBREG_TO_REG (i64 0), GR32:$src, sub_32bit)>;
//===----------------------------------------------------------------------===//
// Some peepholes
@@ -2016,54 +2053,54 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm),
(SUBREG_TO_REG
(i64 0),
(AND32ri
- (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit),
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
(i32 (GetLo32XForm imm:$imm))),
- x86_subreg_32bit)>;
+ sub_32bit)>;
// r & (2^32-1) ==> movz
def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
- (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+ (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
// r & (2^16-1) ==> movz
def : Pat<(and GR64:$src, 0xffff),
- (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)))>;
+ (MOVZX64rr16 (i16 (EXTRACT_SUBREG GR64:$src, sub_16bit)))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR64:$src, 0xff),
- (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)))>;
+ (MOVZX64rr8 (i8 (EXTRACT_SUBREG GR64:$src, sub_8bit)))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
- (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, x86_subreg_8bit))>,
+ (MOVZX32rr8 (EXTRACT_SUBREG GR32:$src1, sub_8bit))>,
Requires<[In64BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
- (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, x86_subreg_8bit)))>,
+ (MOVZX16rr8 (i8 (EXTRACT_SUBREG GR16:$src1, sub_8bit)))>,
Requires<[In64BitMode]>;
// sext_inreg patterns
def : Pat<(sext_inreg GR64:$src, i32),
- (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
+ (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, sub_32bit))>;
def : Pat<(sext_inreg GR64:$src, i16),
- (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit))>;
+ (MOVSX64rr16 (EXTRACT_SUBREG GR64:$src, sub_16bit))>;
def : Pat<(sext_inreg GR64:$src, i8),
- (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit))>;
+ (MOVSX64rr8 (EXTRACT_SUBREG GR64:$src, sub_8bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
- (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit))>,
+ (MOVSX32rr8 (EXTRACT_SUBREG GR32:$src, sub_8bit))>,
Requires<[In64BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
- (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)))>,
+ (MOVSX16rr8 (i8 (EXTRACT_SUBREG GR16:$src, sub_8bit)))>,
Requires<[In64BitMode]>;
// trunc patterns
def : Pat<(i32 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit)>;
+ (EXTRACT_SUBREG GR64:$src, sub_32bit)>;
def : Pat<(i16 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, x86_subreg_16bit)>;
+ (EXTRACT_SUBREG GR64:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR64:$src)),
- (EXTRACT_SUBREG GR64:$src, x86_subreg_8bit)>;
+ (EXTRACT_SUBREG GR64:$src, sub_8bit)>;
def : Pat<(i8 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, x86_subreg_8bit)>,
+ (EXTRACT_SUBREG GR32:$src, sub_8bit)>,
Requires<[In64BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
- (EXTRACT_SUBREG GR16:$src, x86_subreg_8bit)>,
+ (EXTRACT_SUBREG GR16:$src, sub_8bit)>,
Requires<[In64BitMode]>;
// h-register tricks.
@@ -2079,67 +2116,67 @@ def : Pat<(and (srl_su GR64:$src, (i8 8)), (i64 255)),
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_32bit)>;
+ sub_8bit_hi)),
+ sub_32bit)>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
(MOVZX32_NOREXrr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_16bit)>,
+ sub_8bit_hi)),
+ sub_16bit)>,
Requires<[In64BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(i64 (zext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_32bit)>;
+ sub_8bit_hi)),
+ sub_32bit)>;
def : Pat<(i64 (anyext (srl_su GR16:$src, (i8 8)))),
(SUBREG_TO_REG
(i64 0),
(MOVZX32_NOREXrr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_32bit)>;
+ sub_8bit_hi)),
+ sub_32bit)>;
// h-register extract and store.
def : Pat<(store (i8 (trunc_su (srl_su GR64:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i64 (COPY_TO_REGCLASS GR64:$src, GR64_ABCD)),
- x86_subreg_8bit_hi))>;
+ sub_8bit_hi))>;
def : Pat<(store (i8 (trunc_su (srl_su GR32:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
(MOV8mr_NOREX
addr:$dst,
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In64BitMode]>;
// (shl x, 1) ==> (add x, x)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a21bfb9..34e12ca 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -744,17 +744,17 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
case X86::MOVZX32rr8:
case X86::MOVSX64rr8:
case X86::MOVZX64rr8:
- SubIdx = 1;
+ SubIdx = X86::sub_8bit;
break;
case X86::MOVSX32rr16:
case X86::MOVZX32rr16:
case X86::MOVSX64rr16:
case X86::MOVZX64rr16:
- SubIdx = 3;
+ SubIdx = X86::sub_16bit;
break;
case X86::MOVSX64rr32:
case X86::MOVZX64rr32:
- SubIdx = 4;
+ SubIdx = X86::sub_32bit;
break;
}
return true;
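This hunk trades bare integers for the TableGen-generated sub-register index enumerators; the literals 1/3/4 were only valid while the generated numbering happened to line up, which this update's sub-register index rework changes. The shape the code now takes, as a self-contained sketch (the enum values here are stand-ins; the real ones come from TableGen):

namespace X86 { enum { sub_8bit = 1, sub_16bit = 3, sub_32bit = 4 }; }

// Map the source width of a coalescable zero/sign-extend to its named
// sub-register index instead of a hard-coded constant.
static unsigned subIdxForExtend(unsigned SrcBits) {
  switch (SrcBits) {
  case 8:  return X86::sub_8bit;
  case 16: return X86::sub_16bit;
  case 32: return X86::sub_32bit;
  default: return 0; // no sub-register
  }
}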
@@ -1065,7 +1065,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
const TargetRegisterInfo *TRI) const {
- DebugLoc DL = MBB.findDebugLoc(I);
+ DebugLoc DL = Orig->getDebugLoc();
if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
DestReg = TRI->getSubReg(DestReg, SubIdx);
@@ -1154,7 +1154,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
.addReg(leaInReg)
.addReg(Src, getKillRegState(isKill))
- .addImm(X86::SUBREG_16BIT);
+ .addImm(X86::sub_16bit);
MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
get(Opc), leaOutReg);
@@ -1198,7 +1198,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
.addReg(leaInReg2)
.addReg(Src2, getKillRegState(isKill2))
- .addImm(X86::SUBREG_16BIT);
+ .addImm(X86::sub_16bit);
addRegReg(MIB, leaInReg, true, leaInReg2, true);
}
if (LV && isKill2 && InsMI2)
@@ -1212,7 +1212,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(leaOutReg, RegState::Kill)
- .addImm(X86::SUBREG_16BIT);
+ .addImm(X86::sub_16bit);
if (LV) {
// Update live variables
@@ -1901,8 +1901,8 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL = MBB.findDebugLoc(MI);
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
// Determine if DstRC and SrcRC have a common superclass.
const TargetRegisterClass *CommonRC = DestRC;
@@ -1993,12 +1993,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
if (SrcReg != X86::EFLAGS)
return false;
if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSHFQ64));
+ BuildMI(MBB, MI, DL, get(X86::PUSHF64));
BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
return true;
} else if (DestRC == &X86::GR32RegClass ||
DestRC == &X86::GR32_NOSPRegClass) {
- BuildMI(MBB, MI, DL, get(X86::PUSHFD));
+ BuildMI(MBB, MI, DL, get(X86::PUSHF32));
BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
return true;
}
@@ -2007,12 +2007,12 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
return false;
if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
- BuildMI(MBB, MI, DL, get(X86::POPFQ));
+ BuildMI(MBB, MI, DL, get(X86::POPF64));
return true;
} else if (SrcRC == &X86::GR32RegClass ||
DestRC == &X86::GR32_NOSPRegClass) {
BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
- BuildMI(MBB, MI, DL, get(X86::POPFD));
+ BuildMI(MBB, MI, DL, get(X86::POPF32));
return true;
}
}
@@ -2133,7 +2133,8 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIdx,
- const TargetRegisterClass *RC) const {
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
@@ -2230,7 +2231,8 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIdx,
- const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
@@ -2256,7 +2258,8 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -2284,7 +2287,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
CalleeFrameSize += SlotSize;
BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
} else {
- storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+ storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass,
+ &RI);
}
}
@@ -2294,7 +2298,8 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty())
return false;
@@ -2314,7 +2319,7 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (RegClass != &X86::VR128RegClass && !isWin64) {
BuildMI(MBB, MI, DL, get(Opc), Reg);
} else {
- loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+ loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
}
}
return true;
@@ -2478,9 +2483,9 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
unsigned DstReg = NewMI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(DstReg))
NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
- 4/*x86_subreg_32bit*/));
+ X86::sub_32bit));
else
- NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
+ NewMI->getOperand(0).setSubReg(X86::sub_32bit);
}
return NewMI;
}
@@ -2526,9 +2531,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (MI->getOpcode()) {
default: return NULL;
case X86::TEST8rr: NewOpc = X86::CMP8ri; RCSize = 1; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; RCSize = 2; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; RCSize = 4; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; RCSize = 2; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; RCSize = 4; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; RCSize = 8; break;
}
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
@@ -2595,9 +2600,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
switch (MI->getOpcode()) {
default: return NULL;
case X86::TEST8rr: NewOpc = X86::CMP8ri; break;
- case X86::TEST16rr: NewOpc = X86::CMP16ri; break;
- case X86::TEST32rr: NewOpc = X86::CMP32ri; break;
- case X86::TEST64rr: NewOpc = X86::CMP64ri32; break;
+ case X86::TEST16rr: NewOpc = X86::CMP16ri8; break;
+ case X86::TEST32rr: NewOpc = X86::CMP32ri8; break;
+ case X86::TEST64rr: NewOpc = X86::CMP64ri8; break;
}
// Change to CMPXXri r, 0 first.
MI->setDesc(get(NewOpc));
@@ -2805,16 +2810,22 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
switch (DataMI->getOpcode()) {
default: break;
case X86::CMP64ri32:
+ case X86::CMP64ri8:
case X86::CMP32ri:
+ case X86::CMP32ri8:
case X86::CMP16ri:
+ case X86::CMP16ri8:
case X86::CMP8ri: {
MachineOperand &MO0 = DataMI->getOperand(0);
MachineOperand &MO1 = DataMI->getOperand(1);
if (MO1.getImm() == 0) {
switch (DataMI->getOpcode()) {
default: break;
+ case X86::CMP64ri8:
case X86::CMP64ri32: NewOpc = X86::TEST64rr; break;
+ case X86::CMP32ri8:
case X86::CMP32ri: NewOpc = X86::TEST32rr; break;
+ case X86::CMP16ri8:
case X86::CMP16ri: NewOpc = X86::TEST16rr; break;
case X86::CMP8ri: NewOpc = X86::TEST8rr; break;
}
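The unfolding path above rewrites cmp r, 0 (now in its ri8 widths as well) back into test r, r. That is sound because, comparing against zero, both set ZF/SF/PF identically and clear CF and OF (AF is left undefined by test and is not relied on), and the test form drops the immediate byte. The ZF agreement, demonstrated:

#include <cstdint>
// For any r: (r - 0) == 0 exactly when (r & r) == 0, so ZF matches.
static bool zfAfterCmpZero(uint32_t r)  { return (r - 0u) == 0u; }
static bool zfAfterTestSelf(uint32_t r) { return (r & r) == 0u; }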
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index df99c7f..62d7c74 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -590,11 +590,13 @@ public:
MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
SmallVectorImpl<MachineOperand> &Addr,
@@ -606,7 +608,8 @@ public:
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
SmallVectorImpl<MachineOperand> &Addr,
@@ -617,11 +620,13 @@ public:
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index a2754ea..0d59c42 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -195,15 +195,15 @@ def ptr_rc_nosp : PointerLikeRegClass<1>;
//
def X86MemAsmOperand : AsmOperandClass {
let Name = "Mem";
- let SuperClass = ?;
-}
-def X86AbsMemAsmOperand : AsmOperandClass {
- let Name = "AbsMem";
- let SuperClass = X86MemAsmOperand;
+ let SuperClasses = [];
}
def X86NoSegMemAsmOperand : AsmOperandClass {
let Name = "NoSegMem";
- let SuperClass = X86MemAsmOperand;
+ let SuperClasses = [X86MemAsmOperand];
+}
+def X86AbsMemAsmOperand : AsmOperandClass {
+ let Name = "AbsMem";
+ let SuperClasses = [X86NoSegMemAsmOperand];
}
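The re-rooting above (AbsMem now beneath NoSegMem, which sits beneath Mem) matters because the generated assembly matcher uses the SuperClasses lattice to order candidates, preferring more specific operand classes; treat the ordering below as an inference about AsmMatcherEmitter, not a quotation:

// Assumed matcher preference, most specific first:
//   AbsMem   (bare absolute address, e.g. a call/jmp target)
//   NoSegMem (memory operand with no segment override)
//   Mem      (fully general memory operand)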
class X86MemOperand<string printMethod> : Operand<iPTR> {
let PrintMethod = printMethod;
@@ -270,19 +270,49 @@ def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
}
-def ImmSExt8AsmOperand : AsmOperandClass {
- let Name = "ImmSExt8";
- let SuperClass = ImmAsmOperand;
+class ImmSExtAsmOperandClass : AsmOperandClass {
+ let SuperClasses = [ImmAsmOperand];
+ let RenderMethod = "addImmOperands";
+}
+
+// Sign-extended immediate classes. We don't need to define the full lattice
+// here because there is no instruction with an ambiguity between ImmSExti64i32
+// and ImmSExti32i8.
+//
+// The strange ranges come from the fact that the assembler always works with
+// 64-bit immediates, but for a 16-bit target value we want to accept both "-1"
+// (which will be a -1ULL), and "0xFFFF" (-1 in 16 bits).
+
+// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i32";
+}
+
+// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti16i8";
+ let SuperClasses = [ImmSExti64i32AsmOperand];
+}
+
+// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti32i8";
+}
+
+// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF]
+def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
+ let Name = "ImmSExti64i8";
+ let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand];
}
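The interval notation compresses one rule: accept an immediate if truncating it to the operand width and sign-extending its low 8 (or 32) bits reproduces a value the programmer could have meant. The ImmSExti16i8 row, restated in C++ using exactly the three ranges listed above:

#include <cstdint>

// Accept v if it is a sign-extended 8-bit value or a 16-bit spelling of one,
// so "-1" and "0xFFFF" both denote the same 16-bit operand.
static bool isImmSExti16i8(uint64_t v) {
  return v <= 0x7F ||
         (v >= 0xFF80 && v <= 0xFFFF) ||
         v >= 0xFFFFFFFFFFFFFF80ULL;
}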
// A couple of more descriptive operand definitions.
// 16-bits but only 8 bits are significant.
def i16i8imm : Operand<i16> {
- let ParserMatchClass = ImmSExt8AsmOperand;
+ let ParserMatchClass = ImmSExti16i8AsmOperand;
}
// 32-bits but only 8 bits are significant.
def i32i8imm : Operand<i32> {
- let ParserMatchClass = ImmSExt8AsmOperand;
+ let ParserMatchClass = ImmSExti32i8AsmOperand;
}
//===----------------------------------------------------------------------===//
@@ -542,8 +572,10 @@ let neverHasSideEffects = 1 in {
}
// Trap
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>;
-def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
+def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", []>;
+// FIXME: need to make sure that "int $3" matches int3
+def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
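Context for the FIXME: the two trap spellings differ in encoding, and the difference is observable to debuggers:

// int3    => 0xCC        ; one byte, the form debuggers plant as a breakpoint
// int $3  => 0xCD 0x03   ; two bytes, the generic software-interrupt form
// Matching "int $3" onto INT3 would recover the short breakpoint encoding,
// which is what the FIXME asks for.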
def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iret{w}", []>, OpSize;
def IRET32 : I<0xcf, RawFrm, (outs), (ins), "iret{l}", []>;
@@ -693,6 +725,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNri : I<0, Pseudo, (outs),
(ins GR32_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
+ let mayLoad = 1 in
def TCRETURNmi : I<0, Pseudo, (outs),
(ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
@@ -706,8 +739,16 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
"jmp{l}\t{*}$dst # TAILCALL",
[]>;
+ let mayLoad = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
"jmp{l}\t{*}$dst # TAILCALL", []>;
+
+ // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL
+ // marker on instructions, while still being able to relax.
+ let isCodeGenOnly = 1 in {
+ def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst),
+ "jmp\t$dst # TAILCALL", []>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -719,10 +760,12 @@ def LEAVE : I<0xC9, RawFrm,
def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
+let mayLoad = 1 in
def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"popcnt{w}\t{$src, $dst|$dst, $src}", []>, OpSize, XS;
def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
+let mayLoad = 1 in
def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"popcnt{l}\t{$src, $dst|$dst, $src}", []>, XS;
@@ -762,12 +805,14 @@ def PUSHi32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
}
let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in {
-def POPF : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
-def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf{l}", []>;
+def POPF16 : I<0x9D, RawFrm, (outs), (ins), "popf{w}", []>, OpSize;
+def POPF32 : I<0x9D, RawFrm, (outs), (ins), "popf{l|d}", []>,
+ Requires<[In32BitMode]>;
}
let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in {
-def PUSHF : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
-def PUSHFD : I<0x9C, RawFrm, (outs), (ins), "pushf{l}", []>;
+def PUSHF16 : I<0x9C, RawFrm, (outs), (ins), "pushf{w}", []>, OpSize;
+def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>,
+ Requires<[In32BitMode]>;
}
let isTwoAddress = 1 in // GR32 = bswap GR32
@@ -867,7 +912,7 @@ def RDTSC : I<0x31, RawFrm, (outs), (ins), "rdtsc", [(X86rdtsc)]>,
let Defs = [RAX, RCX, RDX] in
def RDTSCP : I<0x01, MRM_F9, (outs), (ins), "rdtscp", []>, TB;
-let isBarrier = 1, hasCtrlDep = 1 in {
+let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
}
@@ -966,36 +1011,47 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store (i32 imm:$src), addr:$dst)]>;
-def MOV8o8a : Ii8 <0xA0, RawFrm, (outs), (ins offset8:$src),
+/// moffs8, moffs16 and moffs32 versions of moves. The immediate is an
+/// absolute 32-bit memory offset, not PC-relative; these forms are only
+/// valid in x86-32 mode.
+def MOV8o8a : Ii32 <0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{b}\t{$src, %al|%al, $src}", []>;
-def MOV16o16a : Ii16 <0xA1, RawFrm, (outs), (ins offset16:$src),
+def MOV16o16a : Ii32 <0xA1, RawFrm, (outs), (ins offset16:$src),
"mov{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
def MOV32o32a : Ii32 <0xA1, RawFrm, (outs), (ins offset32:$src),
"mov{l}\t{$src, %eax|%eax, $src}", []>;
-
-def MOV8ao8 : Ii8 <0xA2, RawFrm, (outs offset8:$dst), (ins),
+def MOV8ao8 : Ii32 <0xA2, RawFrm, (outs offset8:$dst), (ins),
"mov{b}\t{%al, $dst|$dst, %al}", []>;
-def MOV16ao16 : Ii16 <0xA3, RawFrm, (outs offset16:$dst), (ins),
+def MOV16ao16 : Ii32 <0xA3, RawFrm, (outs offset16:$dst), (ins),
"mov{w}\t{%ax, $dst|$dst, %ax}", []>, OpSize;
def MOV32ao32 : Ii32 <0xA3, RawFrm, (outs offset32:$dst), (ins),
"mov{l}\t{%eax, $dst|$dst, %eax}", []>;
-
+
// Moves to and from segment registers
def MOV16rs : I<0x8C, MRMDestReg, (outs GR16:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32rs : I<0x8C, MRMDestReg, (outs GR32:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
def MOV16ms : I<0x8C, MRMDestMem, (outs i16mem:$dst), (ins SEGMENT_REG:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32ms : I<0x8C, MRMDestMem, (outs i32mem:$dst), (ins SEGMENT_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
def MOV16sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR16:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sr : I<0x8E, MRMSrcReg, (outs SEGMENT_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
def MOV16sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i16mem:$src),
- "mov{w}\t{$src, $dst|$dst, $src}", []>;
+ "mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
+def MOV32sm : I<0x8E, MRMSrcMem, (outs SEGMENT_REG:$dst), (ins i32mem:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>;
+let isCodeGenOnly = 1 in {
def MOV8rr_REV : I<0x8A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src),
"mov{b}\t{$src, $dst|$dst, $src}", []>;
def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"mov{w}\t{$src, $dst|$dst, $src}", []>, OpSize;
def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
+}
let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
@@ -1059,10 +1115,10 @@ def MOV32dr : I<0x23, MRMSrcReg, (outs DEBUG_REG:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
// Moves to and from control registers
-def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG_32:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
-def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src),
- "mov{q}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV32rc : I<0x20, MRMDestReg, (outs GR32:$dst), (ins CONTROL_REG:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
+def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG:$dst), (ins GR32:$src),
+ "mov{l}\t{$src, $dst|$dst, $src}", []>, TB;
//===----------------------------------------------------------------------===//
// Fixed-Register Multiplication and Division Instructions...
@@ -1746,6 +1802,7 @@ def AND32rr : I<0x21, MRMDestReg,
// AND instructions with the destination register in REG and the source register
// in R/M. Included for the disassembler.
+let isCodeGenOnly = 1 in {
def AND8rr_REV : I<0x22, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"and{b}\t{$src2, $dst|$dst, $src2}", []>;
def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst),
@@ -1754,6 +1811,7 @@ def AND16rr_REV : I<0x23, MRMSrcReg, (outs GR16:$dst),
def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"and{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def AND8rm : I<0x22, MRMSrcMem,
(outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
@@ -1872,6 +1930,7 @@ def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst),
// OR instructions with the destination register in REG and the source register
// in R/M. Included for the disassembler.
+let isCodeGenOnly = 1 in {
def OR8rr_REV : I<0x0A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"or{b}\t{$src2, $dst|$dst, $src2}", []>;
def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
@@ -1880,6 +1939,7 @@ def OR16rr_REV : I<0x0B, MRMSrcReg, (outs GR16:$dst),
def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst),
(ins GR8 :$src1, i8mem :$src2),
@@ -1988,6 +2048,7 @@ let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
// XOR instructions with the destination register in REG and the source register
// in R/M. Included for the disassembler.
+let isCodeGenOnly = 1 in {
def XOR8rr_REV : I<0x32, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"xor{b}\t{$src2, $dst|$dst, $src2}", []>;
def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst),
@@ -1996,6 +2057,7 @@ def XOR16rr_REV : I<0x33, MRMSrcReg, (outs GR16:$dst),
def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def XOR8rm : I<0x32, MRMSrcMem,
(outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
@@ -2793,6 +2855,7 @@ def ADC32rr : I<0x11, MRMDestReg, (outs GR32:$dst),
[(set GR32:$dst, (adde GR32:$src1, GR32:$src2))]>;
}
+let isCodeGenOnly = 1 in {
def ADC8rr_REV : I<0x12, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"adc{b}\t{$src2, $dst|$dst, $src2}", []>;
def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
@@ -2801,6 +2864,7 @@ def ADC16rr_REV : I<0x13, MRMSrcReg, (outs GR16:$dst),
def ADC32rr_REV : I<0x13, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"adc{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def ADC8rm : I<0x12, MRMSrcMem , (outs GR8:$dst),
(ins GR8:$src1, i8mem:$src2),
@@ -2888,6 +2952,7 @@ def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
[(set GR32:$dst, EFLAGS,
(X86sub_flag GR32:$src1, GR32:$src2))]>;
+let isCodeGenOnly = 1 in {
def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}", []>;
def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
@@ -2896,6 +2961,7 @@ def SUB16rr_REV : I<0x2B, MRMSrcReg, (outs GR16:$dst),
def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
// Register-Memory Subtraction
def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
@@ -3039,6 +3105,7 @@ let isTwoAddress = 0 in {
"sbb{l}\t{$src, %eax|%eax, $src}", []>;
}
+let isCodeGenOnly = 1 in {
def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}", []>;
def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
@@ -3047,6 +3114,7 @@ def SBB16rr_REV : I<0x1B, MRMSrcReg, (outs GR16:$dst),
def SBB32rr_REV : I<0x1B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"sbb{l}\t{$src2, $dst|$dst, $src2}", []>;
+}
def SBB8rm : I<0x1A, MRMSrcMem, (outs GR8:$dst), (ins GR8:$src1, i8mem:$src2),
"sbb{b}\t{$src2, $dst|$dst, $src2}",
@@ -3864,12 +3932,14 @@ def XADD16rr : I<0xC1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
def XADD32rr : I<0xC1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in {
def XADD8rm : I<0xC0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"xadd{b}\t{$src, $dst|$dst, $src}", []>, TB;
def XADD16rm : I<0xC1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"xadd{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def XADD32rm : I<0xC1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"xadd{l}\t{$src, $dst|$dst, $src}", []>, TB;
+}
def CMPXCHG8rr : I<0xB0, MRMDestReg, (outs GR8:$dst), (ins GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
@@ -3878,12 +3948,14 @@ def CMPXCHG16rr : I<0xB1, MRMDestReg, (outs GR16:$dst), (ins GR16:$src),
def CMPXCHG32rr : I<0xB1, MRMDestReg, (outs GR32:$dst), (ins GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+let mayLoad = 1, mayStore = 1 in {
def CMPXCHG8rm : I<0xB0, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"cmpxchg{b}\t{$src, $dst|$dst, $src}", []>, TB;
def CMPXCHG16rm : I<0xB1, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src),
"cmpxchg{w}\t{$src, $dst|$dst, $src}", []>, TB, OpSize;
def CMPXCHG32rm : I<0xB1, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"cmpxchg{l}\t{$src, $dst|$dst, $src}", []>, TB;
+}
let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX] in
def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
@@ -3891,7 +3963,7 @@ def CMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$dst),
// Optimized codegen when the non-memory output is not used.
// FIXME: Use normal add / sub instructions and add lock prefix dynamically.
-let Defs = [EFLAGS] in {
+let Defs = [EFLAGS], mayLoad = 1, mayStore = 1 in {
def LOCK_ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
"lock\n\t"
"add{b}\t{$src2, $dst|$dst, $src2}", []>, LOCK;
@@ -4453,7 +4525,7 @@ def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
// Except for i16 -> i32 since isel expect i16 ops to be promoted to i32.
def : Pat<(i32 (anyext GR16:$src)),
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, x86_subreg_16bit)>;
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR16:$src, sub_16bit)>;
//===----------------------------------------------------------------------===//
@@ -4473,81 +4545,81 @@ def : Pat<(store (add (loadi32 addr:$dst), 128), addr:$dst),
// r & (2^16-1) ==> movz
def : Pat<(and GR32:$src1, 0xffff),
- (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, x86_subreg_16bit))>;
+ (MOVZX32rr16 (EXTRACT_SUBREG GR32:$src1, sub_16bit))>;
// r & (2^8-1) ==> movz
def : Pat<(and GR32:$src1, 0xff),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src1,
GR32_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
// r & (2^8-1) ==> movz
def : Pat<(and GR16:$src1, 0xff),
(MOVZX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src1,
GR16_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
// sext_inreg patterns
def : Pat<(sext_inreg GR32:$src, i16),
- (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit))>;
+ (MOVSX32rr16 (EXTRACT_SUBREG GR32:$src, sub_16bit))>;
def : Pat<(sext_inreg GR32:$src, i8),
(MOVSX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
def : Pat<(sext_inreg GR16:$src, i8),
(MOVSX16rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
- x86_subreg_8bit))>,
+ sub_8bit))>,
Requires<[In32BitMode]>;
// trunc patterns
def : Pat<(i16 (trunc GR32:$src)),
- (EXTRACT_SUBREG GR32:$src, x86_subreg_16bit)>;
+ (EXTRACT_SUBREG GR32:$src, sub_16bit)>;
def : Pat<(i8 (trunc GR32:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit)>,
+ sub_8bit)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc GR16:$src)),
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit)>,
+ sub_8bit)>,
Requires<[In32BitMode]>;
// h-register tricks
def : Pat<(i8 (trunc (srl_su GR16:$src, (i8 8)))),
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)>,
+ sub_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(i8 (trunc (srl_su GR32:$src, (i8 8)))),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src, GR32_ABCD)),
- x86_subreg_8bit_hi)>,
+ sub_8bit_hi)>,
Requires<[In32BitMode]>;
def : Pat<(srl GR16:$src, (i8 8)),
(EXTRACT_SUBREG
(MOVZX32rr8
(EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src, GR16_ABCD)),
- x86_subreg_8bit_hi)),
- x86_subreg_16bit)>,
+ sub_8bit_hi)),
+ sub_16bit)>,
Requires<[In32BitMode]>;
def : Pat<(i32 (zext (srl_su GR16:$src, (i8 8)))),
(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(i32 (anyext (srl_su GR16:$src, (i8 8)))),
(MOVZX32rr8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS GR16:$src,
GR16_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(and (srl_su GR32:$src, (i8 8)), (i32 255)),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
def : Pat<(srl (and_su GR32:$src, 0xff00), (i8 8)),
(MOVZX32rr8 (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS GR32:$src,
GR32_ABCD)),
- x86_subreg_8bit_hi))>,
+ sub_8bit_hi))>,
Requires<[In32BitMode]>;
// (shl x, 1) ==> (add x, x)
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 744af50..0952fc8 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -117,9 +117,6 @@ def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVQ64gmr : MMXRI<0x7F, MRMDestMem, (outs),
- (ins i64mem:$dst, VR64:$src),
- "movq\t{$src, $dst|$dst, $src}", []>;
let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
@@ -133,10 +130,10 @@ let neverHasSideEffects = 1 in
def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg,
(outs GR64:$dst), (ins VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
- "movd\t{$src, $dst|$dst, $src}",
- [(set VR64:$dst,
- (v1i64 (scalar_to_vector GR64:$src)))]>;
+def MMX_MOVD64rrv164 : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set VR64:$dst,
+ (v1i64 (scalar_to_vector GR64:$src)))]>;
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 2129580..5580ba7 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -117,17 +117,17 @@ def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
-def alignedloadfsf32 : PatFrag<(ops node:$ptr),
+def alignedloadfsf32 : PatFrag<(ops node:$ptr),
(f32 (alignedload node:$ptr))>;
-def alignedloadfsf64 : PatFrag<(ops node:$ptr),
+def alignedloadfsf64 : PatFrag<(ops node:$ptr),
(f64 (alignedload node:$ptr))>;
-def alignedloadv4f32 : PatFrag<(ops node:$ptr),
+def alignedloadv4f32 : PatFrag<(ops node:$ptr),
(v4f32 (alignedload node:$ptr))>;
-def alignedloadv2f64 : PatFrag<(ops node:$ptr),
+def alignedloadv2f64 : PatFrag<(ops node:$ptr),
(v2f64 (alignedload node:$ptr))>;
-def alignedloadv4i32 : PatFrag<(ops node:$ptr),
+def alignedloadv4i32 : PatFrag<(ops node:$ptr),
(v4i32 (alignedload node:$ptr))>;
-def alignedloadv2i64 : PatFrag<(ops node:$ptr),
+def alignedloadv2i64 : PatFrag<(ops node:$ptr),
(v2i64 (alignedload node:$ptr))>;
// Like 'load', but uses special alignment checks suitable for use in
@@ -387,11 +387,11 @@ def MOVSSrr : SSI<0x10, MRMSrcReg,
let AddedComplexity = 15 in
def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
(MOVSSrr (v4f32 VR128:$src1),
- (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
- (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>;
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>;
// Loading from memory automatically zeroing upper bits.
let canFoldAsLoad = 1, isReMaterializable = 1 in
@@ -403,11 +403,11 @@ def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
// with SUBREG_TO_REG.
let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>;
}
// Store scalar value to memory.
@@ -419,7 +419,7 @@ def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSSmr addr:$dst,
- (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
// Conversion instructions
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
@@ -449,7 +449,7 @@ def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src),
[(set GR32:$dst, (int_x86_sse_cvtss2si
(load addr:$src)))]>;
-// Match intrinisics which expect MM and XMM operand(s).
+// Match intrinsics which expect MM and XMM operand(s).
def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvtps2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>;
@@ -509,6 +509,17 @@ let mayLoad = 1 in
def CMPSSrm : SSIi8<0xC2, MRMSrcMem,
(outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}", []>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+let mayLoad = 1 in
+ def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2),
+ "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
}
let Defs = [EFLAGS] in {
@@ -518,25 +529,25 @@ def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2),
def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2),
"ucomiss\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>;
-
+
def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"comiss\t{$src2, $src1|$src1, $src2}", []>;
def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
"comiss\t{$src2, $src1|$src1, $src2}", []>;
-
+
} // Defs = [EFLAGS]
// Aliases to match intrinsics which expect XMM operand(s).
let Constraints = "$src1 = $dst" in {
def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_cmp_ss
+ [(set VR128:$dst, (int_x86_sse_cmp_ss
VR128:$src1,
VR128:$src, imm:$cc))]>;
def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, f32mem:$src, SSECC:$cc),
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1,
@@ -1009,6 +1020,16 @@ let Constraints = "$src1 = $dst" in {
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1,
(memop addr:$src), imm:$cc))]>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
+ "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
+ def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
+ "cmpps\t{$src2, $src, $dst|$dst, $src, $src}", []>;
+}
}
def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>;
@@ -1102,7 +1123,8 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
}
// Load, store, and memory fence
-def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+ TB, Requires<[HasSSE1]>;
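The move from PSI/MRM7r to I/MRM_F8 pins the whole ModRM byte instead of encoding a /7 form with a register-dependent r/m field; since sfence takes no operands, its encoding collapses to a fixed byte sequence. A small C++ sketch of the bytes involved (values taken from the Intel manuals, not from this patch):

    // MRM_F8 means ModRM = 0xF8 (mod=11, reg=7, rm=0), so the full
    // instruction is the constant three-byte sequence below.
    static const unsigned char SfenceBytes[] = { 0x0F, 0xAE, 0xF8 }; // sfence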
// MXCSR register
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
@@ -1130,7 +1152,7 @@ def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
//===---------------------------------------------------------------------===//
// SSE2 Instructions
@@ -1152,11 +1174,11 @@ def MOVSDrr : SDI<0x10, MRMSrcReg,
let AddedComplexity = 15 in
def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
(MOVSDrr (v2f64 VR128:$src1),
- (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>;
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
- (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>;
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>;
// Loading from memory automatically zeroing upper bits.
let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
@@ -1168,15 +1190,15 @@ def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
// with SUBREG_TO_REG.
let AddedComplexity = 20 in {
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>;
}
// Store scalar value to memory.
@@ -1188,7 +1210,7 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSDmr addr:$dst,
- (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+ (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
// Conversion instructions
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
@@ -1255,7 +1277,7 @@ def Int_CVTSD2SIrm : SDI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src),
[(set GR32:$dst, (int_x86_sse2_cvtsd2si
(load addr:$src)))]>;
-// Match intrinisics which expect MM and XMM operand(s).
+// Match intrinsics which expect MM and XMM operand(s).
def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src),
"cvtpd2pi\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>;
@@ -1297,6 +1319,17 @@ let mayLoad = 1 in
def CMPSDrm : SDIi8<0xC2, MRMSrcMem,
(outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}", []>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+let mayLoad = 1 in
+ def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2),
+ "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
}
let Defs = [EFLAGS] in {
@@ -1311,13 +1344,13 @@ def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2),
// Aliases to match intrinsics which expect XMM operand(s).
let Constraints = "$src1 = $dst" in {
def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
VR128:$src, imm:$cc))]>;
def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem,
- (outs VR128:$dst),
+ (outs VR128:$dst),
(ins VR128:$src1, f64mem:$src, SSECC:$cc),
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1,
@@ -1655,7 +1688,7 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
+ [(set VR128:$dst,
(int_x86_sse2_cvttps2dq VR128:$src))]>,
XS, Requires<[HasSSE2]>;
def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -1890,6 +1923,16 @@ let Constraints = "$src1 = $dst" in {
"cmp${cc}pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
(memop addr:$src), imm:$cc))]>;
+
+ // Accept explicit immediate argument form instead of comparison code.
+let isAsmParserOnly = 1 in {
+ def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+ def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem,
+ (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
+ "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
+}
}
def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
(CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
@@ -1980,24 +2023,24 @@ let Constraints = "$src1 = $dst" in {
multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
bit Commutable = 0> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]> {
let isCommutable = Commutable;
}
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
- (bitconvert (memopv2i64
+ (bitconvert (memopv2i64
addr:$src2))))]>;
}
multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
string OpcodeStr,
Intrinsic IntId, Intrinsic IntId2> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
@@ -2006,7 +2049,7 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId VR128:$src1,
(bitconvert (memopv2i64 addr:$src2))))]>;
- def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
+ def ri : PDIi8<opc2, ImmForm, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
@@ -2015,13 +2058,13 @@ multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
/// PDI_binop_rm - Simple SSE2 binary operator.
multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
- def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : PDI<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]> {
let isCommutable = Commutable;
}
- def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
+ def rm : PDI<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1,
@@ -2416,6 +2459,10 @@ def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
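Because REP is the 0xF3 prefix and opcode 0x90 is a plain nop, the definition above emits the two-byte sequence F3 90; a CPU without SSE2 ignores the prefix and simply executes the nop, which is what the backward-compatibility comment relies on. Sketched in C++ (byte values assumed from the encoding, not emitted by this file):

    static const unsigned char PauseBytes[] = { 0xF3, 0x90 }; // rep; nop
    static const unsigned char NopBytes[]   = { 0x90 };       // what pre-SSE2 CPUs execute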
//TODO: custom lower this so as to never even generate the noop
def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
(i8 0)), (NOOP)>;
@@ -2462,7 +2509,7 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
(iPTR 0))), addr:$dst)]>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
- (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
@@ -2903,7 +2950,7 @@ defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw",
defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw",
int_x86_ssse3_pmul_hr_sw,
int_x86_ssse3_pmul_hr_sw_128, 1>;
-
+
defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb",
int_x86_ssse3_pshuf_b,
int_x86_ssse3_pshuf_b_128>;
@@ -3042,10 +3089,10 @@ def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
(MOVSSrr (v4f32 (V_SET0PS)),
- (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
(MOVSSrr (v4i32 (V_SET0PI)),
- (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4i32 VR128:$src), sub_ss))>;
}
// Splat v2f64 / v2i64
@@ -3181,17 +3228,17 @@ let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
(MOVSSrr (v4i32 VR128:$src1),
- (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>;
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), sub_ss))>;
def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
(MOVSDrr (v2i64 VR128:$src1),
- (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>;
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), sub_sd))>;
// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
Requires<[HasSSE2]>;
def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, sub_sd))>,
Requires<[HasSSE2]>;
}
@@ -3464,14 +3511,14 @@ let Constraints = "$src1 = $dst" in {
let Constraints = "$src1 = $dst" in {
multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT, bit Commutable = 0> {
- def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
OpSize {
let isCommutable = Commutable;
}
- def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (OpNode VR128:$src1,
@@ -3949,15 +3996,15 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
"#PCMPESTRM128rr PSEUDO!",
- [(set VR128:$dst,
- (int_x86_sse42_pcmpestrm128
+ [(set VR128:$dst,
+ (int_x86_sse42_pcmpestrm128
VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize;
def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"#PCMPESTRM128rm PSEUDO!",
- [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
- VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
+ [(set VR128:$dst, (int_x86_sse42_pcmpestrm128
+ VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>,
OpSize;
}
@@ -3972,7 +4019,7 @@ def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs),
let Defs = [ECX, EFLAGS] in {
multiclass SS42AI_pcmpistri<Intrinsic IntId128> {
- def rr : SS42AI<0x63, MRMSrcReg, (outs),
+ def rr : SS42AI<0x63, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}",
[(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)),
@@ -4003,7 +4050,7 @@ let Uses = [EAX, EDX] in {
def rm : SS42AI<0x61, MRMSrcMem, (outs),
(ins VR128:$src1, i128mem:$src3, i8imm:$src5),
"pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}",
- [(set ECX,
+ [(set ECX,
(IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)),
(implicit EFLAGS)]>, OpSize;
}
@@ -4081,16 +4128,15 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
OpSize;
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src1, i32i8imm:$src2),
+ (ins VR128:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
OpSize;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src1, i32i8imm:$src2),
+ (ins i128mem:$src1, i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
imm:$src2))]>,
OpSize;
-
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index a3e04b0..98975ea 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -37,7 +37,6 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -145,6 +144,19 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
case X86::XMM7: case X86::XMM15: case X86::MM7:
return 7;
+ case X86::ES:
+ return 0;
+ case X86::CS:
+ return 1;
+ case X86::SS:
+ return 2;
+ case X86::DS:
+ return 3;
+ case X86::FS:
+ return 4;
+ case X86::GS:
+ return 5;
+
default:
assert(isVirtualRegister(RegNo) && "Unknown physical register!");
llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
@@ -158,8 +170,7 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
unsigned SubIdx) const {
switch (SubIdx) {
default: return 0;
- case 1:
- // 8-bit
+ case X86::sub_8bit:
if (B == &X86::GR8RegClass) {
if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
return A;
@@ -191,12 +202,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR16_NOREXRegClass;
else if (A == &X86::GR16_ABCDRegClass)
return &X86::GR16_ABCDRegClass;
- } else if (B == &X86::FR32RegClass) {
- return A;
}
break;
- case 2:
- // 8-bit hi
+ case X86::sub_8bit_hi:
if (B == &X86::GR8_ABCD_HRegClass) {
if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
A == &X86::GR64_NOREXRegClass ||
@@ -209,12 +217,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
A == &X86::GR16_NOREXRegClass)
return &X86::GR16_ABCDRegClass;
- } else if (B == &X86::FR64RegClass) {
- return A;
}
break;
- case 3:
- // 16-bit
+ case X86::sub_16bit:
if (B == &X86::GR16RegClass) {
if (A->getSize() == 4 || A->getSize() == 8)
return A;
@@ -238,12 +243,9 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR32_NOREXRegClass;
else if (A == &X86::GR32_ABCDRegClass)
return &X86::GR64_ABCDRegClass;
- } else if (B == &X86::VR128RegClass) {
- return A;
}
break;
- case 4:
- // 32-bit
+ case X86::sub_32bit:
if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
if (A->getSize() == 8)
return A;
@@ -261,6 +263,18 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR64_ABCDRegClass;
}
break;
+ case X86::sub_ss:
+ if (B == &X86::FR32RegClass)
+ return A;
+ break;
+ case X86::sub_sd:
+ if (B == &X86::FR64RegClass)
+ return A;
+ break;
+ case X86::sub_xmm:
+ if (B == &X86::VR128RegClass)
+ return A;
+ break;
}
return 0;
}
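With the switch now keyed on the generated sub-register index constants, the scalar-SSE cases get their own sub_ss/sub_sd/sub_xmm arms instead of being folded into the integer ones. A hypothetical call, sketched to show the contract (assumes a TargetRegisterInfo *TRI in scope):

    // Which subclass of A has a B-class register at sub-index Idx?
    const TargetRegisterClass *RC =
        TRI->getMatchingSuperRegClass(&X86::VR128RegClass, // A
                                      &X86::FR32RegClass,  // B
                                      X86::sub_ss);        // Idx
    // After this patch RC == &X86::VR128RegClass: every XMM register
    // exposes its FR32 view through sub_ss.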
@@ -518,6 +532,30 @@ X86RegisterInfo::getFrameIndexOffset(const MachineFunction &MF, int FI) const {
return Offset;
}
+static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::SUB64ri8;
+ return X86::SUB64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::SUB32ri8;
+ return X86::SUB32ri;
+ }
+}
+
+static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
+ if (is64Bit) {
+ if (isInt<8>(Imm))
+ return X86::ADD64ri8;
+ return X86::ADD64ri32;
+ } else {
+ if (isInt<8>(Imm))
+ return X86::ADD32ri8;
+ return X86::ADD32ri;
+ }
+}
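Both helpers centralize the same decision: an immediate that fits in a signed 8-bit field can use the shorter ri8 encoding, anything else needs the 32-bit form. isInt<8> comes from llvm/Support/MathExtras.h and tests signed representability, which also behaves sensibly for negative deltas, unlike the open-coded "Amount < 128" checks being replaced below. Illustrative values (mine, not from the patch):

    #include "llvm/Support/MathExtras.h"
    // isInt<8>(127)  -> true   => SUB64ri8 / ADD64ri8   (1-byte immediate)
    // isInt<8>(128)  -> false  => SUB64ri32 / ADD64ri32 (4-byte immediate)
    // isInt<8>(-128) -> true   => the signed range is -128..127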
+
void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@@ -537,7 +575,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *New = 0;
if (Old->getOpcode() == getCallFrameSetupOpcode()) {
New = BuildMI(MF, Old->getDebugLoc(),
- TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
+ TII.get(getSUBriOpcode(Is64Bit, Amount)),
StackPtr)
.addReg(StackPtr)
.addImm(Amount);
@@ -549,9 +587,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Amount -= CalleeAmt;
if (Amount) {
- unsigned Opc = (Amount < 128) ?
- (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
- (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
+ unsigned Opc = getADDriOpcode(Is64Bit, Amount);
New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(Amount);
@@ -571,9 +607,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// something off the stack pointer, add it back. We do this until we have
// more advanced stack pointer tracking ability.
if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
- unsigned Opc = (CalleeAmt < 128) ?
- (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
- (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
+ unsigned Opc = getSUBriOpcode(Is64Bit, CalleeAmt);
MachineInstr *Old = I;
MachineInstr *New =
BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
@@ -691,13 +725,9 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
const TargetInstrInfo &TII) {
bool isSub = NumBytes < 0;
uint64_t Offset = isSub ? -NumBytes : NumBytes;
- unsigned Opc = isSub
- ? ((Offset < 128) ?
- (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
- (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri))
- : ((Offset < 128) ?
- (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
- (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri));
+ unsigned Opc = isSub ?
+ getSUBriOpcode(Is64Bit, Offset) :
+ getADDriOpcode(Is64Bit, Offset);
uint64_t Chunk = (1LL << 31) - 1;
DebugLoc DL = MBB.findDebugLoc(MBBI);
@@ -899,7 +929,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
!needsStackRealignment(MF) &&
!MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->hasCalls() && // No calls.
+ !MFI->adjustsStack() && // No calls.
!Subtarget->isTargetWin64()) { // Win64 has no Red Zone
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
@@ -917,7 +947,8 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
// size is bigger than the callers.
if (TailCallReturnAddrDelta < 0) {
MachineInstr *MI =
- BuildMI(MBB, MBBI, DL, TII.get(Is64Bit? X86::SUB64ri32 : X86::SUB32ri),
+ BuildMI(MBB, MBBI, DL,
+ TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
.addImm(-TailCallReturnAddrDelta);
@@ -1307,7 +1338,7 @@ X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves) const {
// Calculate amount of bytes used for return address storing
int stackGrowth = (Is64Bit ? -8 : -4);
- // Initial state of the frame pointer is esp+4.
+ // Initial state of the frame pointer is esp+stackGrowth.
MachineLocation Dst(MachineLocation::VirtualFP);
MachineLocation Src(StackPtr, stackGrowth);
Moves.push_back(MachineMove(0, Dst, Src));
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index ac96c4c..d0b82e2 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -30,16 +30,6 @@ namespace N86 {
};
}
-namespace X86 {
- /// SubregIndex - The index of various sized subregister classes. Note that
- /// these indices must be kept in sync with the class indices in the
- /// X86RegisterInfo.td file.
- enum SubregIndex {
- SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
- SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3
- };
-}
-
/// DWARFFlavour - Flavour of dwarf regnumbers
///
namespace DWARFFlavour {
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 49a6ca0..91cfaa9 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -18,6 +18,17 @@
//
let Namespace = "X86" in {
+ // Subregister indices.
+ def sub_8bit : SubRegIndex;
+ def sub_8bit_hi : SubRegIndex;
+ def sub_16bit : SubRegIndex;
+ def sub_32bit : SubRegIndex;
+
+ def sub_ss : SubRegIndex;
+ def sub_sd : SubRegIndex;
+ def sub_xmm : SubRegIndex;
+
+
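These named indices replace the bare integers that both the C++ enum in X86RegisterInfo.h and the x86_subreg_* PatLeafs (both deleted later in this patch) used to carry. TableGen now emits matching constants into the X86 namespace, which is what lets the X86RegisterInfo.cpp hunks above write X86::sub_8bit instead of "case 1". A sketch of the generated interface (the exact numbering is TableGen's; the values here are assumed):

    namespace X86 {
      enum {
        sub_8bit = 1, sub_8bit_hi = 2, sub_16bit = 3, sub_32bit = 4,
        sub_ss = 5, sub_sd = 6, sub_xmm = 7
      };
    }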
// In the register alias definitions below, we define which registers alias
// which others. We only specify which registers the small registers alias,
// because the register file generator is smart enough to figure out that
@@ -57,17 +68,22 @@ let Namespace = "X86" in {
def BH : Register<"bh">, DwarfRegNum<[3, 3, 3]>;
// 16-bit registers
+ let SubRegIndices = [sub_8bit, sub_8bit_hi] in {
def AX : RegisterWithSubRegs<"ax", [AL,AH]>, DwarfRegNum<[0, 0, 0]>;
def DX : RegisterWithSubRegs<"dx", [DL,DH]>, DwarfRegNum<[1, 2, 2]>;
def CX : RegisterWithSubRegs<"cx", [CL,CH]>, DwarfRegNum<[2, 1, 1]>;
def BX : RegisterWithSubRegs<"bx", [BL,BH]>, DwarfRegNum<[3, 3, 3]>;
+ }
+ let SubRegIndices = [sub_8bit] in {
def SI : RegisterWithSubRegs<"si", [SIL]>, DwarfRegNum<[4, 6, 6]>;
def DI : RegisterWithSubRegs<"di", [DIL]>, DwarfRegNum<[5, 7, 7]>;
def BP : RegisterWithSubRegs<"bp", [BPL]>, DwarfRegNum<[6, 4, 5]>;
def SP : RegisterWithSubRegs<"sp", [SPL]>, DwarfRegNum<[7, 5, 4]>;
+ }
def IP : Register<"ip">, DwarfRegNum<[16]>;
// X86-64 only
+ let SubRegIndices = [sub_8bit] in {
def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
@@ -76,8 +92,9 @@ let Namespace = "X86" in {
def R13W : RegisterWithSubRegs<"r13w", [R13B]>, DwarfRegNum<[13, -2, -2]>;
def R14W : RegisterWithSubRegs<"r14w", [R14B]>, DwarfRegNum<[14, -2, -2]>;
def R15W : RegisterWithSubRegs<"r15w", [R15B]>, DwarfRegNum<[15, -2, -2]>;
-
+ }
// 32-bit registers
+ let SubRegIndices = [sub_16bit] in {
def EAX : RegisterWithSubRegs<"eax", [AX]>, DwarfRegNum<[0, 0, 0]>;
def EDX : RegisterWithSubRegs<"edx", [DX]>, DwarfRegNum<[1, 2, 2]>;
def ECX : RegisterWithSubRegs<"ecx", [CX]>, DwarfRegNum<[2, 1, 1]>;
@@ -97,8 +114,10 @@ let Namespace = "X86" in {
def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
+ }
// 64-bit registers, X86-64 only
+ let SubRegIndices = [sub_32bit] in {
def RAX : RegisterWithSubRegs<"rax", [EAX]>, DwarfRegNum<[0, -2, -2]>;
def RDX : RegisterWithSubRegs<"rdx", [EDX]>, DwarfRegNum<[1, -2, -2]>;
def RCX : RegisterWithSubRegs<"rcx", [ECX]>, DwarfRegNum<[2, -2, -2]>;
@@ -117,6 +136,7 @@ let Namespace = "X86" in {
def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
+ }
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
@@ -137,7 +157,9 @@ let Namespace = "X86" in {
def FP5 : Register<"fp5">;
def FP6 : Register<"fp6">;
- // XMM Registers, used by the various SSE instruction set extensions
+ // XMM Registers, used by the various SSE instruction set extensions.
+ // The sub_ss and sub_sd subregs are the same registers, viewed through another register class.
+ let CompositeIndices = [(sub_ss), (sub_sd)] in {
def XMM0: Register<"xmm0">, DwarfRegNum<[17, 21, 21]>;
def XMM1: Register<"xmm1">, DwarfRegNum<[18, 22, 22]>;
def XMM2: Register<"xmm2">, DwarfRegNum<[19, 23, 23]>;
@@ -156,8 +178,10 @@ let Namespace = "X86" in {
def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
+ }
// YMM Registers, used by AVX instructions
+ let SubRegIndices = [sub_xmm] in {
def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
@@ -174,6 +198,7 @@ let Namespace = "X86" in {
def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;
+ }
// Floating point stack registers
def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
@@ -207,106 +232,19 @@ let Namespace = "X86" in {
def DR7 : Register<"dr7">;
// Condition registers
- def ECR0 : Register<"ecr0">;
- def ECR1 : Register<"ecr1">;
- def ECR2 : Register<"ecr2">;
- def ECR3 : Register<"ecr3">;
- def ECR4 : Register<"ecr4">;
- def ECR5 : Register<"ecr5">;
- def ECR6 : Register<"ecr6">;
- def ECR7 : Register<"ecr7">;
-
- def RCR0 : Register<"rcr0">;
- def RCR1 : Register<"rcr1">;
- def RCR2 : Register<"rcr2">;
- def RCR3 : Register<"rcr3">;
- def RCR4 : Register<"rcr4">;
- def RCR5 : Register<"rcr5">;
- def RCR6 : Register<"rcr6">;
- def RCR7 : Register<"rcr7">;
- def RCR8 : Register<"rcr8">;
+ def CR0 : Register<"cr0">;
+ def CR1 : Register<"cr1">;
+ def CR2 : Register<"cr2">;
+ def CR3 : Register<"cr3">;
+ def CR4 : Register<"cr4">;
+ def CR5 : Register<"cr5">;
+ def CR6 : Register<"cr6">;
+ def CR7 : Register<"cr7">;
+ def CR8 : Register<"cr8">;
}
//===----------------------------------------------------------------------===//
-// Subregister Set Definitions... now that we have all of the pieces, define the
-// sub registers for each register.
-//
-
-def x86_subreg_8bit : PatLeaf<(i32 1)>;
-def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
-def x86_subreg_16bit : PatLeaf<(i32 3)>;
-def x86_subreg_32bit : PatLeaf<(i32 4)>;
-
-def x86_subreg_ss : PatLeaf<(i32 1)>;
-def x86_subreg_sd : PatLeaf<(i32 2)>;
-def x86_subreg_xmm : PatLeaf<(i32 3)>;
-
-def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
- [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [AX, CX, DX, BX],
- [AH, CH, DH, BH]>;
-
-def : SubRegSet<1, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
- [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [EAX, ECX, EDX, EBX],
- [AH, CH, DH, BH]>;
-
-def : SubRegSet<3, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D],
- [AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<1, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
- [AL, CL, DL, BL, SPL, BPL, SIL, DIL,
- R8B, R9B, R10B, R11B, R12B, R13B, R14B, R15B]>;
-
-def : SubRegSet<2, [RAX, RCX, RDX, RBX],
- [AH, CH, DH, BH]>;
-
-def : SubRegSet<3, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
- [AX, CX, DX, BX, SP, BP, SI, DI,
- R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W]>;
-
-def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
- R8, R9, R10, R11, R12, R13, R14, R15],
- [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
- R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
-
-def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
-
-//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
// implicitly defined to be the register allocation order.
@@ -370,7 +308,7 @@ def GR8 : RegisterClass<"X86", [i8], 8,
def GR16 : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP,
R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
- let SubRegClassList = [GR8, GR8];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -422,7 +360,7 @@ def GR16 : RegisterClass<"X86", [i16], 16,
def GR32 : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
- let SubRegClassList = [GR8, GR8, GR16];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -477,7 +415,9 @@ def GR32 : RegisterClass<"X86", [i32], 32,
def GR64 : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
- let SubRegClassList = [GR8, GR8, GR16, GR32];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32 sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -511,14 +451,8 @@ def DEBUG_REG : RegisterClass<"X86", [i32], 32,
}
// Control registers.
-def CONTROL_REG_32 : RegisterClass<"X86", [i32], 32,
- [ECR0, ECR1, ECR2, ECR3, ECR4, ECR5, ECR6,
- ECR7]> {
-}
-
-def CONTROL_REG_64 : RegisterClass<"X86", [i64], 64,
- [RCR0, RCR1, RCR2, RCR3, RCR4, RCR5, RCR6,
- RCR7, RCR8]> {
+def CONTROL_REG : RegisterClass<"X86", [i64], 64,
+ [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8]> {
}
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
@@ -532,20 +466,27 @@ def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]> {
def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]> {
}
def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
}
def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
}
def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit),
+ (GR32_ABCD sub_32bit)];
}
def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
- let SubRegClassList = [GR8, GR8, GR16];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}
def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
R8, R9, R11]> {
- let SubRegClassList = [GR8, GR8, GR16, GR32_TC];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_TC sub_32bit)];
}
// GR8_NOREX - GR8 registers which do not require a REX prefix.
@@ -585,7 +526,7 @@ def GR8_NOREX : RegisterClass<"X86", [i8], 8,
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
[AX, CX, DX, SI, DI, BX, BP, SP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -608,7 +549,8 @@ def GR16_NOREX : RegisterClass<"X86", [i16], 16,
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -631,7 +573,9 @@ def GR32_NOREX : RegisterClass<"X86", [i32], 32,
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -656,7 +600,7 @@ def GR64_NOREX : RegisterClass<"X86", [i64], 64,
def GR32_NOSP : RegisterClass<"X86", [i32], 32,
[EAX, ECX, EDX, ESI, EDI, EBX, EBP,
R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
- let SubRegClassList = [GR8, GR8, GR16];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
let MethodProtos = [{
iterator allocation_order_begin(const MachineFunction &MF) const;
iterator allocation_order_end(const MachineFunction &MF) const;
@@ -709,7 +653,9 @@ def GR32_NOSP : RegisterClass<"X86", [i32], 32,
def GR64_NOSP : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP]> {
- let SubRegClassList = [GR8, GR8, GR16, GR32_NOSP];
+ let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
+ (GR16 sub_16bit),
+ (GR32_NOSP sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -734,7 +680,9 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
[RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
- let SubRegClassList = [GR8_NOREX, GR8_NOREX, GR16_NOREX, GR32_NOREX];
+ let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
+ (GR16_NOREX sub_16bit),
+ (GR32_NOREX sub_32bit)];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -758,7 +706,9 @@ def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
// A class to support the 'A' assembler constraint: EAX then EDX.
def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
- let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD];
+ let SubRegClasses = [(GR8_ABCD_L sub_8bit),
+ (GR8_ABCD_H sub_8bit_hi),
+ (GR16_ABCD sub_16bit)];
}
// Scalar SSE2 floating point registers.
@@ -836,7 +786,8 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
- let SubRegClassList = [FR32, FR64];
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
+
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -856,7 +807,7 @@ def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]> {
- let SubRegClassList = [FR32, FR64, VR128];
+ let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
}
// Status flags registers.
diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp
index cd87b82..6297a27 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -12,11 +12,232 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "x86-selectiondag-info"
-#include "X86SelectionDAGInfo.h"
+#include "X86TargetMachine.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
-X86SelectionDAGInfo::X86SelectionDAGInfo() {
+X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) :
+ TargetSelectionDAGInfo(TM),
+ Subtarget(&TM.getSubtarget<X86Subtarget>()),
+ TLI(*TM.getTargetLowering()) {
}
X86SelectionDAGInfo::~X86SelectionDAGInfo() {
}
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ const Value *DstSV,
+ uint64_t DstSVOff) const {
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+
+ // If not DWORD aligned or size is more than the threshold, call the library.
+ // The libc version is likely to be faster for these cases. It can use the
+ // address value and run time information about the CPU.
+ if ((Align & 3) != 0 ||
+ !ConstantSize ||
+ ConstantSize->getZExtValue() >
+ Subtarget->getMaxInlineSizeThreshold()) {
+ SDValue InFlag(0, 0);
+
+ // Check to see if there is a specialized entry-point for memory zeroing.
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);
+
+ if (const char *bzeroEntry = V &&
+ V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
+ EVT IntPtr = TLI.getPointerTy();
+ const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext());
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = Dst;
+ Entry.Ty = IntPtrTy;
+ Args.push_back(Entry);
+ Entry.Node = Size;
+ Args.push_back(Entry);
+ std::pair<SDValue,SDValue> CallResult =
+ TLI.LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
+ false, false, false, false,
+ 0, CallingConv::C, false, /*isReturnValueUsed=*/false,
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args,
+ DAG, dl);
+ return CallResult.second;
+ }
+
+ // Otherwise have the target-independent code call memset.
+ return SDValue();
+ }
+
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ SDValue InFlag(0, 0);
+ EVT AVT;
+ SDValue Count;
+ ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
+ unsigned BytesLeft = 0;
+ bool TwoRepStos = false;
+ if (ValC) {
+ unsigned ValReg;
+ uint64_t Val = ValC->getZExtValue() & 255;
+
+ // If the value is a constant, then we can potentially use larger sets.
+ switch (Align & 3) {
+ case 2: // WORD aligned
+ AVT = MVT::i16;
+ ValReg = X86::AX;
+ Val = (Val << 8) | Val;
+ break;
+ case 0: // DWORD aligned
+ AVT = MVT::i32;
+ ValReg = X86::EAX;
+ Val = (Val << 8) | Val;
+ Val = (Val << 16) | Val;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
+ AVT = MVT::i64;
+ ValReg = X86::RAX;
+ Val = (Val << 32) | Val;
+ }
+ break;
+ default: // Byte aligned
+ AVT = MVT::i8;
+ ValReg = X86::AL;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ break;
+ }
+
+ if (AVT.bitsGT(MVT::i8)) {
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ Count = DAG.getIntPtrConstant(SizeVal / UBytes);
+ BytesLeft = SizeVal % UBytes;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
+ InFlag);
+ InFlag = Chain.getValue(1);
+ } else {
+ AVT = MVT::i8;
+ Count = DAG.getIntPtrConstant(SizeVal);
+ Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+
+ if (TwoRepStos) {
+ InFlag = Chain.getValue(1);
+ Count = Size;
+ EVT CVT = Count.getValueType();
+ SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
+ DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
+ Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
+ X86::ECX,
+ Left, InFlag);
+ InFlag = Chain.getValue(1);
+ Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
+ Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
+ } else if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT AddrVT = Dst.getValueType();
+ EVT SizeVT = Size.getValueType();
+
+ Chain = DAG.getMemset(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
+ DAG.getConstant(Offset, AddrVT)),
+ Src,
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, DstSV, DstSVOff + Offset);
+ }
+
+ // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
+ return Chain;
+}
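The constant-value path above widens the fill byte by repeated shift-or doubling so each rep stos iteration writes 2, 4, or 8 identical bytes, then splits the size into whole iterations plus a 1-7 byte tail handled by an ordinary getMemset. A standalone sketch of that arithmetic (variable names mirror the function; the concrete values are mine):

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Val = 0xAB;                   // fill byte
      Val = (Val << 8)  | Val;               // 0xABAB (WORD pattern)
      Val = (Val << 16) | Val;               // 0xABABABAB (DWORD pattern)
      Val = (Val << 32) | Val;               // 0xABABABABABABABAB (QWORD)

      uint64_t SizeVal = 29, UBytes = 8;     // 29 bytes, QWORD-aligned
      uint64_t Count     = SizeVal / UBytes; // 3 rep-stosq iterations
      uint64_t BytesLeft = SizeVal % UBytes; // 5-byte tail memset
      std::printf("%016llx %llu %llu\n", (unsigned long long)Val,
                  (unsigned long long)Count, (unsigned long long)BytesLeft);
      return 0;
    }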
+
+SDValue
+X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain, SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const {
+ // This requires the copy size to be a constant, preferably
+ // within a subtarget-specific limit.
+ ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
+ if (!ConstantSize)
+ return SDValue();
+ uint64_t SizeVal = ConstantSize->getZExtValue();
+ if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold())
+ return SDValue();
+
+ // If not DWORD aligned, call the library.
+ if ((Align & 3) != 0)
+ return SDValue();
+
+ // DWORD aligned
+ EVT AVT = MVT::i32;
+ if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
+ AVT = MVT::i64;
+
+ unsigned UBytes = AVT.getSizeInBits() / 8;
+ unsigned CountVal = SizeVal / UBytes;
+ SDValue Count = DAG.getIntPtrConstant(CountVal);
+ unsigned BytesLeft = SizeVal % UBytes;
+
+ SDValue InFlag(0, 0);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
+ X86::ECX,
+ Count, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
+ X86::EDI,
+ Dst, InFlag);
+ InFlag = Chain.getValue(1);
+ Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
+ X86::ESI,
+ Src, InFlag);
+ InFlag = Chain.getValue(1);
+
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
+ SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
+ array_lengthof(Ops));
+
+ SmallVector<SDValue, 4> Results;
+ Results.push_back(RepMovs);
+ if (BytesLeft) {
+ // Handle the last 1 - 7 bytes.
+ unsigned Offset = SizeVal - BytesLeft;
+ EVT DstVT = Dst.getValueType();
+ EVT SrcVT = Src.getValueType();
+ EVT SizeVT = Size.getValueType();
+ Results.push_back(DAG.getMemcpy(Chain, dl,
+ DAG.getNode(ISD::ADD, dl, DstVT, Dst,
+ DAG.getConstant(Offset, DstVT)),
+ DAG.getNode(ISD::ADD, dl, SrcVT, Src,
+ DAG.getConstant(Offset, SrcVT)),
+ DAG.getConstant(BytesLeft, SizeVT),
+ Align, isVolatile, AlwaysInline,
+ DstSV, DstSVOff + Offset,
+ SrcSV, SrcSVOff + Offset));
+ }
+
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Results[0], Results.size());
+}
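The memcpy path uses the same whole-iterations-plus-tail split, but loads three registers (count in RCX/ECX, destination in RDI/EDI, source in RSI/ESI) before emitting a single REP_MOVS node, and the remainder recurses through DAG.getMemcpy at dst+Offset and src+Offset. The split, with illustrative numbers of my choosing:

    unsigned SizeVal = 70, UBytes = 4;        // 70 bytes, DWORD aligned
    unsigned CountVal  = SizeVal / UBytes;    // 17 rep-movsd iterations
    unsigned BytesLeft = SizeVal % UBytes;    // 2 trailing bytes
    unsigned Offset    = SizeVal - BytesLeft; // tail starts at byte 68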
diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h
index 9834754..4f30f31 100644
--- a/lib/Target/X86/X86SelectionDAGInfo.h
+++ b/lib/Target/X86/X86SelectionDAGInfo.h
@@ -18,10 +18,40 @@
namespace llvm {
+class X86TargetLowering;
+class X86TargetMachine;
+class X86Subtarget;
+
class X86SelectionDAGInfo : public TargetSelectionDAGInfo {
+ /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const X86Subtarget *Subtarget;
+
+ const X86TargetLowering &TLI;
+
public:
- X86SelectionDAGInfo();
+ explicit X86SelectionDAGInfo(const X86TargetMachine &TM);
~X86SelectionDAGInfo();
+
+ virtual
+ SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile,
+ const Value *DstSV,
+ uint64_t DstSVOff) const;
+
+ virtual
+ SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
+ SDValue Chain,
+ SDValue Dst, SDValue Src,
+ SDValue Size, unsigned Align,
+ bool isVolatile, bool AlwaysInline,
+ const Value *DstSV,
+ uint64_t DstSVOff,
+ const Value *SrcSV,
+ uint64_t SrcSVOff) const;
};
}
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f39904e..f2c5058 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -17,17 +17,13 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Support/CommandLine.h"
-
using namespace llvm;
-static cl::opt<bool> DisableSSEDomain("disable-sse-domain",
- cl::init(false), cl::Hidden,
- cl::desc("Disable SSE Domain Fixing"));
-
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
@@ -43,6 +39,18 @@ static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
}
}
+static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
+ MCContext &Ctx, TargetAsmBackend &TAB,
+ raw_ostream &_OS,
+ MCCodeEmitter *_Emitter,
+ bool RelaxAll) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ default:
+ return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
+ }
+}
+
extern "C" void LLVMInitializeX86Target() {
// Register the target.
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
@@ -63,6 +71,12 @@ extern "C" void LLVMInitializeX86Target() {
createX86_32AsmBackend);
TargetRegistry::RegisterAsmBackend(TheX86_64Target,
createX86_64AsmBackend);
+
+ // Register the object streamer.
+ TargetRegistry::RegisterObjectStreamer(TheX86_32Target,
+ createMCStreamer);
+ TargetRegistry::RegisterObjectStreamer(TheX86_64Target,
+ createMCStreamer);
}
@@ -88,7 +102,8 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
Subtarget.getStackAlignment(),
(Subtarget.isTargetWin64() ? -40 :
(Subtarget.is64Bit() ? -8 : -4))),
- InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
+ InstrInfo(*this), JITInfo(*this), TLInfo(*this), TSInfo(*this),
+ ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
// If no relocation model was picked, default as appropriate for the target.
@@ -178,8 +193,7 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2() &&
- !DisableSSEDomain) {
+ if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
PM.add(createSSEDomainFixPass());
return true;
}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index dc4234c..f9fb424 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -23,6 +23,7 @@
#include "X86JITInfo.h"
#include "X86Subtarget.h"
#include "X86ISelLowering.h"
+#include "X86SelectionDAGInfo.h"
namespace llvm {
@@ -35,6 +36,7 @@ class X86TargetMachine : public LLVMTargetMachine {
X86InstrInfo InstrInfo;
X86JITInfo JITInfo;
X86TargetLowering TLInfo;
+ X86SelectionDAGInfo TSInfo;
X86ELFWriterInfo ELFWriterInfo;
Reloc::Model DefRelocModel; // Reloc model before it's overridden.
@@ -54,6 +56,9 @@ public:
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
}
+ virtual const X86SelectionDAGInfo *getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
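Exposing TSInfo through getSelectionDAGInfo() is what lets the generic SelectionDAG code find the memset/memcpy hooks added earlier in this patch. The dispatch, sketched from the signatures in this patch rather than from SelectionDAG.cpp itself:

    // const TargetSelectionDAGInfo *TSI = TM.getSelectionDAGInfo();
    // SDValue R = TSI->EmitTargetCodeForMemset(DAG, dl, Chain, Dst, Src,
    //                                          Size, Align, isVolatile,
    //                                          DstSV, DstSVOff);
    // if (!R.getNode())
    //   /* fall back to generic expansion or a libc call */;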
virtual const X86RegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 3990b8b..b230572 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -80,7 +80,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setShiftAmountType(MVT::i32);
setStackPointerRegisterToSaveRestore(XCore::SP);
- setSchedulingPreference(SchedulingForRegPressure);
+ setSchedulingPreference(Sched::RegPressure);
// Use i32 for setcc operations results (slt, sgt, ...).
setBooleanContents(ZeroOrOneBooleanContent);
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index c983112..5260258 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -361,9 +361,8 @@ bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const {
- DebugLoc DL;
- if (I != MBB.end()) DL = I->getDebugLoc();
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const {
if (DestRC == SrcRC) {
if (DestRC == XCore::GRRegsRegisterClass) {
@@ -395,7 +394,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned SrcReg, bool isKill,
int FrameIndex,
- const TargetRegisterClass *RC) const
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -408,7 +408,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const
{
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
@@ -419,7 +420,8 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const {
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
if (CSI.empty()) {
return true;
}
@@ -437,7 +439,7 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MBB.addLiveIn(it->getReg());
storeRegToStackSlot(MBB, MI, it->getReg(), true,
- it->getFrameIdx(), it->getRegClass());
+ it->getFrameIdx(), it->getRegClass(), &RI);
if (emitFrameMoves) {
MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol();
BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel);
@@ -449,7 +451,8 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const
{
bool AtStart = MI == MBB.begin();
MachineBasicBlock::iterator BeforeI = MI;
@@ -460,7 +463,7 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
loadRegFromStackSlot(MBB, MI, it->getReg(),
it->getFrameIdx(),
- it->getRegClass());
+ it->getRegClass(), &RI);
assert(MI != MBB.begin() &&
"loadRegFromStackSlot didn't insert any code!");
// Insert in reverse order. loadRegFromStackSlot can insert multiple
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 3e0a765..9035ea9 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -67,25 +67,30 @@ public:
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
const TargetRegisterClass *DestRC,
- const TargetRegisterClass *SrcRC) const;
+ const TargetRegisterClass *SrcRC,
+ DebugLoc DL) const;
virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned SrcReg, bool isKill, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
unsigned DestReg, int FrameIndex,
- const TargetRegisterClass *RC) const;
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
- const std::vector<CalleeSavedInfo> &CSI) const;
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const;
virtual bool ReverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 6aac237..44aeb60 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -12,10 +12,11 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "xcore-selectiondag-info"
-#include "XCoreSelectionDAGInfo.h"
+#include "XCoreTargetMachine.h"
using namespace llvm;
-XCoreSelectionDAGInfo::XCoreSelectionDAGInfo() {
+XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM)
+ : TargetSelectionDAGInfo(TM) {
}
XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() {
diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h
index fd96716..0386968 100644
--- a/lib/Target/XCore/XCoreSelectionDAGInfo.h
+++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h
@@ -18,9 +18,11 @@
namespace llvm {
+class XCoreTargetMachine;
+
class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo {
public:
- XCoreSelectionDAGInfo();
+ explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM);
~XCoreSelectionDAGInfo();
};
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 267f46a..b0013eb 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -28,7 +28,8 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
"i16:16:32-i32:32:32-i64:32:32-n32"),
InstrInfo(),
FrameInfo(*this),
- TLInfo(*this) {
+ TLInfo(*this),
+ TSInfo(*this) {
}
bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 701a6f1..14073ba 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -20,6 +20,7 @@
#include "XCoreSubtarget.h"
#include "XCoreInstrInfo.h"
#include "XCoreISelLowering.h"
+#include "XCoreSelectionDAGInfo.h"
namespace llvm {
@@ -29,6 +30,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreInstrInfo InstrInfo;
XCoreFrameInfo FrameInfo;
XCoreTargetLowering TLInfo;
+ XCoreSelectionDAGInfo TSInfo;
public:
XCoreTargetMachine(const Target &T, const std::string &TT,
const std::string &FS);
@@ -40,6 +42,10 @@ public:
return &TLInfo;
}
+ virtual const XCoreSelectionDAGInfo* getSelectionDAGInfo() const {
+ return &TSInfo;
+ }
+
virtual const TargetRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 6443dd4..692e47d 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -535,14 +535,14 @@ void DAE::MarkValue(const RetOrArg &RA, Liveness L,
/// values (according to Uses) live as well.
void DAE::MarkLive(const Function &F) {
DEBUG(dbgs() << "DAE - Intrinsically live fn: " << F.getName() << "\n");
- // Mark the function as live.
- LiveFunctions.insert(&F);
- // Mark all arguments as live.
- for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
- PropagateLiveness(CreateArg(&F, i));
- // Mark all return values as live.
- for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
- PropagateLiveness(CreateRet(&F, i));
+ // Mark the function as live.
+ LiveFunctions.insert(&F);
+ // Mark all arguments as live.
+ for (unsigned i = 0, e = F.arg_size(); i != e; ++i)
+ PropagateLiveness(CreateArg(&F, i));
+ // Mark all return values as live.
+ for (unsigned i = 0, e = NumRetVals(&F); i != e; ++i)
+ PropagateLiveness(CreateRet(&F, i));
}
/// MarkLive - Mark the given return value or argument as live. Additionally,
@@ -859,7 +859,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
Value *RetVal;
- if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) {
+ if (NFTy->getReturnType()->isVoidTy()) {
RetVal = 0;
} else {
assert (RetTy->isStructTy());
diff --git a/lib/Transforms/IPO/InlineAlways.cpp b/lib/Transforms/IPO/InlineAlways.cpp
index bc8028c..8e312e7 100644
--- a/lib/Transforms/IPO/InlineAlways.cpp
+++ b/lib/Transforms/IPO/InlineAlways.cpp
@@ -54,6 +54,9 @@ namespace {
return removeDeadFunctions(CG, &NeverInline);
}
virtual bool doInitialization(CallGraph &CG);
+ void releaseMemory() {
+ CA.clear();
+ }
};
}
diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp
index 46cf4b2..74b4a1c 100644
--- a/lib/Transforms/IPO/InlineSimple.cpp
+++ b/lib/Transforms/IPO/InlineSimple.cpp
@@ -49,6 +49,9 @@ namespace {
CA.growCachedCostInfo(Caller, Callee);
}
virtual bool doInitialization(CallGraph &CG);
+ void releaseMemory() {
+ CA.clear();
+ }
};
}
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index b07e22c..622a9b5 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -17,32 +17,55 @@
// important that the hash function be high quality. The equality comparison
// iterates through each instruction in each basic block.
//
-// When a match is found, the functions are folded. We can only fold two
-// functions when we know that the definition of one of them is not
-// overridable.
+// When a match is found the functions are folded. If both functions are
+// overridable, we move the functionality into a new internal function and
+// leave two overridable thunks to it.
//
//===----------------------------------------------------------------------===//
//
// Future work:
//
-// * fold vector<T*>::push_back and vector<S*>::push_back.
-//
-// These two functions have different types, but in a way that doesn't matter
-// to us. As long as we never see an S or T itself, using S* and S** is the
-// same as using a T* and T**.
-//
// * virtual functions.
//
// Many functions have their address taken by the virtual function table for
// the object they belong to. However, as long as it's only used for a lookup
// and call, this is irrelevant, and we'd like to fold such implementations.
//
+// * use SCC to cut down on pair-wise comparisons and solve larger cycles.
+//
+// The current implementation loops over a pair-wise comparison of all
+// functions in the program, where the two functions in the pair are assumed
+// to be equal until proven otherwise. We could both use fewer
+// comparisons and optimize more complex cases if we used strongly connected
+// components of the call graph.
+//
+// * be smarter about bitcast.
+//
+// In order to fold functions, we will sometimes add either bitcast instructions
+// or bitcast constant expressions. Unfortunately, this can confound further
+// analysis since the two functions differ where one has a bitcast and the
+// other doesn't. We should learn to peer through bitcasts without imposing bad
+// performance properties.
+//
+// * don't emit aliases for Mach-O.
+//
+// Mach-O doesn't support aliases, which means that we must avoid introducing
+// them in the bitcode on platforms which don't support them, such as
+// Mac OS X. There are a few approaches to this problem:
+// a) teach codegen to lower global aliases to thunks on platforms which don't
+// support them.
+// b) always emit thunks, and create a separate thunk-to-alias pass which
+// runs on ELF systems. This has the added benefit of transforming other
+// thunks such as those produced by a C++ frontend into aliases when legal
+// to do so.
+//
//===----------------------------------------------------------------------===//
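As a hedged illustration of the thunking scheme described in the header comment above (hypothetical functions, shown as a C++ analogue rather than IR): when two equal overridable functions are folded, the shared body moves into a new internal function and both originals become thunks to it.

static int impl(int x) { return x + 1; }  // merged body, internal linkage
int F(int x) { return impl(x); }          // overridable thunk
int G(int x) { return impl(x); }          // overridable thunk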
#define DEBUG_TYPE "mergefunc"
#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Constants.h"
#include "llvm/InlineAsm.h"
@@ -54,6 +77,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetData.h"
#include <map>
#include <vector>
using namespace llvm;
@@ -61,17 +85,33 @@ using namespace llvm;
STATISTIC(NumFunctionsMerged, "Number of functions merged");
namespace {
- struct MergeFunctions : public ModulePass {
+ class MergeFunctions : public ModulePass {
+ public:
static char ID; // Pass identification, replacement for typeid
MergeFunctions() : ModulePass(&ID) {}
bool runOnModule(Module &M);
+
+ private:
+ bool isEquivalentGEP(const GetElementPtrInst *GEP1,
+ const GetElementPtrInst *GEP2);
+
+ bool equals(const BasicBlock *BB1, const BasicBlock *BB2);
+ bool equals(const Function *F, const Function *G);
+
+ bool compare(const Value *V1, const Value *V2);
+
+ const Function *LHS, *RHS;
+ typedef DenseMap<const Value *, unsigned long> IDMap;
+ IDMap Map;
+ DenseMap<const Function *, IDMap> Domains;
+ DenseMap<const Function *, unsigned long> DomainCount;
+ TargetData *TD;
};
}
char MergeFunctions::ID = 0;
-static RegisterPass<MergeFunctions>
-X("mergefunc", "Merge Functions");
+static RegisterPass<MergeFunctions> X("mergefunc", "Merge Functions");
ModulePass *llvm::createMergeFunctionsPass() {
return new MergeFunctions();
@@ -95,15 +135,6 @@ static unsigned long hash(const Function *F) {
return ID.ComputeHash();
}
-/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast found by
-/// walking the chain of cast operands. Otherwise, returns the argument.
-static Value* IgnoreBitcasts(Value *V) {
- while (BitCastInst *BC = dyn_cast<BitCastInst>(V))
- V = BC->getOperand(0);
-
- return V;
-}
-
/// isEquivalentType - any two pointers are equivalent. Otherwise, standard
/// type equivalence rules apply.
static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
@@ -113,6 +144,14 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
return false;
switch(Ty1->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ // Fall through in Release-Asserts mode.
+ case Type::IntegerTyID:
+ case Type::OpaqueTyID:
+ // Ty1 == Ty2 would have returned true earlier.
+ return false;
+
case Type::VoidTyID:
case Type::FloatTyID:
case Type::DoubleTyID:
@@ -123,15 +162,6 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
case Type::MetadataTyID:
return true;
- case Type::IntegerTyID:
- case Type::OpaqueTyID:
- // Ty1 == Ty2 would have returned true earlier.
- return false;
-
- default:
- llvm_unreachable("Unknown type!");
- return false;
-
case Type::PointerTyID: {
const PointerType *PTy1 = cast<PointerType>(Ty1);
const PointerType *PTy2 = cast<PointerType>(Ty2);
@@ -154,6 +184,21 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) {
return true;
}
+ case Type::UnionTyID: {
+ const UnionType *UTy1 = cast<UnionType>(Ty1);
+ const UnionType *UTy2 = cast<UnionType>(Ty2);
+
+ // TODO: we could be fancy with union(A, union(A, B)) === union(A, B), etc.
+ if (UTy1->getNumElements() != UTy2->getNumElements())
+ return false;
+
+ for (unsigned i = 0, e = UTy1->getNumElements(); i != e; ++i) {
+ if (!isEquivalentType(UTy1->getElementType(i), UTy2->getElementType(i)))
+ return false;
+ }
+ return true;
+ }
+
case Type::FunctionTyID: {
const FunctionType *FTy1 = cast<FunctionType>(Ty1);
const FunctionType *FTy2 = cast<FunctionType>(Ty2);
@@ -236,123 +281,136 @@ isEquivalentOperation(const Instruction *I1, const Instruction *I2) {
return true;
}
-static bool compare(const Value *V, const Value *U) {
- assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) &&
- "Must not compare basic blocks.");
-
- assert(isEquivalentType(V->getType(), U->getType()) &&
- "Two of the same operation have operands of different type.");
+bool MergeFunctions::isEquivalentGEP(const GetElementPtrInst *GEP1,
+ const GetElementPtrInst *GEP2) {
+ if (TD && GEP1->hasAllConstantIndices() && GEP2->hasAllConstantIndices()) {
+ SmallVector<Value *, 8> Indices1, Indices2;
+ for (GetElementPtrInst::const_op_iterator I = GEP1->idx_begin(),
+ E = GEP1->idx_end(); I != E; ++I) {
+ Indices1.push_back(*I);
+ }
+ for (GetElementPtrInst::const_op_iterator I = GEP2->idx_begin(),
+ E = GEP2->idx_end(); I != E; ++I) {
+ Indices2.push_back(*I);
+ }
+ uint64_t Offset1 = TD->getIndexedOffset(GEP1->getPointerOperandType(),
+ Indices1.data(), Indices1.size());
+ uint64_t Offset2 = TD->getIndexedOffset(GEP2->getPointerOperandType(),
+ Indices2.data(), Indices2.size());
+ return Offset1 == Offset2;
+ }
- // TODO: If the constant is an expression of F, we should accept that it's
- // equal to the same expression in terms of G.
- if (isa<Constant>(V))
- return V == U;
+ // Equivalent types aren't enough.
+ if (GEP1->getPointerOperand()->getType() !=
+ GEP2->getPointerOperand()->getType())
+ return false;
- // The caller has ensured that ValueMap[V] != U. Since Arguments are
- // pre-loaded into the ValueMap, and Instructions are added as we go, we know
- // that this can only be a mis-match.
- if (isa<Instruction>(V) || isa<Argument>(V))
+ if (GEP1->getNumOperands() != GEP2->getNumOperands())
return false;
- if (isa<InlineAsm>(V) && isa<InlineAsm>(U)) {
- const InlineAsm *IAF = cast<InlineAsm>(V);
- const InlineAsm *IAG = cast<InlineAsm>(U);
- return IAF->getAsmString() == IAG->getAsmString() &&
- IAF->getConstraintString() == IAG->getConstraintString();
+ for (unsigned i = 0, e = GEP1->getNumOperands(); i != e; ++i) {
+ if (!compare(GEP1->getOperand(i), GEP2->getOperand(i)))
+ return false;
}
- return false;
+ return true;
}
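A hedged illustration of the constant-offset fast path in isEquivalentGEP (hypothetical types, assuming 32-bit ints and no padding): when TargetData is available, two GEPs count as equivalent whenever they compute the same byte offset, even through different index paths.

struct S { int a[2]; };  // hypothetical
struct T { int x, y; };  // hypothetical
// &((S*)p)->a[1] and &((T*)q)->y both resolve to byte offset 4, so the
// TargetData-backed comparison treats the two GEPs as equivalent.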
-static bool equals(const BasicBlock *BB1, const BasicBlock *BB2,
- DenseMap<const Value *, const Value *> &ValueMap,
- DenseMap<const Value *, const Value *> &SpeculationMap) {
- // Speculatively add it anyways. If it's false, we'll notice a difference
- // later, and this won't matter.
- ValueMap[BB1] = BB2;
+bool MergeFunctions::compare(const Value *V1, const Value *V2) {
+ if (V1 == LHS || V1 == RHS)
+ if (V2 == LHS || V2 == RHS)
+ return true;
- BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
- BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
+ // TODO: constant expressions in terms of LHS and RHS
+ if (isa<Constant>(V1))
+ return V1 == V2;
- do {
- if (isa<BitCastInst>(FI)) {
- ++FI;
- continue;
- }
- if (isa<BitCastInst>(GI)) {
- ++GI;
- continue;
- }
+ if (isa<InlineAsm>(V1) && isa<InlineAsm>(V2)) {
+ const InlineAsm *IA1 = cast<InlineAsm>(V1);
+ const InlineAsm *IA2 = cast<InlineAsm>(V2);
+ return IA1->getAsmString() == IA2->getAsmString() &&
+ IA1->getConstraintString() == IA2->getConstraintString();
+ }
- if (!isEquivalentOperation(FI, GI))
- return false;
+ // We enumerate constants globally; arguments, basic blocks and
+ // instructions are enumerated within the function they belong to.
+ const Function *Domain1 = NULL;
+ if (const Argument *A = dyn_cast<Argument>(V1)) {
+ Domain1 = A->getParent();
+ } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V1)) {
+ Domain1 = BB->getParent();
+ } else if (const Instruction *I = dyn_cast<Instruction>(V1)) {
+ Domain1 = I->getParent()->getParent();
+ }
- if (isa<GetElementPtrInst>(FI)) {
- const GetElementPtrInst *GEPF = cast<GetElementPtrInst>(FI);
- const GetElementPtrInst *GEPG = cast<GetElementPtrInst>(GI);
- if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) {
- // It's effectively a bitcast.
- ++FI, ++GI;
- continue;
- }
+ const Function *Domain2 = NULL;
+ if (const Argument *A = dyn_cast<Argument>(V2)) {
+ Domain2 = A->getParent();
+ } else if (const BasicBlock *BB = dyn_cast<BasicBlock>(V2)) {
+ Domain2 = BB->getParent();
+ } else if (const Instruction *I = dyn_cast<Instruction>(V2)) {
+ Domain2 = I->getParent()->getParent();
+ }
- // TODO: we only really care about the elements before the index
- if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType())
+ if (Domain1 != Domain2)
+ if (Domain1 != LHS && Domain1 != RHS)
+ if (Domain2 != LHS && Domain2 != RHS)
return false;
- }
- if (ValueMap[FI] == GI) {
- ++FI, ++GI;
- continue;
- }
+ IDMap &Map1 = Domains[Domain1];
+ unsigned long &ID1 = Map1[V1];
+ if (!ID1)
+ ID1 = ++DomainCount[Domain1];
- if (ValueMap[FI] != NULL)
- return false;
+ IDMap &Map2 = Domains[Domain2];
+ unsigned long &ID2 = Map2[V2];
+ if (!ID2)
+ ID2 = ++DomainCount[Domain2];
- for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
- Value *OpF = IgnoreBitcasts(FI->getOperand(i));
- Value *OpG = IgnoreBitcasts(GI->getOperand(i));
+ return ID1 == ID2;
+}
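A small worked example of the enumeration in compare() (hypothetical functions, hedged): values are numbered in the order they are first seen within their own function, so two values match exactly when they occupy the same structural position.

// F(a, b): t = a + b   // visitation assigns a->1, b->2, t->3 in F's domain
// G(x, y): u = x + y   // x->1, y->2, u->3 in G's domain => every pair matches
// H(x, y): u = y + x   // first operand gives y->1, then x->2, so the IDs
//                      // diverge when H is paired against F and compare fails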
- if (ValueMap[OpF] == OpG)
- continue;
+bool MergeFunctions::equals(const BasicBlock *BB1, const BasicBlock *BB2) {
+ BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end();
+ BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end();
- if (ValueMap[OpF] != NULL)
+ do {
+ if (!compare(FI, GI))
+ return false;
+
+ if (isa<GetElementPtrInst>(FI) && isa<GetElementPtrInst>(GI)) {
+ const GetElementPtrInst *GEP1 = cast<GetElementPtrInst>(FI);
+ const GetElementPtrInst *GEP2 = cast<GetElementPtrInst>(GI);
+
+ if (!compare(GEP1->getPointerOperand(), GEP2->getPointerOperand()))
return false;
- if (OpF->getValueID() != OpG->getValueID() ||
- !isEquivalentType(OpF->getType(), OpG->getType()))
+ if (!isEquivalentGEP(GEP1, GEP2))
+ return false;
+ } else {
+ if (!isEquivalentOperation(FI, GI))
return false;
- if (isa<PHINode>(FI)) {
- if (SpeculationMap[OpF] == NULL)
- SpeculationMap[OpF] = OpG;
- else if (SpeculationMap[OpF] != OpG)
- return false;
- continue;
- } else if (isa<BasicBlock>(OpF)) {
- assert(isa<TerminatorInst>(FI) &&
- "BasicBlock referenced by non-Terminator non-PHI");
- // This call changes the ValueMap, hence we can't use
- // Value *& = ValueMap[...]
- if (!equals(cast<BasicBlock>(OpF), cast<BasicBlock>(OpG), ValueMap,
- SpeculationMap))
- return false;
- } else {
+ for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
+ Value *OpF = FI->getOperand(i);
+ Value *OpG = GI->getOperand(i);
+
if (!compare(OpF, OpG))
return false;
- }
- ValueMap[OpF] = OpG;
+ if (OpF->getValueID() != OpG->getValueID() ||
+ !isEquivalentType(OpF->getType(), OpG->getType()))
+ return false;
+ }
}
- ValueMap[FI] = GI;
++FI, ++GI;
} while (FI != FE && GI != GE);
return FI == FE && GI == GE;
}
-static bool equals(const Function *F, const Function *G) {
+bool MergeFunctions::equals(const Function *F, const Function *G) {
// We need to recheck everything, but check the things that weren't included
// in the hash first.
@@ -382,27 +440,46 @@ static bool equals(const Function *F, const Function *G) {
if (!isEquivalentType(F->getFunctionType(), G->getFunctionType()))
return false;
- DenseMap<const Value *, const Value *> ValueMap;
- DenseMap<const Value *, const Value *> SpeculationMap;
- ValueMap[F] = G;
-
assert(F->arg_size() == G->arg_size() &&
"Identical functions have a different number of args.");
- for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(),
- fe = F->arg_end(); fi != fe; ++fi, ++gi)
- ValueMap[fi] = gi;
+ LHS = F;
+ RHS = G;
- if (!equals(&F->getEntryBlock(), &G->getEntryBlock(), ValueMap,
- SpeculationMap))
- return false;
+ // Visit the arguments so that they get enumerated in the order they're
+ // passed in.
+ for (Function::const_arg_iterator fi = F->arg_begin(), gi = G->arg_begin(),
+ fe = F->arg_end(); fi != fe; ++fi, ++gi) {
+ if (!compare(fi, gi))
+ llvm_unreachable("Arguments repeat");
+ }
- for (DenseMap<const Value *, const Value *>::iterator
- I = SpeculationMap.begin(), E = SpeculationMap.end(); I != E; ++I) {
- if (ValueMap[I->first] != I->second)
+ SmallVector<const BasicBlock *, 8> FBBs, GBBs;
+ SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F.
+ FBBs.push_back(&F->getEntryBlock());
+ GBBs.push_back(&G->getEntryBlock());
+ VisitedBBs.insert(FBBs[0]);
+ while (!FBBs.empty()) {
+ const BasicBlock *FBB = FBBs.pop_back_val();
+ const BasicBlock *GBB = GBBs.pop_back_val();
+ if (!compare(FBB, GBB) || !equals(FBB, GBB)) {
+ Domains.clear();
+ DomainCount.clear();
return false;
+ }
+ const TerminatorInst *FTI = FBB->getTerminator();
+ const TerminatorInst *GTI = GBB->getTerminator();
+ assert(FTI->getNumSuccessors() == GTI->getNumSuccessors());
+ for (unsigned i = 0, e = FTI->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(FTI->getSuccessor(i)))
+ continue;
+ FBBs.push_back(FTI->getSuccessor(i));
+ GBBs.push_back(GTI->getSuccessor(i));
+ }
}
+ Domains.clear();
+ DomainCount.clear();
return true;
}
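One point worth spelling out (a hedged reading of the loop above): FBBs and GBBs are pushed and popped in lock step, so the i-th successor of a terminator in F is always compared against the i-th successor of the corresponding terminator in G; block correspondence follows CFG shape rather than block identity.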
@@ -476,20 +553,32 @@ static LinkageCategory categorize(const Function *F) {
}
static void ThunkGToF(Function *F, Function *G) {
+ if (!G->mayBeOverridden()) {
+ // Redirect direct callers of G to F.
+ Constant *BitcastF = ConstantExpr::getBitCast(F, G->getType());
+ for (Value::use_iterator UI = G->use_begin(), UE = G->use_end();
+ UI != UE;) {
+ Value::use_iterator TheIter = UI;
+ ++UI;
+ CallSite CS(*TheIter);
+ if (CS && CS.isCallee(TheIter))
+ TheIter.getUse().set(BitcastF);
+ }
+ }
+
Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "",
G->getParent());
BasicBlock *BB = BasicBlock::Create(F->getContext(), "", NewG);
- std::vector<Value *> Args;
+ SmallVector<Value *, 16> Args;
unsigned i = 0;
const FunctionType *FFTy = F->getFunctionType();
for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end();
AI != AE; ++AI) {
- if (FFTy->getParamType(i) == AI->getType())
+ if (FFTy->getParamType(i) == AI->getType()) {
Args.push_back(AI);
- else {
- Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB);
- Args.push_back(BCI);
+ } else {
+ Args.push_back(new BitCastInst(AI, FFTy->getParamType(i), "", BB));
}
++i;
}
@@ -510,8 +599,6 @@ static void ThunkGToF(Function *F, Function *G) {
NewG->takeName(G);
G->replaceAllUsesWith(NewG);
G->eraseFromParent();
-
- // TODO: look at direct callers to G and make them all direct callers to F.
}
static void AliasGToF(Function *F, Function *G) {
@@ -542,67 +629,66 @@ static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
}
switch (catF) {
+ case ExternalStrong:
+ switch (catG) {
case ExternalStrong:
- switch (catG) {
- case ExternalStrong:
- case ExternalWeak:
- ThunkGToF(F, G);
- break;
- case Internal:
- if (G->hasAddressTaken())
- ThunkGToF(F, G);
- else
- AliasGToF(F, G);
- break;
- }
+ case ExternalWeak:
+ ThunkGToF(F, G);
break;
+ case Internal:
+ if (G->hasAddressTaken())
+ ThunkGToF(F, G);
+ else
+ AliasGToF(F, G);
+ break;
+ }
+ break;
- case ExternalWeak: {
- assert(catG == ExternalWeak);
+ case ExternalWeak: {
+ assert(catG == ExternalWeak);
- // Make them both thunks to the same internal function.
- F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
- Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
- F->getParent());
- H->copyAttributesFrom(F);
- H->takeName(F);
- F->replaceAllUsesWith(H);
+ // Make them both thunks to the same internal function.
+ F->setAlignment(std::max(F->getAlignment(), G->getAlignment()));
+ Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "",
+ F->getParent());
+ H->copyAttributesFrom(F);
+ H->takeName(F);
+ F->replaceAllUsesWith(H);
- ThunkGToF(F, G);
- ThunkGToF(F, H);
+ ThunkGToF(F, G);
+ ThunkGToF(F, H);
- F->setLinkage(GlobalValue::InternalLinkage);
- } break;
+ F->setLinkage(GlobalValue::InternalLinkage);
+ } break;
- case Internal:
- switch (catG) {
- case ExternalStrong:
- llvm_unreachable(0);
- // fall-through
- case ExternalWeak:
- if (F->hasAddressTaken())
- ThunkGToF(F, G);
- else
- AliasGToF(F, G);
- break;
- case Internal: {
- bool addrTakenF = F->hasAddressTaken();
- bool addrTakenG = G->hasAddressTaken();
- if (!addrTakenF && addrTakenG) {
- std::swap(FnVec[i], FnVec[j]);
- std::swap(F, G);
- std::swap(addrTakenF, addrTakenG);
- }
+ case Internal:
+ switch (catG) {
+ case ExternalStrong:
+ llvm_unreachable(0);
+ // fall-through
+ case ExternalWeak:
+ if (F->hasAddressTaken())
+ ThunkGToF(F, G);
+ else
+ AliasGToF(F, G);
+ break;
+ case Internal: {
+ bool addrTakenF = F->hasAddressTaken();
+ bool addrTakenG = G->hasAddressTaken();
+ if (!addrTakenF && addrTakenG) {
+ std::swap(FnVec[i], FnVec[j]);
+ std::swap(F, G);
+ std::swap(addrTakenF, addrTakenG);
+ }
- if (addrTakenF && addrTakenG) {
- ThunkGToF(F, G);
- } else {
- assert(!addrTakenG);
- AliasGToF(F, G);
- }
- } break;
+ if (addrTakenF && addrTakenG) {
+ ThunkGToF(F, G);
+ } else {
+ assert(!addrTakenG);
+ AliasGToF(F, G);
}
- break;
+ } break;
+ } break;
}
++NumFunctionsMerged;
@@ -619,22 +705,20 @@ bool MergeFunctions::runOnModule(Module &M) {
std::map<unsigned long, std::vector<Function *> > FnMap;
for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- if (F->isDeclaration() || F->isIntrinsic())
+ if (F->isDeclaration())
continue;
FnMap[hash(F)].push_back(F);
}
- // TODO: instead of running in a loop, we could also fold functions in
- // callgraph order. Constructing the CFG probably isn't cheaper than just
- // running in a loop, unless it happened to already be available.
+ TD = getAnalysisIfAvailable<TargetData>();
bool LocalChanged;
do {
LocalChanged = false;
DEBUG(dbgs() << "size: " << FnMap.size() << "\n");
for (std::map<unsigned long, std::vector<Function *> >::iterator
- I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
+ I = FnMap.begin(), E = FnMap.end(); I != E; ++I) {
std::vector<Function *> &FnVec = I->second;
DEBUG(dbgs() << "hash (" << I->first << "): " << FnVec.size() << "\n");
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 310e4a2..6bc8e66 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -229,6 +229,12 @@ static bool StripDebugInfo(Module &M) {
NMD->eraseFromParent();
}
+ NMD = M.getNamedMetadata("llvm.dbg.lv");
+ if (NMD) {
+ Changed = true;
+ NMD->eraseFromParent();
+ }
+
unsigned MDDbgKind = M.getMDKindID("dbg");
for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h
index bd06499..c7b04a4 100644
--- a/lib/Transforms/InstCombine/InstCombine.h
+++ b/lib/Transforms/InstCombine/InstCombine.h
@@ -51,7 +51,7 @@ static inline unsigned getComplexity(Value *V) {
/// InstCombineIRInserter - This is an IRBuilder insertion helper that works
/// just like the normal insertion helper, but also adds any new instructions
/// to the instcombine worklist.
-class VISIBILITY_HIDDEN InstCombineIRInserter
+class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
: public IRBuilderDefaultInserter<true> {
InstCombineWorklist &Worklist;
public:
@@ -65,7 +65,7 @@ public:
};
/// InstCombiner - The -instcombine pass.
-class VISIBILITY_HIDDEN InstCombiner
+class LLVM_LIBRARY_VISIBILITY InstCombiner
: public FunctionPass,
public InstVisitor<InstCombiner, Instruction*> {
TargetData *TD;
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index eb7628e..b0137c4 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -442,7 +442,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// If this cast is a truncate, evaluating in a different type always
// eliminates the cast, so it is always a win.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
- " to avoid cast: " << CI);
+ " to avoid cast: " << CI << '\n');
Value *Res = EvaluateInDifferentType(Src, DestTy, false);
assert(Res->getType() == DestTy);
return ReplaceInstUsesWith(CI, Res);
@@ -1252,6 +1252,64 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
return commonPointerCastTransforms(CI);
}
+/// OptimizeVectorResize - This input value (which is known to have vector type)
+/// is being zero extended or truncated to the specified vector type. Try to
+/// replace it with a shuffle (and vector/vector bitcast) if possible.
+///
+/// The source and destination vector types may have different element types.
+static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy,
+ InstCombiner &IC) {
+ // We can only do this optimization if the output is a multiple of the input
+ // element size, or the input is a multiple of the output element size.
+ // Convert the input type to have the same element type as the output.
+ const VectorType *SrcTy = cast<VectorType>(InVal->getType());
+
+ if (SrcTy->getElementType() != DestTy->getElementType()) {
+ // The input types don't need to be identical, but for now they must be the
+ // same size. There is no specific reason we couldn't handle things like
+ // <4 x i16> -> <4 x i32> by bitcasting to <2 x i32> but haven't gotten
+ // there yet.
+ if (SrcTy->getElementType()->getPrimitiveSizeInBits() !=
+ DestTy->getElementType()->getPrimitiveSizeInBits())
+ return 0;
+
+ SrcTy = VectorType::get(DestTy->getElementType(), SrcTy->getNumElements());
+ InVal = IC.Builder->CreateBitCast(InVal, SrcTy);
+ }
+
+ // Now that the element types match, get the shuffle mask and RHS of the
+ // shuffle to use, which depends on whether we're increasing or decreasing the
+ // size of the input.
+ SmallVector<Constant*, 16> ShuffleMask;
+ Value *V2;
+ const IntegerType *Int32Ty = Type::getInt32Ty(SrcTy->getContext());
+
+ if (SrcTy->getNumElements() > DestTy->getNumElements()) {
+ // If we're shrinking the number of elements, just shuffle in the low
+ // elements from the input and use undef as the second shuffle input.
+ V2 = UndefValue::get(SrcTy);
+ for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ } else {
+ // If we're increasing the number of elements, shuffle in all of the
+ // elements from InVal and fill the rest of the result elements with zeros
+ // taken from a constant zero vector.
+ V2 = Constant::getNullValue(SrcTy);
+ unsigned SrcElts = SrcTy->getNumElements();
+ for (unsigned i = 0, e = SrcElts; i != e; ++i)
+ ShuffleMask.push_back(ConstantInt::get(Int32Ty, i));
+
+ // The excess elements reference the first element of the zero input.
+ ShuffleMask.append(DestTy->getNumElements()-SrcElts,
+ ConstantInt::get(Int32Ty, SrcElts));
+ }
+
+ Constant *Mask = ConstantVector::get(ShuffleMask.data(), ShuffleMask.size());
+ return new ShuffleVectorInst(InVal, V2, Mask);
+}
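A minimal runnable sketch of the grow-path mask construction above (hypothetical standalone helper; SrcElts and DestElts stand for the source and destination lane counts):

#include <vector>

// Keep every input lane, then pad with lane SrcElts, which selects the
// first element of the zero operand. SrcElts = 2, DestElts = 4 yields
// {0, 1, 2, 2}, matching the ShuffleMask built by OptimizeVectorResize.
std::vector<unsigned> GrowMask(unsigned SrcElts, unsigned DestElts) {
  std::vector<unsigned> Mask;
  for (unsigned i = 0; i != SrcElts; ++i)
    Mask.push_back(i);
  Mask.insert(Mask.end(), DestElts - SrcElts, SrcElts);
  return Mask;
}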
+
+
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
// otherwise just apply the common ones.
@@ -1310,6 +1368,18 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
// FIXME: Canonicalize bitcast(insertelement) -> insertelement(bitcast)
}
+
+ // If this is a cast from an integer to vector, check to see if the input
+ // is a trunc or zext of a bitcast from vector. If so, we can replace all
+ // the casts with a shuffle and (potentially) a bitcast.
+ if (isa<IntegerType>(SrcTy) && (isa<TruncInst>(Src) || isa<ZExtInst>(Src))){
+ CastInst *SrcCast = cast<CastInst>(Src);
+ if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
+ if (isa<VectorType>(BCIn->getOperand(0)->getType()))
+ if (Instruction *I = OptimizeVectorResize(BCIn->getOperand(0),
+ cast<VectorType>(DestTy), *this))
+ return I;
+ }
}
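A hedged example of the cast chain the new check matches (illustrative IR in comments; lane numbering glosses over endianness details):

// %i = bitcast <4 x i32> %v to i128
// %t = trunc i128 %i to i64
// %r = bitcast i64 %t to <2 x i32>
//   ==> all three casts collapse into a single shuffle of the original vector:
// %r = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>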
if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h
index 9d88621..9100a85 100644
--- a/lib/Transforms/InstCombine/InstCombineWorklist.h
+++ b/lib/Transforms/InstCombine/InstCombineWorklist.h
@@ -22,7 +22,7 @@ namespace llvm {
/// InstCombineWorklist - This is the worklist management logic for
/// InstCombine.
-class VISIBILITY_HIDDEN InstCombineWorklist {
+class LLVM_LIBRARY_VISIBILITY InstCombineWorklist {
SmallVector<Instruction*, 256> Worklist;
DenseMap<Instruction*, unsigned> WorklistMap;
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 5778864..1a3b10c 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -26,6 +26,7 @@ add_llvm_library(LLVMScalarOpts
SimplifyCFGPass.cpp
SimplifyHalfPowrLibCalls.cpp
SimplifyLibCalls.cpp
+ Sink.cpp
TailDuplication.cpp
TailRecursionElimination.cpp
)
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 65b34b1..ca8ab49 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -868,7 +868,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
const Type *StoredValTy = StoredVal->getType();
- uint64_t StoreSize = TD.getTypeSizeInBits(StoredValTy);
+ uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy);
uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy);
// If the store and reload are the same size, we can always reuse it.
@@ -1132,8 +1132,8 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
Instruction *InsertPt, const TargetData &TD){
LLVMContext &Ctx = SrcVal->getType()->getContext();
- uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
- uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+ uint64_t StoreSize = (TD.getTypeSizeInBits(SrcVal->getType()) + 7) / 8;
+ uint64_t LoadSize = (TD.getTypeSizeInBits(LoadTy) + 7) / 8;
IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
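The round-up only matters for bit widths that are not byte multiples; a quick worked example (hedged):

// i1:  old 1/8  == 0 bytes (underestimates)  new (1 + 7)/8  == 1 byte
// i32: old 32/8 == 4 bytes                   new (32 + 7)/8 == 4 bytes
// i33: old 33/8 == 4 bytes (underestimates)  new (33 + 7)/8 == 5 bytes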
@@ -1604,7 +1604,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
}
if (!NeedToSplit.empty()) {
- toSplit.append(NeedToSplit.size(), NeedToSplit.front());
+ toSplit.append(NeedToSplit.begin(), NeedToSplit.end());
return false;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index cf3d16f..86ea3eb 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -107,11 +107,13 @@ namespace {
class RegUseTracker {
typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;
- RegUsesTy RegUses;
+ RegUsesTy RegUsesMap;
SmallVector<const SCEV *, 16> RegSequence;
public:
void CountRegister(const SCEV *Reg, size_t LUIdx);
+ void DropRegister(const SCEV *Reg, size_t LUIdx);
+ void DropUse(size_t LUIdx);
bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;
@@ -132,7 +134,7 @@ public:
void
RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
std::pair<RegUsesTy::iterator, bool> Pair =
- RegUses.insert(std::make_pair(Reg, RegSortData()));
+ RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
RegSortData &RSD = Pair.first->second;
if (Pair.second)
RegSequence.push_back(Reg);
@@ -140,11 +142,28 @@ RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
RSD.UsedByIndices.set(LUIdx);
}
+void
+RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
+ RegUsesTy::iterator It = RegUsesMap.find(Reg);
+ assert(It != RegUsesMap.end());
+ RegSortData &RSD = It->second;
+ assert(RSD.UsedByIndices.size() > LUIdx);
+ RSD.UsedByIndices.reset(LUIdx);
+}
+
+void
+RegUseTracker::DropUse(size_t LUIdx) {
+ // Remove the use index from every register's use list.
+ for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
+ I != E; ++I)
+ I->second.UsedByIndices.reset(LUIdx);
+}
+
bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
- if (!RegUses.count(Reg)) return false;
+ if (!RegUsesMap.count(Reg)) return false;
const SmallBitVector &UsedByIndices =
- RegUses.find(Reg)->second.UsedByIndices;
+ RegUsesMap.find(Reg)->second.UsedByIndices;
int i = UsedByIndices.find_first();
if (i == -1) return false;
if ((size_t)i != LUIdx) return true;
@@ -152,13 +171,13 @@ RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
}
const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
- RegUsesTy::const_iterator I = RegUses.find(Reg);
- assert(I != RegUses.end() && "Unknown register!");
+ RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
+ assert(I != RegUsesMap.end() && "Unknown register!");
return I->second.UsedByIndices;
}
void RegUseTracker::clear() {
- RegUses.clear();
+ RegUsesMap.clear();
RegSequence.clear();
}
@@ -188,6 +207,8 @@ struct Formula {
unsigned getNumRegs() const;
const Type *getType() const;
+ void DeleteBaseReg(const SCEV *&S);
+
bool referencesReg(const SCEV *S) const;
bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
const RegUseTracker &RegUses) const;
@@ -291,6 +312,13 @@ const Type *Formula::getType() const {
0;
}
+/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
+void Formula::DeleteBaseReg(const SCEV *&S) {
+ if (&S != &BaseRegs.back())
+ std::swap(S, BaseRegs.back());
+ BaseRegs.pop_back();
+}
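DeleteBaseReg is the usual swap-with-back-then-pop idiom, which erases from a vector in O(1) at the cost of element order; a generic sketch (hypothetical helper):

#include <utility>
#include <vector>

// O(1) unordered erase: move the victim to the back, then drop it.
// Callers must not rely on the order of the surviving elements.
template <typename T>
void SwapAndPop(std::vector<T> &V, T &Elem) {
  if (&Elem != &V.back())
    std::swap(Elem, V.back());
  V.pop_back();
}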
+
/// referencesReg - Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
return S == ScaledReg ||
@@ -326,6 +354,13 @@ void Formula::print(raw_ostream &OS) const {
if (!First) OS << " + "; else First = false;
OS << "reg(" << **I << ')';
}
+ if (AM.HasBaseReg && BaseRegs.empty()) {
+ if (!First) OS << " + "; else First = false;
+ OS << "**error: HasBaseReg**";
+ } else if (!AM.HasBaseReg && !BaseRegs.empty()) {
+ if (!First) OS << " + "; else First = false;
+ OS << "**error: !HasBaseReg**";
+ }
if (AM.Scale != 0) {
if (!First) OS << " + "; else First = false;
OS << AM.Scale << "*reg(";
@@ -345,8 +380,7 @@ void Formula::dump() const {
/// without changing its value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
const Type *WideTy =
- IntegerType::get(SE.getContext(),
- SE.getTypeSizeInBits(AR->getType()) + 1);
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}
@@ -354,8 +388,7 @@ static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
/// without changing its value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
const Type *WideTy =
- IntegerType::get(SE.getContext(),
- SE.getTypeSizeInBits(A->getType()) + 1);
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}
@@ -363,8 +396,7 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
/// without changing its value.
static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
const Type *WideTy =
- IntegerType::get(SE.getContext(),
- SE.getTypeSizeInBits(A->getType()) + 1);
+ IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
}
@@ -432,14 +464,14 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
bool Found = false;
for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
I != E; ++I) {
+ const SCEV *S = *I;
if (!Found)
- if (const SCEV *Q = getExactSDiv(*I, RHS, SE,
+ if (const SCEV *Q = getExactSDiv(S, RHS, SE,
IgnoreSignificantBits)) {
- Ops.push_back(Q);
+ S = Q;
Found = true;
- continue;
}
- Ops.push_back(*I);
+ Ops.push_back(S);
}
return Found ? SE.getMulExpr(Ops) : 0;
}
@@ -810,8 +842,7 @@ struct LSRFixup {
}
LSRFixup::LSRFixup()
- : UserInst(0), OperandValToReplace(0),
- LUIdx(~size_t(0)), Offset(0) {}
+ : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {}
/// isUseFullyOutsideLoop - Test whether this fixup always uses its
/// value outside of the given loop.
@@ -934,7 +965,10 @@ public:
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true) {}
+ bool HasFormulaWithSameRegs(const Formula &F) const;
bool InsertFormula(const Formula &F);
+ void DeleteFormula(Formula &F);
+ void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
void check() const;
@@ -942,6 +976,16 @@ public:
void dump() const;
};
+/// HasFormulaWithSameRegs - Test whether this use has a formula with the same
+/// registers as the given formula.
+bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
+ SmallVector<const SCEV *, 2> Key = F.BaseRegs;
+ if (F.ScaledReg) Key.push_back(F.ScaledReg);
+ // Unstable sort by host order ok, because this is only used for uniquifying.
+ std::sort(Key.begin(), Key.end());
+ return Uniquifier.count(Key);
+}
+
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRUse::InsertFormula(const Formula &F) {
@@ -972,6 +1016,33 @@ bool LSRUse::InsertFormula(const Formula &F) {
return true;
}
+/// DeleteFormula - Remove the given formula from this use's list.
+void LSRUse::DeleteFormula(Formula &F) {
+ if (&F != &Formulae.back())
+ std::swap(F, Formulae.back());
+ Formulae.pop_back();
+ assert(!Formulae.empty() && "LSRUse has no formulae left!");
+}
+
+/// RecomputeRegs - Recompute the Regs field, and update RegUses.
+void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ SmallPtrSet<const SCEV *, 4> OldRegs = Regs;
+ Regs.clear();
+ for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(),
+ E = Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.ScaledReg) Regs.insert(F.ScaledReg);
+ Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
+ }
+
+ // Update the RegTracker.
+ for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(),
+ E = OldRegs.end(); I != E; ++I)
+ if (!Regs.count(*I))
+ RegUses.DropRegister(*I, LUIdx);
+}
+
void LSRUse::print(raw_ostream &OS) const {
OS << "LSR Use: Kind=";
switch (Kind) {
@@ -1091,6 +1162,13 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
AM.HasBaseReg = HasBaseReg;
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
+ // Canonicalize a scale of 1 to a base register if the formula doesn't
+ // already have a base register.
+ if (!AM.HasBaseReg && AM.Scale == 1) {
+ AM.Scale = 0;
+ AM.HasBaseReg = true;
+ }
+
return isLegalUse(AM, Kind, AccessTy, TLI);
}
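A hedged illustration of the canonicalization: 1*reg with no base register names the same address as a plain base register, and targets generally accept an unscaled base register more readily than a scaled index.

// Before: AM.HasBaseReg = false, AM.Scale = 1   // address is 1*reg
// After:  AM.HasBaseReg = true,  AM.Scale = 0   // address is reg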
@@ -1186,7 +1264,7 @@ class LSRInstance {
void OptimizeShadowIV();
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
- bool OptimizeLoopTermCond();
+ void OptimizeLoopTermCond();
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
@@ -1200,13 +1278,17 @@ class LSRInstance {
typedef DenseMap<const SCEV *, size_t> UseMapTy;
UseMapTy UseMap;
- bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+ bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
LSRUse::KindType Kind, const Type *AccessTy);
std::pair<size_t, int64_t> getUse(const SCEV *&Expr,
LSRUse::KindType Kind,
const Type *AccessTy);
+ void DeleteUse(LSRUse &LU);
+
+ LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);
+
public:
void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
@@ -1227,6 +1309,8 @@ public:
void GenerateAllReuseFormulae();
void FilterOutUndesirableDedicatedRegisters();
+
+ size_t EstimateSearchSpaceComplexity() const;
void NarrowSearchSpaceUsingHeuristics();
void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
@@ -1375,6 +1459,7 @@ void LSRInstance::OptimizeShadowIV() {
/* Remove cast operation */
ShadowUse->replaceAllUsesWith(NewPH);
ShadowUse->eraseFromParent();
+ Changed = true;
break;
}
}
@@ -1382,8 +1467,7 @@ void LSRInstance::OptimizeShadowIV() {
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
-bool LSRInstance::FindIVUserForCond(ICmpInst *Cond,
- IVStrideUse *&CondUse) {
+bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
if (UI->getUser() == Cond) {
// NOTE: we could handle setcc instructions with multiple uses here, but
@@ -1555,7 +1639,7 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
-bool
+void
LSRInstance::OptimizeLoopTermCond() {
SmallPtrSet<Instruction *, 4> PostIncs;
@@ -1621,13 +1705,13 @@ LSRInstance::OptimizeLoopTermCond() {
}
if (const SCEVConstant *D =
dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
+ const ConstantInt *C = D->getValue();
// Stride of one or negative one can have reuse with non-addresses.
- if (D->getValue()->isOne() ||
- D->getValue()->isAllOnesValue())
+ if (C->isOne() || C->isAllOnesValue())
goto decline_post_inc;
// Avoid weird situations.
- if (D->getValue()->getValue().getMinSignedBits() >= 64 ||
- D->getValue()->getValue().isMinSignedValue())
+ if (C->getValue().getMinSignedBits() >= 64 ||
+ C->getValue().isMinSignedValue())
goto decline_post_inc;
// Without TLI, assume that any stride might be valid, and so any
// use might be shared.
@@ -1636,7 +1720,7 @@ LSRInstance::OptimizeLoopTermCond() {
// Check for possible scaled-address reuse.
const Type *AccessTy = getAccessType(UI->getUser());
TargetLowering::AddrMode AM;
- AM.Scale = D->getValue()->getSExtValue();
+ AM.Scale = C->getSExtValue();
if (TLI->isLegalAddressingMode(AM, AccessTy))
goto decline_post_inc;
AM.Scale = -AM.Scale;
@@ -1691,12 +1775,13 @@ LSRInstance::OptimizeLoopTermCond() {
else if (BB != IVIncInsertPos->getParent())
IVIncInsertPos = BB->getTerminator();
}
-
- return Changed;
}
+/// reconcileNewOffset - Determine if the given use can accommodate a fixup
+/// at the given offset and other details. If so, update the use and
+/// return true.
bool
-LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
+LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
LSRUse::KindType Kind, const Type *AccessTy) {
int64_t NewMinOffset = LU.MinOffset;
int64_t NewMaxOffset = LU.MaxOffset;
@@ -1709,12 +1794,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
return false;
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
- if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true,
+ if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
Kind, AccessTy, TLI))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
- if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true,
+ if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
Kind, AccessTy, TLI))
return false;
NewMaxOffset = NewOffset;
@@ -1753,7 +1838,7 @@ LSRInstance::getUse(const SCEV *&Expr,
// A use already existed with this base.
size_t LUIdx = P.first->second;
LSRUse &LU = Uses[LUIdx];
- if (reconcileNewOffset(LU, Offset, Kind, AccessTy))
+ if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
// Reuse this use.
return std::make_pair(LUIdx, Offset);
}
@@ -1774,6 +1859,47 @@ LSRInstance::getUse(const SCEV *&Expr,
return std::make_pair(LUIdx, Offset);
}
+/// DeleteUse - Delete the given use from the Uses list.
+void LSRInstance::DeleteUse(LSRUse &LU) {
+ if (&LU != &Uses.back())
+ std::swap(LU, Uses.back());
+ Uses.pop_back();
+}
+
+/// FindUseWithSimilarFormula - Look for a use distinct from OrigLU which has
+/// a formula with the same registers as the given formula.
+LSRUse *
+LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
+ const LSRUse &OrigLU) {
+ // Search all uses for the formula. This could be more clever. Ignore
+ // ICmpZero uses because they may contain formulae generated by
+ // GenerateICmpZeroScales, in which case adding fixup offsets may
+ // be invalid.
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ if (&LU != &OrigLU &&
+ LU.Kind != LSRUse::ICmpZero &&
+ LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
+ LU.HasFormulaWithSameRegs(OrigF)) {
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.BaseRegs == OrigF.BaseRegs &&
+ F.ScaledReg == OrigF.ScaledReg &&
+ F.AM.BaseGV == OrigF.AM.BaseGV &&
+ F.AM.Scale == OrigF.AM.Scale &&
+ LU.Kind) {
+ if (F.AM.BaseOffs == 0)
+ return &LU;
+ break;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides;
@@ -1867,6 +1993,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
if (NV == LF.OperandValToReplace) {
CI->setOperand(1, CI->getOperand(0));
CI->setOperand(0, NV);
+ NV = CI->getOperand(1);
+ Changed = true;
}
// x == y --> x - y == 0
@@ -1901,6 +2029,9 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
DEBUG(print_fixups(dbgs()));
}
+/// InsertInitialFormula - Insert a formula for the given expression into
+/// the given use, separating out loop-variant portions from loop-invariant
+/// and loop-computable portions.
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
Formula F;
@@ -1909,6 +2040,8 @@ LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}
+/// InsertSupplementalFormula - Insert a simple single-register formula for
+/// the given expression into the given use.
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
LSRUse &LU, size_t LUIdx) {
@@ -2265,8 +2398,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
/// GenerateScales - Generate stride factor reuse formulae by making use of
/// scaled-offset address modes, for example.
-void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx,
- Formula Base) {
+void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
// Determine the integer type for the base formula.
const Type *IntTy = Base.getType();
if (!IntTy) return;
@@ -2312,8 +2444,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx,
// TODO: This could be optimized to avoid all the copying.
Formula F = Base;
F.ScaledReg = Quotient;
- std::swap(F.BaseRegs[i], F.BaseRegs.back());
- F.BaseRegs.pop_back();
+ F.DeleteBaseReg(F.BaseRegs[i]);
(void)InsertFormula(LU, LUIdx, F);
}
}
@@ -2321,8 +2452,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx,
}
/// GenerateTruncates - Generate reuse formulae from different IV types.
-void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx,
- Formula Base) {
+void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
// This requires TargetLowering to tell us which truncates are free.
if (!TLI) return;
@@ -2479,7 +2609,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// TODO: Use a more targeted data structure.
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
- Formula F = LU.Formulae[L];
+ const Formula &F = LU.Formulae[L];
// Use the immediate in the scaled register.
if (F.ScaledReg == OrigReg) {
int64_t Offs = (uint64_t)F.AM.BaseOffs +
@@ -2527,10 +2657,11 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
J != JE; ++J)
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
- if (C->getValue()->getValue().isNegative() !=
- (NewF.AM.BaseOffs < 0) &&
- C->getValue()->getValue().abs()
- .ule(abs64(NewF.AM.BaseOffs)))
+ if ((C->getValue()->getValue() + NewF.AM.BaseOffs).abs().slt(
+ abs64(NewF.AM.BaseOffs)) &&
+ (C->getValue()->getValue() +
+ NewF.AM.BaseOffs).countTrailingZeros() >=
+ CountTrailingZeros_64(NewF.AM.BaseOffs))
goto skip_formula;
// Ok, looks good.
@@ -2579,7 +2710,7 @@ LSRInstance::GenerateAllReuseFormulae() {
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
#ifndef NDEBUG
- bool Changed = false;
+ bool ChangedFormulae = false;
#endif
// Collect the best formula for each unique set of shared registers. This
@@ -2591,10 +2722,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
LSRUse &LU = Uses[LUIdx];
FormulaSorter Sorter(L, LU, SE, DT);
+ DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');
- // Clear out the set of used regs; it will be recomputed.
- LU.Regs.clear();
-
+ bool Any = false;
for (size_t FIdx = 0, NumForms = LU.Formulae.size();
FIdx != NumForms; ++FIdx) {
Formula &F = LU.Formulae[FIdx];
@@ -2619,62 +2749,200 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Formula &Best = LU.Formulae[P.first->second];
if (Sorter.operator()(F, Best))
std::swap(F, Best);
- DEBUG(dbgs() << "Filtering out "; F.print(dbgs());
+ DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
dbgs() << "\n"
- " in favor of "; Best.print(dbgs());
+ " in favor of formula "; Best.print(dbgs());
dbgs() << '\n');
#ifndef NDEBUG
- Changed = true;
+ ChangedFormulae = true;
#endif
- std::swap(F, LU.Formulae.back());
- LU.Formulae.pop_back();
+ LU.DeleteFormula(F);
--FIdx;
--NumForms;
+ Any = true;
continue;
}
- if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
- LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
}
+
+ // Now that we've filtered out some formulae, recompute the Regs set.
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
+
+ // Reset this to prepare for the next use.
BestFormulae.clear();
}
- DEBUG(if (Changed) {
+ DEBUG(if (ChangedFormulae) {
dbgs() << "\n"
"After filtering out undesirable candidates:\n";
print_uses(dbgs());
});
}
+// This is a rough guess that seems to work fairly well.
+static const size_t ComplexityLimit = UINT16_MAX;
+
+/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
+/// solutions the solver might have to consider. It almost never considers
+/// this many solutions because it prunes the search space, but the pruning
+/// isn't always sufficient.
+size_t LSRInstance::EstimateSearchSpaceComplexity() const {
+ uint32_t Power = 1;
+ for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
+ E = Uses.end(); I != E; ++I) {
+ size_t FSize = I->Formulae.size();
+ if (FSize >= ComplexityLimit) {
+ Power = ComplexityLimit;
+ break;
+ }
+ Power *= FSize;
+ if (Power >= ComplexityLimit)
+ break;
+ }
+ return Power;
+}
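As a worked example (hedged, hypothetical counts): three uses with 10, 20, and 50 formulae give a worst case of 10 * 20 * 50 = 10,000 candidate solutions, comfortably under the UINT16_MAX limit of 65,535; a fourth use with 10 formulae would push the product to 100,000, so the estimate saturates at the limit and the narrowing heuristics below take over.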
+
/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
/// formulae to choose from, use some rough heuristics to prune down the number
/// of formulae. This keeps the main solver from taking an extraordinary amount
/// of time in some worst-case scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
- // This is a rough guess that seems to work fairly well.
- const size_t Limit = UINT16_MAX;
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
- SmallPtrSet<const SCEV *, 4> Taken;
- for (;;) {
- // Estimate the worst-case number of solutions we might consider. We almost
- // never consider this many solutions because we prune the search space,
- // but the pruning isn't always sufficient.
- uint32_t Power = 1;
- for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
- size_t FSize = I->Formulae.size();
- if (FSize >= Limit) {
- Power = Limit;
- break;
+ DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
+ "which use a superset of registers used by other "
+ "formulae.\n");
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ bool Any = false;
+ for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
+ Formula &F = LU.Formulae[i];
+ // Look for a formula with a constant or GV in a register. If the use
+ // also has a formula with that same value in an immediate field,
+ // delete the one that uses a register.
+ for (SmallVectorImpl<const SCEV *>::const_iterator
+ I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
+ Formula NewF = F;
+ NewF.AM.BaseOffs += C->getValue()->getSExtValue();
+ NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+ (I - F.BaseRegs.begin()));
+ if (LU.HasFormulaWithSameRegs(NewF)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ break;
+ }
+ } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
+ if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
+ if (!F.AM.BaseGV) {
+ Formula NewF = F;
+ NewF.AM.BaseGV = GV;
+ NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
+ (I - F.BaseRegs.begin()));
+ if (LU.HasFormulaWithSameRegs(NewF)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LU.DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ break;
+ }
+ }
+ }
+ }
}
- Power *= FSize;
- if (Power >= Limit)
- break;
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
}
- if (Power < Limit)
- break;
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+
+ if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
+ DEBUG(dbgs() << "The search space is too complex.\n");
+
+ DEBUG(dbgs() << "Narrowing the search space by assuming that uses "
+ "separated by a constant offset will use the same "
+ "registers.\n");
+
+ // This is especially useful for unrolled loops.
+
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
+ for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
+ E = LU.Formulae.end(); I != E; ++I) {
+ const Formula &F = *I;
+ if (F.AM.BaseOffs != 0 && F.AM.Scale == 0) {
+ if (LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU)) {
+ if (reconcileNewOffset(*LUThatHas, F.AM.BaseOffs,
+ /*HasBaseReg=*/false,
+ LU.Kind, LU.AccessTy)) {
+ DEBUG(dbgs() << " Deleting use "; LU.print(dbgs());
+ dbgs() << '\n');
+
+ LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+
+ // Delete formulae from the new use which are no longer legal.
+ bool Any = false;
+ for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
+ Formula &F = LUThatHas->Formulae[i];
+ if (!isLegalUse(F.AM,
+ LUThatHas->MinOffset, LUThatHas->MaxOffset,
+ LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
+ DEBUG(dbgs() << " Deleting "; F.print(dbgs());
+ dbgs() << '\n');
+ LUThatHas->DeleteFormula(F);
+ --i;
+ --e;
+ Any = true;
+ }
+ }
+ if (Any)
+ LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);
+
+ // Update the relocs to reference the new use.
+ for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ LSRFixup &Fixup = *I;
+ if (Fixup.LUIdx == LUIdx) {
+ Fixup.LUIdx = LUThatHas - &Uses.front();
+ Fixup.Offset += F.AM.BaseOffs;
+ DEBUG(dbgs() << "New fixup has offset "
+ << Fixup.Offset << '\n');
+ }
+ if (Fixup.LUIdx == NumUses-1)
+ Fixup.LUIdx = LUIdx;
+ }
+
+ // Delete the old use.
+ DeleteUse(LU);
+ --LUIdx;
+ --NumUses;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "After pre-selection:\n";
+ print_uses(dbgs()));
+ }
+
+ // With all other options exhausted, loop until the system is simple
+ // enough to handle.
+ SmallPtrSet<const SCEV *, 4> Taken;
+ while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    // Ok, we have too many formulae on our hands to conveniently handle.
// Use a rough heuristic to thin out the list.
+ DEBUG(dbgs() << "The search space is too complex.\n");
// Pick the register which is used by the most LSRUses, which is likely
// to be a good reuse register candidate.
@@ -2702,28 +2970,26 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
// In any use with formulae which references this register, delete formulae
// which don't reference it.
- for (SmallVectorImpl<LSRUse>::iterator I = Uses.begin(),
- E = Uses.end(); I != E; ++I) {
- LSRUse &LU = *I;
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
+ LSRUse &LU = Uses[LUIdx];
if (!LU.Regs.count(Best)) continue;
- // Clear out the set of used regs; it will be recomputed.
- LU.Regs.clear();
-
+ bool Any = false;
for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
Formula &F = LU.Formulae[i];
if (!F.referencesReg(Best)) {
DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n');
- std::swap(LU.Formulae.back(), F);
- LU.Formulae.pop_back();
+ LU.DeleteFormula(F);
--e;
--i;
+ Any = true;
+ assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
continue;
}
-
- if (F.ScaledReg) LU.Regs.insert(F.ScaledReg);
- LU.Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
}
+
+ if (Any)
+ LU.RecomputeRegs(LUIdx, RegUses);
}
DEBUG(dbgs() << "After pre-selection:\n";
@@ -2810,11 +3076,14 @@ retry:
// If none of the formulae had all of the required registers, relax the
// constraint so that we don't exclude all formulae.
if (!AnySatisfiedReqRegs) {
+ assert(!ReqRegs.empty() && "Solver failed even without required registers");
ReqRegs.clear();
goto retry;
}
}
+/// Solve - Choose one formula from each use. Return the results in the given
+/// Solution vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
SmallVector<const Formula *, 8> Workspace;
Cost SolutionCost;
@@ -2824,6 +3093,7 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
DenseSet<const SCEV *> VisitedRegs;
Workspace.reserve(Uses.size());
+ // SolveRecurse does all the work.
SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
CurRegs, VisitedRegs);
@@ -2839,17 +3109,8 @@ void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
Solution[i]->print(dbgs());
dbgs() << '\n';
});
-}
-/// getImmediateDominator - A handy utility for the specific DominatorTree
-/// query that we need here.
-///
-static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
- DomTreeNode *Node = DT.getNode(BB);
- if (!Node) return 0;
- Node = Node->getIDom();
- if (!Node) return 0;
- return Node->getBlock();
+ assert(Solution.size() == Uses.size() && "Malformed solution!");
}
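
What Solve/SolveRecurse compute, restated as a toy: choose one formula per use so the summed cost is minimal. The sketch below uses assumed names, a plain int in place of the Cost class, and omits the required-register pruning that the real solver relies on:

#include <cassert>
#include <climits>
#include <vector>

static void solveRecurse(const std::vector<std::vector<int> > &Costs,
                         size_t Depth, int CurCost,
                         std::vector<int> &Workspace,
                         int &BestCost, std::vector<int> &Best) {
  if (Depth == Costs.size()) {
    // A complete assignment: keep it if it beats the best so far.
    if (CurCost < BestCost) { BestCost = CurCost; Best = Workspace; }
    return;
  }
  // Try each candidate formula for this use.
  for (size_t i = 0; i != Costs[Depth].size(); ++i) {
    Workspace[Depth] = (int)i;
    solveRecurse(Costs, Depth + 1, CurCost + Costs[Depth][i],
                 Workspace, BestCost, Best);
  }
}

int main() {
  // Two "uses", each with two candidate "formulae" and scalar costs.
  std::vector<std::vector<int> > Costs(2);
  Costs[0].push_back(3); Costs[0].push_back(1);
  Costs[1].push_back(2); Costs[1].push_back(5);
  std::vector<int> Workspace(2), Best(2);
  int BestCost = INT_MAX;
  solveRecurse(Costs, 0, 0, Workspace, BestCost, Best);
  assert(BestCost == 3 && Best[0] == 1 && Best[1] == 0);
  return 0;
}
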
/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
@@ -2865,9 +3126,11 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
BasicBlock *IDom;
- for (BasicBlock *Rung = IP->getParent(); ; Rung = IDom) {
- IDom = getImmediateDominator(Rung, DT);
- if (!IDom) return IP;
+ for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
+ if (!Rung) return IP;
+ Rung = Rung->getIDom();
+ if (!Rung) return IP;
+ IDom = Rung->getBlock();
// Don't climb into a loop though.
const Loop *IDomLoop = LI.getLoopFor(IDom);
@@ -2891,7 +3154,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
// instead of at the end, so that it can be used for other expansions.
if (IDom == Inst->getParent() &&
(!BetterPos || DT.dominates(BetterPos, Inst)))
- BetterPos = next(BasicBlock::iterator(Inst));
+ BetterPos = llvm::next(BasicBlock::iterator(Inst));
}
if (!AllDominate)
break;
@@ -2957,6 +3220,8 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator IP,
return IP;
}
+/// Expand - Emit instructions for the leading candidate expression for this
+/// LSRUse (this is called "expanding").
Value *LSRInstance::Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
@@ -3212,6 +3477,8 @@ void LSRInstance::Rewrite(const LSRFixup &LF,
DeadInsts.push_back(LF.OperandValToReplace);
}
+/// ImplementSolution - Rewrite all the fixup locations with new values,
+/// following the chosen solution.
void
LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P) {
@@ -3224,10 +3491,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
// Expand the new value definitions and update the users.
- for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
- size_t LUIdx = Fixups[i].LUIdx;
+ for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
+ E = Fixups.end(); I != E; ++I) {
+ const LSRFixup &Fixup = *I;
- Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P);
+ Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P);
Changed = true;
}
@@ -3256,13 +3524,11 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false);
dbgs() << ":\n");
- /// OptimizeShadowIV - If IV is used in a int-to-float cast
- /// inside the loop then try to eliminate the cast operation.
+ // First, perform some low-level loop optimizations.
OptimizeShadowIV();
+ OptimizeLoopTermCond();
- // Change loop terminating condition to use the postinc iv when possible.
- Changed |= OptimizeLoopTermCond();
-
+ // Start collecting data and preparing for the solver.
CollectInterestingTypesAndFactors();
CollectFixupsAndInitialFormulae();
CollectLoopInvariantFixupsAndFormulae();
@@ -3283,7 +3549,6 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
SmallVector<const Formula *, 8> Solution;
Solve(Solution);
- assert(Solution.size() == Uses.size() && "Malformed solution!");
// Release memory that is no longer needed.
Factors.clear();
@@ -3333,9 +3598,8 @@ void LSRInstance::print_fixups(raw_ostream &OS) const {
OS << "LSR is examining the following fixup sites:\n";
for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(),
E = Fixups.end(); I != E; ++I) {
- const LSRFixup &LF = *I;
dbgs() << " ";
- LF.print(OS);
+ I->print(OS);
OS << '\n';
}
}
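
The "uses separated by a constant offset" heuristic added above is aimed at code shapes like this hand-written example (the function name and unroll factor are assumptions): after unrolling, the four x[i+k] and y[i+k] accesses can share one base register plus small constant offsets instead of each carrying an independent formula.

// Hand-written illustration of the unrolled-loop shape the constant-offset
// heuristic targets; not taken from the patch or its tests.
void axpy4(float *x, const float *y, float a, int n) {
  for (int i = 0; i + 3 < n; i += 4) {
    x[i]     += a * y[i];      // Shares a base register with the lines below,
    x[i + 1] += a * y[i + 1];  // at offset +4 bytes,
    x[i + 2] += a * y[i + 2];  // at offset +8 bytes,
    x[i + 3] += a * y[i + 3];  // and at offset +12 bytes.
  }
}
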
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b621e8d..9744100 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -58,13 +58,20 @@ FunctionPass *llvm::createCFGSimplificationPass() {
/// ChangeToUnreachable - Insert an unreachable instruction before the specified
/// instruction, making it and the rest of the code in the block dead.
-static void ChangeToUnreachable(Instruction *I) {
+static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
BasicBlock *BB = I->getParent();
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE; ++SI)
(*SI)->removePredecessor(BB);
+ // Insert a call to llvm.trap right before this. This turns the undefined
+ // behavior into a hard fail instead of falling through into random code.
+ if (UseLLVMTrap) {
+ Function *TrapFn =
+ Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap);
+ CallInst::Create(TrapFn, "", I);
+ }
new UnreachableInst(I->getContext(), I);
// All instructions after this are dead.
@@ -118,7 +125,8 @@ static bool MarkAliveBlocks(BasicBlock *BB,
// though.
++BBI;
if (!isa<UnreachableInst>(BBI)) {
- ChangeToUnreachable(BBI);
+ // Don't insert a call to llvm.trap right before the unreachable.
+ ChangeToUnreachable(BBI, false);
Changed = true;
}
break;
@@ -134,7 +142,7 @@ static bool MarkAliveBlocks(BasicBlock *BB,
if (isa<UndefValue>(Ptr) ||
(isa<ConstantPointerNull>(Ptr) &&
SI->getPointerAddressSpace() == 0)) {
- ChangeToUnreachable(SI);
+ ChangeToUnreachable(SI, true);
Changed = true;
break;
}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index b053cfc..7414be7 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -558,10 +558,13 @@ struct MemCmpOpt : public LibCallOptimization {
if (Len == 0) // memcmp(s1,s2,0) -> 0
return Constant::getNullValue(CI->getType());
- if (Len == 1) { // memcmp(S1,S2,1) -> *LHS - *RHS
- Value *LHSV = B.CreateLoad(CastToCStr(LHS, B), "lhsv");
- Value *RHSV = B.CreateLoad(CastToCStr(RHS, B), "rhsv");
- return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType());
+ // memcmp(S1,S2,1) -> *(unsigned char*)LHS - *(unsigned char*)RHS
+ if (Len == 1) {
+ Value *LHSV = B.CreateZExt(B.CreateLoad(CastToCStr(LHS, B), "lhsc"),
+ CI->getType(), "lhsv");
+ Value *RHSV = B.CreateZExt(B.CreateLoad(CastToCStr(RHS, B), "rhsc"),
+ CI->getType(), "rhsv");
+ return B.CreateSub(LHSV, RHSV, "chardiff");
}
// Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
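
The Len == 1 change above matters because memcmp compares bytes as unsigned char: subtracting in 8 bits and then sign-extending can produce the wrong sign. A hand-worked standalone check, not from the patch:

#include <cassert>
#include <stdint.h>

int main() {
  uint8_t lhs = 0x01, rhs = 0xFF;    // memcmp must report lhs < rhs.
  // New lowering: zero-extend each byte first, then subtract wide.
  int good = int(lhs) - int(rhs);    // 1 - 255 = -254: negative, correct.
  // Old lowering: subtract in 8 bits, then sign-extend the wrapped result.
  uint8_t raw = uint8_t(lhs - rhs);  // wraps to 0x02
  int bad = int(int8_t(raw));        // sign-extends to +2: wrong sign.
  assert(good < 0 && bad > 0);
  return 0;
}
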
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
new file mode 100644
index 0000000..b88ba48
--- /dev/null
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -0,0 +1,267 @@
+//===-- Sink.cpp - Code Sinking -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass moves instructions into successor blocks, when possible, so that
+// they aren't executed on paths where their results aren't needed.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sink"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Assembly/Writer.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+STATISTIC(NumSunk, "Number of instructions sunk");
+
+namespace {
+ class Sinking : public FunctionPass {
+ DominatorTree *DT;
+ LoopInfo *LI;
+ AliasAnalysis *AA;
+
+ public:
+ static char ID; // Pass identification
+ Sinking() : FunctionPass(&ID) {}
+
+ virtual bool runOnFunction(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ FunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<DominatorTree>();
+ AU.addPreserved<LoopInfo>();
+ }
+ private:
+ bool ProcessBlock(BasicBlock &BB);
+ bool SinkInstruction(Instruction *I, SmallPtrSet<Instruction *, 8> &Stores);
+ bool AllUsesDominatedByBlock(Instruction *Inst, BasicBlock *BB) const;
+ };
+} // end anonymous namespace
+
+char Sinking::ID = 0;
+static RegisterPass<Sinking>
+X("sink", "Code sinking");
+
+FunctionPass *llvm::createSinkingPass() { return new Sinking(); }
+
+/// AllUsesDominatedByBlock - Return true if all uses of the specified value
+/// occur in blocks dominated by the specified block.
+bool Sinking::AllUsesDominatedByBlock(Instruction *Inst,
+ BasicBlock *BB) const {
+ // Ignoring debug uses is necessary so debug info doesn't affect the code.
+ // This may leave a referencing dbg_value in the original block, before
+ // the definition of the value. The DWARF generator handles this, although
+ // the user might not get the right info at runtime.
+ for (Value::use_iterator I = Inst->use_begin(),
+ E = Inst->use_end(); I != E; ++I) {
+ // Determine the block of the use.
+ Instruction *UseInst = cast<Instruction>(*I);
+ BasicBlock *UseBlock = UseInst->getParent();
+ if (PHINode *PN = dyn_cast<PHINode>(UseInst)) {
+ // PHI nodes use the operand in the predecessor block, not the block with
+ // the PHI.
+ unsigned Num = PHINode::getIncomingValueNumForOperand(I.getOperandNo());
+ UseBlock = PN->getIncomingBlock(Num);
+ }
+ // Check that it dominates.
+ if (!DT->dominates(BB, UseBlock))
+ return false;
+ }
+ return true;
+}
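
The PHI special case above can be isolated into a small helper. This is a hedged restatement, assuming the LLVM 2.7-era headers this file already uses; effectiveUseBlock is a made-up name. The point: the block that "uses" a PHI operand is the incoming block, so that is where dominance must be checked.

#include "llvm/Instructions.h"
using namespace llvm;

static BasicBlock *effectiveUseBlock(Use &U) {
  Instruction *UserInst = cast<Instruction>(U.getUser());
  if (PHINode *PN = dyn_cast<PHINode>(UserInst))
    // A PHI consumes the value on the edge from its incoming block.
    return PN->getIncomingBlock(
        PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
  return UserInst->getParent();
}
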
+
+bool Sinking::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTree>();
+ LI = &getAnalysis<LoopInfo>();
+ AA = &getAnalysis<AliasAnalysis>();
+
+ bool EverMadeChange = false;
+
+ while (1) {
+ bool MadeChange = false;
+
+ // Process all basic blocks.
+ for (Function::iterator I = F.begin(), E = F.end();
+ I != E; ++I)
+ MadeChange |= ProcessBlock(*I);
+
+ // If this iteration over the code changed anything, keep iterating.
+ if (!MadeChange) break;
+ EverMadeChange = true;
+ }
+ return EverMadeChange;
+}
+
+bool Sinking::ProcessBlock(BasicBlock &BB) {
+ // Can't sink anything out of a block that has fewer than two successors.
+ if (BB.getTerminator()->getNumSuccessors() <= 1 || BB.empty()) return false;
+
+ // Don't bother sinking code out of unreachable blocks. In addition to being
+ // unprofitable, it can also lead to infinite looping, because in an unreachable
+ // loop there may be nowhere to stop.
+ if (!DT->isReachableFromEntry(&BB)) return false;
+
+ bool MadeChange = false;
+
+ // Walk the basic block bottom-up. Remember if we saw a store.
+ BasicBlock::iterator I = BB.end();
+ --I;
+ bool ProcessedBegin = false;
+ SmallPtrSet<Instruction *, 8> Stores;
+ do {
+ Instruction *Inst = I; // The instruction to sink.
+
+ // Predecrement I (if it's not begin) so that it isn't invalidated by
+ // sinking.
+ ProcessedBegin = I == BB.begin();
+ if (!ProcessedBegin)
+ --I;
+
+ if (isa<DbgInfoIntrinsic>(Inst))
+ continue;
+
+ if (SinkInstruction(Inst, Stores))
+ ++NumSunk, MadeChange = true;
+
+ // If we just processed the first instruction in the block, we're done.
+ } while (!ProcessedBegin);
+
+ return MadeChange;
+}
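
ProcessBlock's bottom-up walk predecrements the iterator before visiting, so removing the visited instruction cannot invalidate the traversal. The same idiom on a plain std::list, as a self-contained toy with nothing LLVM-specific:

#include <cassert>
#include <list>

int main() {
  std::list<int> L;
  for (int i = 1; i <= 4; ++i) L.push_back(i);   // {1, 2, 3, 4}
  std::list<int>::iterator I = L.end();
  --I;
  bool AtBegin;
  do {
    std::list<int>::iterator Cur = I;  // Element to visit this round.
    AtBegin = (Cur == L.begin());
    if (!AtBegin)
      --I;                             // Step first; erasing Cur stays safe.
    if (*Cur % 2 == 0)
      L.erase(Cur);                    // Stands in for sinking Inst away.
  } while (!AtBegin);
  assert(L.size() == 2 && L.front() == 1 && L.back() == 3);
  return 0;
}
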
+
+static bool isSafeToMove(Instruction *Inst, AliasAnalysis *AA,
+ SmallPtrSet<Instruction *, 8> &Stores) {
+ if (LoadInst *L = dyn_cast<LoadInst>(Inst)) {
+ if (L->isVolatile()) return false;
+
+ Value *Ptr = L->getPointerOperand();
+ unsigned Size = AA->getTypeStoreSize(L->getType());
+ for (SmallPtrSet<Instruction *, 8>::iterator I = Stores.begin(),
+ E = Stores.end(); I != E; ++I)
+ if (AA->getModRefInfo(*I, Ptr, Size) & AliasAnalysis::Mod)
+ return false;
+ }
+
+ if (Inst->mayWriteToMemory()) {
+ Stores.insert(Inst);
+ return false;
+ }
+
+ return Inst->isSafeToSpeculativelyExecute();
+}
+
+/// SinkInstruction - Determine whether it is safe to sink the specified
+/// instruction out of its current block into a successor.
+bool Sinking::SinkInstruction(Instruction *Inst,
+ SmallPtrSet<Instruction *, 8> &Stores) {
+ // Check if it's safe to move the instruction.
+ if (!isSafeToMove(Inst, AA, Stores))
+ return false;
+
+ // FIXME: This should include support for sinking instructions within the
+ // block they are currently in to shorten the live ranges. We often get
+ // instructions sunk into the top of a large block, but it would be better to
+ // also sink them down before their first use in the block. This xform has to
+ // be careful not to *increase* register pressure though, e.g. sinking
+ // "x = y + z" down if it kills y and z would increase the live ranges of y
+ // and z and only shrink the live range of x.
+
+ // Loop over all the operands of the specified instruction. If there is
+ // anything we can't handle, bail out.
+ BasicBlock *ParentBlock = Inst->getParent();
+
+ // SuccToSinkTo - This is the successor to sink this instruction to, once we
+ // decide.
+ BasicBlock *SuccToSinkTo = 0;
+
+ // FIXME: This picks a successor to sink into based on having one
+ // successor that dominates all the uses. However, there are cases where
+ // sinking can happen but where the sink point isn't a successor. For
+ // example:
+ // x = computation
+ // if () {} else {}
+ // use x
+ // the instruction could be sunk over the whole diamond for the
+ // if/then/else (or loop, etc), allowing it to be sunk into other blocks
+ // after that.
+
+ // Instructions can only be sunk if all their uses are in blocks
+ // dominated by one of the successors.
+ // Look at all the successors and decide which one
+ // we should sink to.
+ for (succ_iterator SI = succ_begin(ParentBlock),
+ E = succ_end(ParentBlock); SI != E; ++SI) {
+ if (AllUsesDominatedByBlock(Inst, *SI)) {
+ SuccToSinkTo = *SI;
+ break;
+ }
+ }
+
+ // If we couldn't find a block to sink to, ignore this instruction.
+ if (SuccToSinkTo == 0)
+ return false;
+
+ // It is not possible to sink an instruction into its own block. This can
+ // happen with loops.
+ if (Inst->getParent() == SuccToSinkTo)
+ return false;
+
+ DEBUG(dbgs() << "Sink instr " << *Inst);
+ DEBUG(dbgs() << "to block ";
+ WriteAsOperand(dbgs(), SuccToSinkTo, false));
+
+ // If the block has multiple predecessors, this would introduce computation
+ // on a path where it doesn't already exist. We could split the critical edge,
+ // but for now we just punt.
+ // FIXME: Split critical edges if not backedges.
+ if (SuccToSinkTo->getUniquePredecessor() != ParentBlock) {
+ // We cannot sink a load across a critical edge - there may be stores in
+ // other code paths.
+ if (!Inst->isSafeToSpeculativelyExecute()) {
+ DEBUG(dbgs() << " *** PUNTING: Won't sink load along critical edge.\n");
+ return false;
+ }
+
+ // We don't want to sink across a critical edge if we don't dominate the
+ // successor. We could be introducing calculations into new code paths.
+ if (!DT->dominates(ParentBlock, SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** PUNTING: Critical edge found\n");
+ return false;
+ }
+
+ // Don't sink instructions into a loop.
+ if (LI->isLoopHeader(SuccToSinkTo)) {
+ DEBUG(dbgs() << " *** PUNTING: Loop header found\n");
+ return false;
+ }
+
+ // Otherwise we are OK with sinking along a critical edge.
+ DEBUG(dbgs() << "Sinking along critical edge.\n");
+ }
+
+ // Determine where to insert into. Skip phi nodes.
+ BasicBlock::iterator InsertPos = SuccToSinkTo->begin();
+ while (InsertPos != SuccToSinkTo->end() && isa<PHINode>(InsertPos))
+ ++InsertPos;
+
+ // Move the instruction.
+ Inst->moveBefore(InsertPos);
+ return true;
+}
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 8ad66dd..6d4fe4b 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -344,7 +344,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) {
DILocation OrigLocation = ILoc.getOrigLocation();
MDNode *NewLoc = TheCallMD;
if (OrigLocation.Verify())
- NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD);
+ NewLoc = UpdateInlinedAtInfo(OrigLocation, TheCallMD);
Value *MDVs[] = {
InsnMD->getOperand(0), // Line
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index f181f3a..13f0a28 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -861,7 +861,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
// Find the nearest store that has a lower than this load.
StoresByIndexTy::iterator I =
std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(),
- std::pair<unsigned, StoreInst*>(LoadIdx, 0),
+ std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)),
StoreIndexSearchPredicate());
// If there is no store before this load, then we can't promote this load.
@@ -886,7 +886,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI) {
DIVariable DIVar(DDI->getVariable());
- if (!DIVar.getNode())
+ if (!DIVar.Verify())
return;
if (!DIF)
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 25d50db..f4bdb527 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ssaupdater"
-#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Instructions.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/AlignOf.h"
@@ -20,40 +19,17 @@
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include "llvm/Transforms/Utils/SSAUpdaterImpl.h"
using namespace llvm;
-/// BBInfo - Per-basic block information used internally by SSAUpdater.
-/// The predecessors of each block are cached here since pred_iterator is
-/// slow and we need to iterate over the blocks at least a few times.
-class SSAUpdater::BBInfo {
-public:
- BasicBlock *BB; // Back-pointer to the corresponding block.
- Value *AvailableVal; // Value to use in this block.
- BBInfo *DefBB; // Block that defines the available value.
- int BlkNum; // Postorder number.
- BBInfo *IDom; // Immediate dominator.
- unsigned NumPreds; // Number of predecessor blocks.
- BBInfo **Preds; // Array[NumPreds] of predecessor blocks.
- PHINode *PHITag; // Marker for existing PHIs that match.
-
- BBInfo(BasicBlock *ThisBB, Value *V)
- : BB(ThisBB), AvailableVal(V), DefBB(V ? this : 0), BlkNum(0), IDom(0),
- NumPreds(0), Preds(0), PHITag(0) { }
-};
-
-typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy;
-
typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
}
-static BBMapTy *getBBMap(void *BM) {
- return static_cast<BBMapTy*>(BM);
-}
-
SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
- : AV(0), PrototypeValue(0), BM(0), InsertedPHIs(NewPHI) {}
+ : AV(0), PrototypeValue(0), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
delete &getAvailableVals(AV);
@@ -105,9 +81,7 @@ static bool IsEquivalentPHI(PHINode *PHI,
/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
/// live at the end of the specified block.
Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
- assert(BM == 0 && "Unexpected Internal State");
Value *Res = GetValueAtEndOfBlockInternal(BB);
- assert(BM == 0 && "Unexpected Internal State");
return Res;
}
@@ -231,427 +205,126 @@ void SSAUpdater::RewriteUse(Use &U) {
U.set(V);
}
-/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
-/// for the specified BB and if so, return it. If not, construct SSA form by
-/// first calculating the required placement of PHIs and then inserting new
-/// PHIs where needed.
-Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
- AvailableValsTy &AvailableVals = getAvailableVals(AV);
- if (Value *V = AvailableVals[BB])
- return V;
-
- // Pool allocation used internally by GetValueAtEndOfBlock.
- BumpPtrAllocator Allocator;
- BBMapTy BBMapObj;
- BM = &BBMapObj;
-
- SmallVector<BBInfo*, 100> BlockList;
- BuildBlockList(BB, &BlockList, &Allocator);
-
- // Special case: bail out if BB is unreachable.
- if (BlockList.size() == 0) {
- BM = 0;
- return UndefValue::get(PrototypeValue->getType());
- }
-
- FindDominators(&BlockList);
- FindPHIPlacement(&BlockList);
- FindAvailableVals(&BlockList);
-
- BM = 0;
- return BBMapObj[BB]->DefBB->AvailableVal;
+/// PHIiter - Iterator for PHI operands. This is used for the PHI_iterator
+/// in the SSAUpdaterImpl template.
+namespace {
+ class PHIiter {
+ private:
+ PHINode *PHI;
+ unsigned idx;
+
+ public:
+ explicit PHIiter(PHINode *P) // begin iterator
+ : PHI(P), idx(0) {}
+ PHIiter(PHINode *P, bool) // end iterator
+ : PHI(P), idx(PHI->getNumIncomingValues()) {}
+
+ PHIiter &operator++() { ++idx; return *this; }
+ bool operator==(const PHIiter& x) const { return idx == x.idx; }
+ bool operator!=(const PHIiter& x) const { return !operator==(x); }
+ Value *getIncomingValue() { return PHI->getIncomingValue(idx); }
+ BasicBlock *getIncomingBlock() { return PHI->getIncomingBlock(idx); }
+ };
}
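
PHIiter's begin/end-by-flag idiom, reduced to a self-contained toy over std::vector; all names here are assumed. The extra bool parameter merely selects the "end" constructor, exactly as PHIiter(P, true) does above.

#include <cassert>
#include <vector>

class VecIter {
  const std::vector<int> *V;
  size_t idx;
public:
  explicit VecIter(const std::vector<int> &Vec)  // begin iterator
    : V(&Vec), idx(0) {}
  VecIter(const std::vector<int> &Vec, bool)     // end iterator
    : V(&Vec), idx(Vec.size()) {}
  VecIter &operator++() { ++idx; return *this; }
  bool operator==(const VecIter &X) const { return idx == X.idx; }
  bool operator!=(const VecIter &X) const { return !operator==(X); }
  int get() const { return (*V)[idx]; }
};

int main() {
  std::vector<int> V;
  V.push_back(10); V.push_back(20); V.push_back(30);
  int Sum = 0;
  for (VecIter I(V), E(V, true); I != E; ++I)
    Sum += I.get();
  assert(Sum == 60);
  return 0;
}
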
-/// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
-/// vector, set Info->NumPreds, and allocate space in Info->Preds.
-static void FindPredecessorBlocks(SSAUpdater::BBInfo *Info,
- SmallVectorImpl<BasicBlock*> *Preds,
- BumpPtrAllocator *Allocator) {
- // We can get our predecessor info by walking the pred_iterator list,
- // but it is relatively slow. If we already have PHI nodes in this
- // block, walk one of them to get the predecessor list instead.
- BasicBlock *BB = Info->BB;
- if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
- Preds->push_back(SomePhi->getIncomingBlock(PI));
- } else {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Preds->push_back(*PI);
+/// SSAUpdaterTraits<SSAUpdater> - Traits for the SSAUpdaterImpl template,
+/// specialized for SSAUpdater.
+namespace llvm {
+template<>
+class SSAUpdaterTraits<SSAUpdater> {
+public:
+ typedef BasicBlock BlkT;
+ typedef Value *ValT;
+ typedef PHINode PhiT;
+
+ typedef succ_iterator BlkSucc_iterator;
+ static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return succ_begin(BB); }
+ static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return succ_end(BB); }
+
+ typedef PHIiter PHI_iterator;
+ static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); }
+ static inline PHI_iterator PHI_end(PhiT *PHI) {
+ return PHI_iterator(PHI, true);
}
- Info->NumPreds = Preds->size();
- Info->Preds = static_cast<SSAUpdater::BBInfo**>
- (Allocator->Allocate(Info->NumPreds * sizeof(SSAUpdater::BBInfo*),
- AlignOf<SSAUpdater::BBInfo*>::Alignment));
-}
-
-/// BuildBlockList - Starting from the specified basic block, traverse back
-/// through its predecessors until reaching blocks with known values. Create
-/// BBInfo structures for the blocks and append them to the block list.
-void SSAUpdater::BuildBlockList(BasicBlock *BB, BlockListTy *BlockList,
- BumpPtrAllocator *Allocator) {
- AvailableValsTy &AvailableVals = getAvailableVals(AV);
- BBMapTy *BBMap = getBBMap(BM);
- SmallVector<BBInfo*, 10> RootList;
- SmallVector<BBInfo*, 64> WorkList;
-
- BBInfo *Info = new (*Allocator) BBInfo(BB, 0);
- (*BBMap)[BB] = Info;
- WorkList.push_back(Info);
-
- // Search backward from BB, creating BBInfos along the way and stopping when
- // reaching blocks that define the value. Record those defining blocks on
- // the RootList.
- SmallVector<BasicBlock*, 10> Preds;
- while (!WorkList.empty()) {
- Info = WorkList.pop_back_val();
- Preds.clear();
- FindPredecessorBlocks(Info, &Preds, Allocator);
-
- // Treat an unreachable predecessor as a definition with 'undef'.
- if (Info->NumPreds == 0) {
- Info->AvailableVal = UndefValue::get(PrototypeValue->getType());
- Info->DefBB = Info;
- RootList.push_back(Info);
- continue;
- }
-
- for (unsigned p = 0; p != Info->NumPreds; ++p) {
- BasicBlock *Pred = Preds[p];
- // Check if BBMap already has a BBInfo for the predecessor block.
- BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred);
- if (BBMapBucket.second) {
- Info->Preds[p] = BBMapBucket.second;
- continue;
- }
-
- // Create a new BBInfo for the predecessor.
- Value *PredVal = AvailableVals.lookup(Pred);
- BBInfo *PredInfo = new (*Allocator) BBInfo(Pred, PredVal);
- BBMapBucket.second = PredInfo;
- Info->Preds[p] = PredInfo;
-
- if (PredInfo->AvailableVal) {
- RootList.push_back(PredInfo);
- continue;
- }
- WorkList.push_back(PredInfo);
+ /// FindPredecessorBlocks - Put the predecessors of Info->BB into the Preds
+ /// vector, set Info->NumPreds, and allocate space in Info->Preds.
+ static void FindPredecessorBlocks(BasicBlock *BB,
+ SmallVectorImpl<BasicBlock*> *Preds) {
+ // We can get our predecessor info by walking the pred_iterator list,
+ // but it is relatively slow. If we already have PHI nodes in this
+ // block, walk one of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ for (unsigned PI = 0, E = SomePhi->getNumIncomingValues(); PI != E; ++PI)
+ Preds->push_back(SomePhi->getIncomingBlock(PI));
+ } else {
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Preds->push_back(*PI);
}
}
- // Now that we know what blocks are backwards-reachable from the starting
- // block, do a forward depth-first traversal to assign postorder numbers
- // to those blocks.
- BBInfo *PseudoEntry = new (*Allocator) BBInfo(0, 0);
- unsigned BlkNum = 1;
-
- // Initialize the worklist with the roots from the backward traversal.
- while (!RootList.empty()) {
- Info = RootList.pop_back_val();
- Info->IDom = PseudoEntry;
- Info->BlkNum = -1;
- WorkList.push_back(Info);
+ /// GetUndefVal - Get an undefined value of the same type as the value
+ /// being handled.
+ static Value *GetUndefVal(BasicBlock *BB, SSAUpdater *Updater) {
+ return UndefValue::get(Updater->PrototypeValue->getType());
}
- while (!WorkList.empty()) {
- Info = WorkList.back();
-
- if (Info->BlkNum == -2) {
- // All the successors have been handled; assign the postorder number.
- Info->BlkNum = BlkNum++;
- // If not a root, put it on the BlockList.
- if (!Info->AvailableVal)
- BlockList->push_back(Info);
- WorkList.pop_back();
- continue;
- }
-
- // Leave this entry on the worklist, but set its BlkNum to mark that its
- // successors have been put on the worklist. When it returns to the top
- // the list, after handling its successors, it will be assigned a number.
- Info->BlkNum = -2;
-
- // Add unvisited successors to the work list.
- for (succ_iterator SI = succ_begin(Info->BB), E = succ_end(Info->BB);
- SI != E; ++SI) {
- BBInfo *SuccInfo = (*BBMap)[*SI];
- if (!SuccInfo || SuccInfo->BlkNum)
- continue;
- SuccInfo->BlkNum = -1;
- WorkList.push_back(SuccInfo);
- }
+ /// CreateEmptyPHI - Create a new PHI instruction in the specified block.
+ /// Reserve space for the operands but do not fill them in yet.
+ static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds,
+ SSAUpdater *Updater) {
+ PHINode *PHI = PHINode::Create(Updater->PrototypeValue->getType(),
+ Updater->PrototypeValue->getName(),
+ &BB->front());
+ PHI->reserveOperandSpace(NumPreds);
+ return PHI;
}
- PseudoEntry->BlkNum = BlkNum;
-}
-/// IntersectDominators - This is the dataflow lattice "meet" operation for
-/// finding dominators. Given two basic blocks, it walks up the dominator
-/// tree until it finds a common dominator of both. It uses the postorder
-/// number of the blocks to determine how to do that.
-static SSAUpdater::BBInfo *IntersectDominators(SSAUpdater::BBInfo *Blk1,
- SSAUpdater::BBInfo *Blk2) {
- while (Blk1 != Blk2) {
- while (Blk1->BlkNum < Blk2->BlkNum) {
- Blk1 = Blk1->IDom;
- if (!Blk1)
- return Blk2;
- }
- while (Blk2->BlkNum < Blk1->BlkNum) {
- Blk2 = Blk2->IDom;
- if (!Blk2)
- return Blk1;
- }
+ /// AddPHIOperand - Add the specified value as an operand of the PHI for
+ /// the specified predecessor block.
+ static void AddPHIOperand(PHINode *PHI, Value *Val, BasicBlock *Pred) {
+ PHI->addIncoming(Val, Pred);
}
- return Blk1;
-}
-/// FindDominators - Calculate the dominator tree for the subset of the CFG
-/// corresponding to the basic blocks on the BlockList. This uses the
-/// algorithm from: "A Simple, Fast Dominance Algorithm" by Cooper, Harvey and
-/// Kennedy, published in Software--Practice and Experience, 2001, 4:1-10.
-/// Because the CFG subset does not include any edges leading into blocks that
-/// define the value, the results are not the usual dominator tree. The CFG
-/// subset has a single pseudo-entry node with edges to a set of root nodes
-/// for blocks that define the value. The dominators for this subset CFG are
-/// not the standard dominators but they are adequate for placing PHIs within
-/// the subset CFG.
-void SSAUpdater::FindDominators(BlockListTy *BlockList) {
- bool Changed;
- do {
- Changed = false;
- // Iterate over the list in reverse order, i.e., forward on CFG edges.
- for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
- E = BlockList->rend(); I != E; ++I) {
- BBInfo *Info = *I;
-
- // Start with the first predecessor.
- assert(Info->NumPreds > 0 && "unreachable block");
- BBInfo *NewIDom = Info->Preds[0];
-
- // Iterate through the block's other predecessors.
- for (unsigned p = 1; p != Info->NumPreds; ++p) {
- BBInfo *Pred = Info->Preds[p];
- NewIDom = IntersectDominators(NewIDom, Pred);
- }
-
- // Check if the IDom value has changed.
- if (NewIDom != Info->IDom) {
- Info->IDom = NewIDom;
- Changed = true;
- }
- }
- } while (Changed);
-}
-
-/// IsDefInDomFrontier - Search up the dominator tree from Pred to IDom for
-/// any blocks containing definitions of the value. If one is found, then the
-/// successor of Pred is in the dominance frontier for the definition, and
-/// this function returns true.
-static bool IsDefInDomFrontier(const SSAUpdater::BBInfo *Pred,
- const SSAUpdater::BBInfo *IDom) {
- for (; Pred != IDom; Pred = Pred->IDom) {
- if (Pred->DefBB == Pred)
- return true;
+ /// InstrIsPHI - Check if an instruction is a PHI.
+ ///
+ static PHINode *InstrIsPHI(Instruction *I) {
+ return dyn_cast<PHINode>(I);
}
- return false;
-}
-
-/// FindPHIPlacement - PHIs are needed in the iterated dominance frontiers of
-/// the known definitions. Iteratively add PHIs in the dom frontiers until
-/// nothing changes. Along the way, keep track of the nearest dominating
-/// definitions for non-PHI blocks.
-void SSAUpdater::FindPHIPlacement(BlockListTy *BlockList) {
- bool Changed;
- do {
- Changed = false;
- // Iterate over the list in reverse order, i.e., forward on CFG edges.
- for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
- E = BlockList->rend(); I != E; ++I) {
- BBInfo *Info = *I;
-
- // If this block already needs a PHI, there is nothing to do here.
- if (Info->DefBB == Info)
- continue;
-
- // Default to use the same def as the immediate dominator.
- BBInfo *NewDefBB = Info->IDom->DefBB;
- for (unsigned p = 0; p != Info->NumPreds; ++p) {
- if (IsDefInDomFrontier(Info->Preds[p], Info->IDom)) {
- // Need a PHI here.
- NewDefBB = Info;
- break;
- }
- }
-
- // Check if anything changed.
- if (NewDefBB != Info->DefBB) {
- Info->DefBB = NewDefBB;
- Changed = true;
- }
- }
- } while (Changed);
-}
-
-/// FindAvailableVal - If this block requires a PHI, first check if an existing
-/// PHI matches the PHI placement and reaching definitions computed earlier,
-/// and if not, create a new PHI. Visit all the block's predecessors to
-/// calculate the available value for each one and fill in the incoming values
-/// for a new PHI.
-void SSAUpdater::FindAvailableVals(BlockListTy *BlockList) {
- AvailableValsTy &AvailableVals = getAvailableVals(AV);
- // Go through the worklist in forward order (i.e., backward through the CFG)
- // and check if existing PHIs can be used. If not, create empty PHIs where
- // they are needed.
- for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
- I != E; ++I) {
- BBInfo *Info = *I;
- // Check if there needs to be a PHI in BB.
- if (Info->DefBB != Info)
- continue;
-
- // Look for an existing PHI.
- FindExistingPHI(Info->BB, BlockList);
- if (Info->AvailableVal)
- continue;
-
- PHINode *PHI = PHINode::Create(PrototypeValue->getType(),
- PrototypeValue->getName(),
- &Info->BB->front());
- PHI->reserveOperandSpace(Info->NumPreds);
- Info->AvailableVal = PHI;
- AvailableVals[Info->BB] = PHI;
+ /// ValueIsPHI - Check if a value is a PHI.
+ ///
+ static PHINode *ValueIsPHI(Value *Val, SSAUpdater *Updater) {
+ return dyn_cast<PHINode>(Val);
}
- // Now go back through the worklist in reverse order to fill in the arguments
- // for any new PHIs added in the forward traversal.
- for (BlockListTy::reverse_iterator I = BlockList->rbegin(),
- E = BlockList->rend(); I != E; ++I) {
- BBInfo *Info = *I;
-
- if (Info->DefBB != Info) {
- // Record the available value at join nodes to speed up subsequent
- // uses of this SSAUpdater for the same value.
- if (Info->NumPreds > 1)
- AvailableVals[Info->BB] = Info->DefBB->AvailableVal;
- continue;
- }
-
- // Check if this block contains a newly added PHI.
- PHINode *PHI = dyn_cast<PHINode>(Info->AvailableVal);
- if (!PHI || PHI->getNumIncomingValues() == Info->NumPreds)
- continue;
-
- // Iterate through the block's predecessors.
- for (unsigned p = 0; p != Info->NumPreds; ++p) {
- BBInfo *PredInfo = Info->Preds[p];
- BasicBlock *Pred = PredInfo->BB;
- // Skip to the nearest preceding definition.
- if (PredInfo->DefBB != PredInfo)
- PredInfo = PredInfo->DefBB;
- PHI->addIncoming(PredInfo->AvailableVal, Pred);
- }
-
- DEBUG(dbgs() << " Inserted PHI: " << *PHI << "\n");
-
- // If the client wants to know about all new instructions, tell it.
- if (InsertedPHIs) InsertedPHIs->push_back(PHI);
+ /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source
+ /// operands, i.e., it was just added.
+ static PHINode *ValueIsNewPHI(Value *Val, SSAUpdater *Updater) {
+ PHINode *PHI = ValueIsPHI(Val, Updater);
+ if (PHI && PHI->getNumIncomingValues() == 0)
+ return PHI;
+ return 0;
}
-}
-/// FindExistingPHI - Look through the PHI nodes in a block to see if any of
-/// them match what is needed.
-void SSAUpdater::FindExistingPHI(BasicBlock *BB, BlockListTy *BlockList) {
- PHINode *SomePHI;
- for (BasicBlock::iterator It = BB->begin();
- (SomePHI = dyn_cast<PHINode>(It)); ++It) {
- if (CheckIfPHIMatches(SomePHI)) {
- RecordMatchingPHI(SomePHI);
- break;
- }
- // Match failed: clear all the PHITag values.
- for (BlockListTy::iterator I = BlockList->begin(), E = BlockList->end();
- I != E; ++I)
- (*I)->PHITag = 0;
+ /// GetPHIValue - For the specified PHI instruction, return the value
+ /// that it defines.
+ static Value *GetPHIValue(PHINode *PHI) {
+ return PHI;
}
-}
+};
-/// CheckIfPHIMatches - Check if a PHI node matches the placement and values
-/// in the BBMap.
-bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) {
- BBMapTy *BBMap = getBBMap(BM);
- SmallVector<PHINode*, 20> WorkList;
- WorkList.push_back(PHI);
-
- // Mark that the block containing this PHI has been visited.
- (*BBMap)[PHI->getParent()]->PHITag = PHI;
-
- while (!WorkList.empty()) {
- PHI = WorkList.pop_back_val();
-
- // Iterate through the PHI's incoming values.
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
- Value *IncomingVal = PHI->getIncomingValue(i);
- BBInfo *PredInfo = (*BBMap)[PHI->getIncomingBlock(i)];
- // Skip to the nearest preceding definition.
- if (PredInfo->DefBB != PredInfo)
- PredInfo = PredInfo->DefBB;
-
- // Check if it matches the expected value.
- if (PredInfo->AvailableVal) {
- if (IncomingVal == PredInfo->AvailableVal)
- continue;
- return false;
- }
-
- // Check if the value is a PHI in the correct block.
- PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal);
- if (!IncomingPHIVal || IncomingPHIVal->getParent() != PredInfo->BB)
- return false;
-
- // If this block has already been visited, check if this PHI matches.
- if (PredInfo->PHITag) {
- if (IncomingPHIVal == PredInfo->PHITag)
- continue;
- return false;
- }
- PredInfo->PHITag = IncomingPHIVal;
-
- WorkList.push_back(IncomingPHIVal);
- }
- }
- return true;
-}
+} // End llvm namespace
-/// RecordMatchingPHI - For a PHI node that matches, record it and its input
-/// PHIs in both the BBMap and the AvailableVals mapping.
-void SSAUpdater::RecordMatchingPHI(PHINode *PHI) {
- BBMapTy *BBMap = getBBMap(BM);
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
+Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
AvailableValsTy &AvailableVals = getAvailableVals(AV);
- SmallVector<PHINode*, 20> WorkList;
- WorkList.push_back(PHI);
-
- // Record this PHI.
- BasicBlock *BB = PHI->getParent();
- AvailableVals[BB] = PHI;
- (*BBMap)[BB]->AvailableVal = PHI;
-
- while (!WorkList.empty()) {
- PHI = WorkList.pop_back_val();
-
- // Iterate through the PHI's incoming values.
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
- PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i));
- if (!IncomingPHIVal) continue;
- BB = IncomingPHIVal->getParent();
- BBInfo *Info = (*BBMap)[BB];
- if (!Info || Info->AvailableVal)
- continue;
-
- // Record the PHI and add it to the worklist.
- AvailableVals[BB] = IncomingPHIVal;
- Info->AvailableVal = IncomingPHIVal;
- WorkList.push_back(IncomingPHIVal);
- }
- }
+ if (Value *V = AvailableVals[BB])
+ return V;
+
+ SSAUpdaterImpl<SSAUpdater> Impl(this, &AvailableVals, InsertedPHIs);
+ return Impl.GetValue(BB);
}
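
The shape of the refactoring above, as a toy: SSAUpdaterImpl is templated over a traits class, so the IR-level SSAUpdater and the machine-level MachineSSAUpdater can share one PHI-placement algorithm. A minimal analogue with assumed names:

#include <cassert>
#include <vector>

struct Node { int Val; std::vector<Node*> Succs; };

// Toy analogue of SSAUpdaterTraits: the algorithm below only talks to the
// traits class, so a MachineBasicBlock-flavoured traits could reuse it
// unchanged, which is the point of SSAUpdaterImpl.
struct NodeTraits {
  typedef Node BlkT;
  typedef std::vector<Node*>::iterator BlkSucc_iterator;
  static BlkSucc_iterator BlkSucc_begin(Node *N) { return N->Succs.begin(); }
  static BlkSucc_iterator BlkSucc_end(Node *N) { return N->Succs.end(); }
};

template <typename Traits>
int sumSuccessorVals(typename Traits::BlkT *BB) {
  int Total = 0;
  for (typename Traits::BlkSucc_iterator I = Traits::BlkSucc_begin(BB),
       E = Traits::BlkSucc_end(BB); I != E; ++I)
    Total += (*I)->Val;
  return Total;
}

int main() {
  Node A = { 1, std::vector<Node*>() };
  Node B = { 2, std::vector<Node*>() };
  Node C = { 0, std::vector<Node*>() };
  C.Succs.push_back(&A);
  C.Succs.push_back(&B);
  assert(sumSuccessorVals<NodeTraits>(&C) == 3);
  return 0;
}
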
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 6c1aa5e..e48c026 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -677,11 +677,16 @@ void SlotTracker::processFunction() {
if (!I->getType()->isVoidTy() && !I->hasName())
CreateFunctionSlot(I);
- // Intrinsics can directly use metadata.
- if (isa<IntrinsicInst>(I))
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
- CreateMetadataSlot(N);
+ // Intrinsics can directly use metadata. We allow direct calls to any
+ // llvm.foo function here, because the target may not be linked into the
+ // optimizer.
+ if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+ if (Function *F = CI->getCalledFunction())
+ if (F->getName().startswith("llvm."))
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
+ CreateMetadataSlot(N);
+ }
// Process metadata attached with this instruction.
I->getAllMetadata(MDForInst);
@@ -1568,6 +1573,7 @@ void AssemblyWriter::printFunction(const Function *F) {
case CallingConv::Cold: Out << "coldcc "; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc "; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break;
+ case CallingConv::X86_ThisCall: Out << "x86_thiscallcc "; break;
case CallingConv::ARM_APCS: Out << "arm_apcscc "; break;
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
@@ -1840,6 +1846,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
case CallingConv::Cold: Out << " coldcc"; break;
case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break;
case CallingConv::ARM_APCS: Out << " arm_apcscc "; break;
case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
@@ -1892,6 +1899,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
case CallingConv::Cold: Out << " coldcc"; break;
case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ case CallingConv::X86_ThisCall: Out << " x86_thiscallcc"; break;
case CallingConv::ARM_APCS: Out << " arm_apcscc "; break;
case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
@@ -2024,9 +2032,9 @@ static void WriteMDNodeComment(const MDNode *Node,
return;
ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getOperand(0));
if (!CI) return;
- unsigned Val = CI->getZExtValue();
- unsigned Tag = Val & ~LLVMDebugVersionMask;
- if (Val < LLVMDebugVersion)
+ APInt Val = CI->getValue();
+ APInt Tag = Val & ~APInt(Val.getBitWidth(), LLVMDebugVersionMask);
+ if (Val.ult(LLVMDebugVersion))
return;
Out.PadToColumn(50);
@@ -2040,8 +2048,10 @@ static void WriteMDNodeComment(const MDNode *Node,
Out << "; [ DW_TAG_vector_type ]";
else if (Tag == dwarf::DW_TAG_user_base)
Out << "; [ DW_TAG_user_base ]";
- else if (const char *TagName = dwarf::TagString(Tag))
- Out << "; [ " << TagName << " ]";
+ else if (Tag.isIntN(32)) {
+ if (const char *TagName = dwarf::TagString(Tag.getZExtValue()))
+ Out << "; [ " << TagName << " ]";
+ }
}
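
The point of switching to APInt above: a debug-tag constant wider than 32 bits no longer truncates silently, and dwarf::TagString is only consulted when the tag provably fits an unsigned. A hedged restatement, assuming LLVM's APInt; the helper name is made up:

#include "llvm/ADT/APInt.h"

// True only when the masked tag fits in 32 bits, i.e. only then is it safe
// to hand Tag.getZExtValue() to dwarf::TagString(unsigned).
static bool tagFitsTagString(const llvm::APInt &Val, uint64_t Mask) {
  llvm::APInt Tag = Val & ~llvm::APInt(Val.getBitWidth(), Mask);
  return Tag.isIntN(32);
}
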
void AssemblyWriter::writeAllMDNodes() {
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index b28fdeb..a56938c 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -275,7 +275,7 @@ public:
addImmutablePass(IP);
recordAvailableAnalysis(IP);
} else {
- P->assignPassManager(activeStack);
+ P->assignPassManager(activeStack, PMT_FunctionPassManager);
}
}
@@ -418,7 +418,7 @@ public:
addImmutablePass(IP);
recordAvailableAnalysis(IP);
} else {
- P->assignPassManager(activeStack);
+ P->assignPassManager(activeStack, PMT_ModulePassManager);
}
}
@@ -1270,20 +1270,30 @@ FunctionPassManager::~FunctionPassManager() {
delete FPM;
}
+/// addImpl - Add a pass to the queue of passes to run, without
+/// checking whether to add a printer pass.
+void FunctionPassManager::addImpl(Pass *P) {
+ FPM->add(P);
+}
+
/// add - Add a pass to the queue of passes to run. This passes
/// ownership of the Pass to the PassManager. When the
/// PassManager_X is destroyed, the pass will be destroyed as well, so
/// there is no need to delete the pass. (TODO delete passes.)
/// This implies that all passes MUST be allocated with 'new'.
void FunctionPassManager::add(Pass *P) {
- if (ShouldPrintBeforePass(P))
- add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
- + P->getPassName() + " ***"));
- FPM->add(P);
+ // If this is a not a function pass, don't add a printer for it.
+ if (P->getPassKind() == PT_Function)
+ if (ShouldPrintBeforePass(P))
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+ + P->getPassName() + " ***"));
- if (ShouldPrintAfterPass(P))
- add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
- + P->getPassName() + " ***"));
+ addImpl(P);
+
+ if (P->getPassKind() == PT_Function)
+ if (ShouldPrintAfterPass(P))
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+ + P->getPassName() + " ***"));
}
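
Why the add()/addImpl() split is needed: add() now inserts printer passes around the client's pass, and routing the actual enqueue through addImpl() keeps those printer passes from being wrapped with printers of their own (the old code recursed back into add()). A toy with assumed names:

#include <cassert>
#include <string>
#include <vector>

class Queue {
  std::vector<std::string> Items;
  // addImpl - enqueue without any wrapping; printers go through here too.
  void addImpl(const std::string &S) { Items.push_back(S); }
public:
  void add(const std::string &S) {
    addImpl("print-before " + S);  // Stand-in for the IR-dump printer pass.
    addImpl(S);
    addImpl("print-after " + S);
  }
  size_t size() const { return Items.size(); }
};

int main() {
  Queue Q;
  Q.add("loop-reduce");
  // Exactly three entries; recursing through add() would wrap the printers
  // again and never terminate.
  assert(Q.size() == 3);
  return 0;
}
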
/// run - Execute all of the passes scheduled for execution. Keep
@@ -1588,20 +1598,26 @@ PassManager::~PassManager() {
delete PM;
}
+/// addImpl - Add a pass to the queue of passes to run, without
+/// checking whether to add a printer pass.
+void PassManager::addImpl(Pass *P) {
+ PM->add(P);
+}
+
/// add - Add a pass to the queue of passes to run. This passes ownership of
/// the Pass to the PassManager. When the PassManager is destroyed, the pass
/// will be destroyed as well, so there is no need to delete the pass. This
/// implies that all passes MUST be allocated with 'new'.
void PassManager::add(Pass *P) {
if (ShouldPrintBeforePass(P))
- add(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
- + P->getPassName() + " ***"));
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump Before ")
+ + P->getPassName() + " ***"));
- PM->add(P);
+ addImpl(P);
if (ShouldPrintAfterPass(P))
- add(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
- + P->getPassName() + " ***"));
+ addImpl(P->createPrinterPass(dbgs(), std::string("*** IR Dump After ")
+ + P->getPassName() + " ***"));
}
/// run - Execute all of the passes scheduled for execution. Keep track of
@@ -1764,7 +1780,7 @@ void BasicBlockPass::assignPassManager(PMStack &PMS,
// [3] Assign manager to manage this new manager. This may create
// and push new managers into PMS
- BBP->assignPassManager(PMS);
+ BBP->assignPassManager(PMS, PreferredType);
// [4] Push new manager into PMS
PMS.push(BBP);
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index a092cd1..d2a8ce3 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -61,6 +61,10 @@ bool EVT::isExtended256BitVector() const {
return isExtendedVector() && getSizeInBits() == 256;
}
+bool EVT::isExtended512BitVector() const {
+ return isExtendedVector() && getSizeInBits() == 512;
+}
+
EVT EVT::getExtendedVectorElementType() const {
assert(isExtended() && "Type is not extended!");
return EVT::getEVT(cast<VectorType>(LLVMTy)->getElementType());
@@ -121,6 +125,7 @@ std::string EVT::getEVTString() const {
case MVT::v1i64: return "v1i64";
case MVT::v2i64: return "v2i64";
case MVT::v4i64: return "v4i64";
+ case MVT::v8i64: return "v8i64";
case MVT::v2f32: return "v2f32";
case MVT::v4f32: return "v4f32";
case MVT::v8f32: return "v8f32";
@@ -165,6 +170,7 @@ const Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
+ case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 6ad4272..75988cc 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -688,6 +688,7 @@ void Verifier::visitFunction(Function &F) {
case CallingConv::Fast:
case CallingConv::Cold:
case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
Assert1(!F.isVarArg(),
"Varargs functions must have C calling conventions!", &F);
break;
@@ -1152,7 +1153,7 @@ void Verifier::VerifyCallSite(CallSite CS) {
Assert1(CS.arg_size() == FTy->getNumParams(),
"Incorrect number of arguments passed to called function!", I);
- // Verify that all arguments to the call match the function type...
+ // Verify that all arguments to the call match the function type.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
Assert3(CS.getArgument(i)->getType() == FTy->getParamType(i),
"Call parameter type does not match function signature!",
@@ -1179,8 +1180,8 @@ void Verifier::VerifyCallSite(CallSite CS) {
}
// Verify that there's no metadata unless it's a direct call to an intrinsic.
- if (!CS.getCalledFunction() || CS.getCalledFunction()->getName().size() < 5 ||
- CS.getCalledFunction()->getName().substr(0, 5) != "llvm.") {
+ if (!CS.getCalledFunction() ||
+ !CS.getCalledFunction()->getName().startswith("llvm.")) {
for (FunctionType::param_iterator PI = FTy->param_begin(),
PE = FTy->param_end(); PI != PE; ++PI)
Assert1(!PI->get()->isMetadataTy(),