From 9112829d76cbb8e0c8ef51bbc2d7d1be48cd7b74 Mon Sep 17 00:00:00 2001
From: rdivacky
Date: Tue, 13 Jul 2010 17:19:57 +0000
Subject: Update LLVM to r108243.

---
CMakeLists.txt | 2 +-
Makefile | 6 +-
Makefile.config.in | 14 +-
Makefile.rules | 45 +-
autoconf/configure.ac | 7 +-
autoconf/m4/link_options.m4 | 23 +-
bindings/ada/llvm/llvm.ads | 6 +-
cmake/config-ix.cmake | 2 +-
cmake/modules/TableGen.cmake | 8 +-
configure | 203 +-
docs/AliasAnalysis.html | 88 +-
docs/CodeGenerator.html | 28 +-
docs/DebuggingJITedCode.html | 135 +-
docs/ExceptionHandling.html | 6 +-
docs/FAQ.html | 4 +-
docs/GetElementPtr.html | 8 +-
docs/GettingStarted.html | 8 +-
docs/HowToReleaseLLVM.html | 14 +-
docs/LangRef.html | 557 +-
docs/MakefileGuide.html | 4 +-
docs/Passes.html | 708 ++-
docs/SourceLevelDebugging.html | 35 +-
docs/TableGenFundamentals.html | 121 +-
docs/WritingAnLLVMBackend.html | 8 +-
docs/WritingAnLLVMPass.html | 27 +-
docs/tutorial/LangImpl3.html | 20 +-
docs/tutorial/LangImpl4.html | 14 +-
docs/tutorial/LangImpl5.html | 10 +-
docs/tutorial/LangImpl6.html | 16 +-
docs/tutorial/LangImpl7.html | 20 +-
docs/tutorial/OCamlLangImpl3.html | 14 +-
docs/tutorial/OCamlLangImpl4.html | 8 +-
docs/tutorial/OCamlLangImpl5.html | 4 +-
docs/tutorial/OCamlLangImpl6.html | 4 +-
docs/tutorial/OCamlLangImpl7.html | 14 +-
examples/ExceptionDemo/ExceptionDemo.cpp | 2 +-
examples/Kaleidoscope/Chapter3/toy.cpp | 8 +-
examples/Kaleidoscope/Chapter4/toy.cpp | 8 +-
examples/Kaleidoscope/Chapter5/toy.cpp | 10 +-
examples/Kaleidoscope/Chapter6/toy.cpp | 10 +-
examples/Kaleidoscope/Chapter7/toy.cpp | 8 +-
examples/OCaml-Kaleidoscope/Chapter4/codegen.ml | 6 +-
examples/OCaml-Kaleidoscope/Chapter5/codegen.ml | 6 +-
examples/OCaml-Kaleidoscope/Chapter6/codegen.ml | 6 +-
examples/OCaml-Kaleidoscope/Chapter7/codegen.ml | 6 +-
include/llvm-c/Core.h | 3 +-
include/llvm-c/Target.h | 3 +-
include/llvm-c/lto.h | 2 +-
include/llvm/ADT/DAGDeltaAlgorithm.h | 75 +
include/llvm/ADT/DenseMap.h | 1 +
include/llvm/ADT/EquivalenceClasses.h | 2 +-
include/llvm/ADT/FoldingSet.h | 83 +
include/llvm/ADT/ImmutableIntervalMap.h | 12 +-
include/llvm/ADT/PostOrderIterator.h | 17 +-
include/llvm/ADT/SetVector.h | 8 +
include/llvm/ADT/SmallPtrSet.h | 49 +-
include/llvm/ADT/SmallVector.h | 160 +-
include/llvm/ADT/Statistic.h | 4 +
include/llvm/ADT/Triple.h | 7 +-
include/llvm/ADT/ValueMap.h | 6 +-
include/llvm/ADT/ilist.h | 1 +
include/llvm/AbstractTypeUser.h | 3 +-
include/llvm/Analysis/AliasAnalysis.h | 21 -
include/llvm/Analysis/CFGPrinter.h | 6 +-
include/llvm/Analysis/CaptureTracking.h | 6 +-
include/llvm/Analysis/CodeMetrics.h | 72 +
include/llvm/Analysis/DebugInfo.h | 9 +-
include/llvm/Analysis/DominatorInternals.h | 18 +-
include/llvm/Analysis/Dominators.h | 22 +-
include/llvm/Analysis/InlineCost.h | 45 +-
include/llvm/Analysis/IntervalIterator.h | 22 +-
include/llvm/Analysis/Loads.h | 51 +
include/llvm/Analysis/LoopInfo.h | 59 +-
include/llvm/Analysis/MemoryBuiltins.h | 4 +-
include/llvm/Analysis/ScalarEvolution.h | 8 -
include/llvm/Analysis/ScalarEvolutionExpander.h | 15 +-
include/llvm/Analysis/ValueTracking.h | 2 +-
include/llvm/Bitcode/ReaderWriter.h | 3 +-
include/llvm/CodeGen/AsmPrinter.h | 142 +-
include/llvm/CodeGen/CallingConvLower.h | 9 +-
include/llvm/CodeGen/FastISel.h | 61 +-
include/llvm/CodeGen/FunctionLoweringInfo.h | 154 +
include/llvm/CodeGen/GCMetadata.h | 96 +-
include/llvm/CodeGen/GCMetadataPrinter.h | 28 +-
include/llvm/CodeGen/ISDOpcodes.h | 7 +-
include/llvm/CodeGen/LinkAllCodegenComponents.h | 1 -
include/llvm/CodeGen/LiveInterval.h | 101 +-
include/llvm/CodeGen/LiveIntervalAnalysis.h | 17 +-
include/llvm/CodeGen/MachineBasicBlock.h | 21 +
include/llvm/CodeGen/MachineFrameInfo.h | 12 +-
include/llvm/CodeGen/MachineFunctionPass.h | 9 +-
include/llvm/CodeGen/MachineInstr.h | 30 +-
include/llvm/CodeGen/MachineJumpTableInfo.h | 2 +-
include/llvm/CodeGen/MachineLoopInfo.h | 4 +-
include/llvm/CodeGen/MachineOperand.h | 16 +-
include/llvm/CodeGen/MachineRegisterInfo.h | 15 +-
include/llvm/CodeGen/Passes.h | 15 +-
include/llvm/CodeGen/PostRAHazardRecognizer.h | 94 +
include/llvm/CodeGen/RegisterCoalescer.h | 90 +
include/llvm/CodeGen/RegisterScavenging.h | 11 +-
include/llvm/CodeGen/RuntimeLibcalls.h | 34 +
include/llvm/CodeGen/SelectionDAG.h | 39 +-
include/llvm/CodeGen/SelectionDAGISel.h | 15 +-
include/llvm/CodeGen/SelectionDAGNodes.h | 133 +-
include/llvm/CodeGen/SlotIndexes.h | 61 +-
include/llvm/Config/config.h.in | 3 +
include/llvm/ExecutionEngine/ExecutionEngine.h | 4 +-
include/llvm/GlobalValue.h | 17 +-
include/llvm/InlineAsm.h | 3 +-
include/llvm/InstrTypes.h | 10 +-
include/llvm/Instructions.h | 77 +-
include/llvm/IntrinsicInst.h | 64 +-
include/llvm/Intrinsics.td | 6 +-
include/llvm/LinkAllPasses.h | 1 +
include/llvm/MC/MCAssembler.h | 4 +-
include/llvm/MC/MCContext.h | 22 +
include/llvm/MC/MCDirectives.h | 3 +-
include/llvm/MC/MCObjectStreamer.h | 56 +
include/llvm/MC/MCObjectWriter.h | 2 +
include/llvm/MC/MCParser/AsmLexer.h | 6 +-
include/llvm/MC/MCParser/AsmParser.h | 42 +-
include/llvm/MC/MCParser/MCAsmLexer.h | 7 +
include/llvm/MC/MCParser/MCAsmParser.h | 32 +-
include/llvm/MC/MCParser/MCAsmParserExtension.h | 66 +
include/llvm/MC/MCSection.h | 6 +-
include/llvm/MC/MCSectionCOFF.h | 52 +-
include/llvm/MC/MCStreamer.h | 10 +-
include/llvm/MC/SectionKind.h | 64 +-
include/llvm/Module.h | 29 +-
include/llvm/Pass.h | 27 +-
include/llvm/PassAnalysisSupport.h | 13 +-
include/llvm/PassManagers.h | 5 +-
include/llvm/PassSupport.h | 8 +-
include/llvm/Support/CFG.h | 2 +-
include/llvm/Support/COFF.h | 183 +
include/llvm/Support/CallSite.h | 12 +-
include/llvm/Support/Dwarf.h | 87 +
include/llvm/Support/ELF.h | 264 +-
include/llvm/Support/IRBuilder.h | 113 +-
include/llvm/Support/IRReader.h | 6 +-
include/llvm/Support/MemoryBuffer.h | 31 +-
include/llvm/Support/Timer.h | 6 +-
include/llvm/Support/raw_ostream.h | 4 +-
include/llvm/SymbolTableListTraits.h | 5 +-
include/llvm/System/DataTypes.h.cmake | 52 +-
include/llvm/System/Path.h | 8 -
include/llvm/Target/Target.td | 28 +-
include/llvm/Target/TargetAsmParser.h | 4 +-
include/llvm/Target/TargetCallingConv.h | 142 +
include/llvm/Target/TargetInstrDesc.h | 6 +-
include/llvm/Target/TargetInstrInfo.h | 141 +-
include/llvm/Target/TargetInstrItineraries.h | 3 +-
include/llvm/Target/TargetLowering.h | 282 +-
include/llvm/Target/TargetOpcodes.h | 48 +-
include/llvm/Target/TargetRegisterInfo.h | 60 +-
include/llvm/Transforms/IPO.h | 5 +
include/llvm/Transforms/Utils/BasicBlockUtils.h | 18 -
include/llvm/Transforms/Utils/BuildLibCalls.h | 4 +
include/llvm/Transforms/Utils/Cloning.h | 30 +-
include/llvm/Transforms/Utils/Local.h | 11 -
include/llvm/Type.h | 6 +-
include/llvm/Use.h | 1 +
include/llvm/Value.h | 4 +-
lib/Analysis/AliasAnalysis.cpp | 4 +-
lib/Analysis/AliasAnalysisEvaluator.cpp | 1 -
lib/Analysis/AliasDebugger.cpp | 12 +-
lib/Analysis/BasicAliasAnalysis.cpp | 145 +-
lib/Analysis/CMakeLists.txt | 1 +
lib/Analysis/ConstantFolding.cpp | 18 +-
lib/Analysis/DebugInfo.cpp | 80 +-
lib/Analysis/DomPrinter.cpp | 4 +-
lib/Analysis/IPA/CallGraph.cpp | 8 +-
lib/Analysis/IPA/GlobalsModRef.cpp | 38 +-
lib/Analysis/InlineCost.cpp | 27 +-
lib/Analysis/Lint.cpp | 239 +-
lib/Analysis/Loads.cpp | 235 +
lib/Analysis/LoopInfo.cpp | 7 +-
lib/Analysis/MemoryBuiltins.cpp | 22 +-
lib/Analysis/MemoryDependenceAnalysis.cpp | 29 +-
lib/Analysis/PostDominators.cpp | 5 +-
lib/Analysis/ProfileInfo.cpp | 6 +-
lib/Analysis/ScalarEvolution.cpp | 325 +-
lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 40 +-
lib/Analysis/ScalarEvolutionExpander.cpp | 135 +-
lib/Analysis/ScalarEvolutionNormalization.cpp | 15 +-
lib/Analysis/ValueTracking.cpp | 4 +-
lib/Archive/ArchiveWriter.cpp | 12 +-
lib/AsmParser/LLLexer.cpp | 1 +
lib/AsmParser/LLParser.cpp | 42 +-
lib/AsmParser/LLToken.h | 6 +-
lib/Bitcode/Reader/BitcodeReader.cpp | 21 +-
lib/Bitcode/Writer/BitcodeWriter.cpp | 33 +-
lib/Bitcode/Writer/ValueEnumerator.cpp | 13 +-
lib/Bitcode/Writer/ValueEnumerator.h | 7 +-
lib/CodeGen/AggressiveAntiDepBreaker.cpp | 83 +-
lib/CodeGen/AggressiveAntiDepBreaker.h | 1 +
lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 94 +-
lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 4 +-
lib/CodeGen/AsmPrinter/DIE.cpp | 2 +
lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 246 +-
lib/CodeGen/AsmPrinter/DwarfDebug.h | 18 +-
lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 6 +-
lib/CodeGen/BranchFolding.cpp | 68 +-
lib/CodeGen/BranchFolding.h | 5 +-
lib/CodeGen/CMakeLists.txt | 7 +-
lib/CodeGen/CalcSpillWeights.cpp | 2 +-
lib/CodeGen/CallingConvLower.cpp | 177 +
lib/CodeGen/CodePlacementOpt.cpp | 4 +
lib/CodeGen/CriticalAntiDepBreaker.cpp | 158 +-
lib/CodeGen/CriticalAntiDepBreaker.h | 5 +-
lib/CodeGen/DwarfEHPrepare.cpp | 110 +-
lib/CodeGen/ELFCodeEmitter.cpp | 2 +-
lib/CodeGen/ExactHazardRecognizer.cpp | 180 -
lib/CodeGen/ExactHazardRecognizer.h | 86 -
lib/CodeGen/GCStrategy.cpp | 6 +-
lib/CodeGen/IfConversion.cpp | 400 +-
lib/CodeGen/InlineSpiller.cpp | 408 ++
lib/CodeGen/IntrinsicLowering.cpp | 51 +-
lib/CodeGen/LLVMTargetMachine.cpp | 15 +-
lib/CodeGen/LatencyPriorityQueue.cpp | 2 +-
lib/CodeGen/LiveInterval.cpp | 66 +-
lib/CodeGen/LiveIntervalAnalysis.cpp | 236 +-
lib/CodeGen/LiveStackAnalysis.cpp | 4 +-
lib/CodeGen/LiveVariables.cpp | 9 +-
lib/CodeGen/LowerSubregs.cpp | 217 +-
lib/CodeGen/MachineBasicBlock.cpp | 129 +-
lib/CodeGen/MachineCSE.cpp | 46 +-
lib/CodeGen/MachineDominators.cpp | 1 -
lib/CodeGen/MachineFunction.cpp | 16 +-
lib/CodeGen/MachineInstr.cpp | 113 +-
lib/CodeGen/MachineLICM.cpp | 118 +-
lib/CodeGen/MachineRegisterInfo.cpp | 130 +-
lib/CodeGen/MachineSink.cpp | 102 +-
lib/CodeGen/MachineVerifier.cpp | 3 +-
lib/CodeGen/OptimizeExts.cpp | 24 +-
lib/CodeGen/OptimizePHIs.cpp | 5 +
lib/CodeGen/PBQP/HeuristicSolver.h | 2 +-
lib/CodeGen/PBQP/Heuristics/Briggs.h | 5 +-
lib/CodeGen/PHIElimination.cpp | 63 +-
lib/CodeGen/Passes.cpp | 26 +-
lib/CodeGen/PostRAHazardRecognizer.cpp | 180 +
lib/CodeGen/PostRASchedulerList.cpp | 51 +-
lib/CodeGen/PreAllocSplitting.cpp | 89 +-
lib/CodeGen/ProcessImplicitDefs.cpp | 33 +-
lib/CodeGen/PrologEpilogInserter.cpp | 65 +-
lib/CodeGen/RegAllocFast.cpp | 226 +-
lib/CodeGen/RegAllocLinearScan.cpp | 33 +-
lib/CodeGen/RegAllocLocal.cpp | 1254 ----
lib/CodeGen/RegAllocPBQP.cpp | 25 +-
lib/CodeGen/RegisterCoalescer.cpp | 156 +
lib/CodeGen/RegisterScavenging.cpp | 33 +-
lib/CodeGen/ScheduleDAG.cpp | 37 +-
lib/CodeGen/ScheduleDAGEmit.cpp | 14 +-
lib/CodeGen/ScheduleDAGInstrs.h | 5 +-
lib/CodeGen/SelectionDAG/CMakeLists.txt | 1 -
lib/CodeGen/SelectionDAG/CallingConvLower.cpp | 179 -
lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 267 +-
lib/CodeGen/SelectionDAG/FastISel.cpp | 354 +-
lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 63 +-
lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h | 144 -
lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 133 +-
lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 335 +-
lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 40 +-
lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 150 +-
lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 72 +-
lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4 +
lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 8 +-
lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 2 +-
lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 140 +-
lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2 +-
lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 17 +-
lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 241 +-
lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 5 +-
lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 158 +-
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1248 ++--
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 12 +
lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 421 +-
lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2 +-
lib/CodeGen/SelectionDAG/TargetLowering.cpp | 218 +-
lib/CodeGen/ShadowStackGC.cpp | 14 +-
lib/CodeGen/SimpleHazardRecognizer.h | 89 -
lib/CodeGen/SimpleRegisterCoalescing.cpp | 1790 ++----
lib/CodeGen/SimpleRegisterCoalescing.h | 84 +-
lib/CodeGen/SjLjEHPrepare.cpp | 114 +-
lib/CodeGen/SlotIndexes.cpp | 4 +-
lib/CodeGen/Spiller.cpp | 209 +-
lib/CodeGen/Spiller.h | 18 +-
lib/CodeGen/StackProtector.cpp | 16 +-
lib/CodeGen/StackSlotColoring.cpp | 14 +-
lib/CodeGen/StrongPHIElimination.cpp | 22 +-
lib/CodeGen/TailDuplication.cpp | 18 +-
lib/CodeGen/TargetInstrInfoImpl.cpp | 182 +-
lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 112 +-
lib/CodeGen/TwoAddressInstructionPass.cpp | 303 +-
lib/CodeGen/VirtRegRewriter.cpp | 110 +-
lib/CompilerDriver/Tool.cpp | 3 +-
lib/ExecutionEngine/Interpreter/Execution.cpp | 2 +-
.../Interpreter/ExternalFunctions.cpp | 2 +-
lib/ExecutionEngine/JIT/JIT.cpp | 54 +-
lib/ExecutionEngine/JIT/JIT.h | 34 +-
lib/ExecutionEngine/JIT/JITEmitter.cpp | 10 +-
lib/Linker/LinkItems.cpp | 29 +-
lib/MC/CMakeLists.txt | 3 +
lib/MC/MCAsmStreamer.cpp | 24 +-
lib/MC/MCAssembler.cpp | 18 +-
lib/MC/MCContext.cpp | 11 +-
lib/MC/MCExpr.cpp | 6 +-
lib/MC/MCMachOStreamer.cpp | 161 +-
lib/MC/MCObjectStreamer.cpp | 39 +
lib/MC/MCParser/AsmLexer.cpp | 7 +-
lib/MC/MCParser/AsmParser.cpp | 786 +--
lib/MC/MCParser/CMakeLists.txt | 3 +
lib/MC/MCParser/DarwinAsmParser.cpp | 758 +++
lib/MC/MCParser/ELFAsmParser.cpp | 68 +
lib/MC/MCParser/MCAsmLexer.cpp | 6 +-
lib/MC/MCParser/MCAsmParser.cpp | 6 +
lib/MC/MCParser/MCAsmParserExtension.cpp | 21 +
lib/MC/MCSectionCOFF.cpp | 14 +-
lib/MC/MachObjectWriter.cpp | 61 +-
lib/MC/WinCOFFObjectWriter.cpp | 71 +
lib/MC/WinCOFFStreamer.cpp | 198 +
lib/Support/CMakeLists.txt | 1 +
lib/Support/DAGDeltaAlgorithm.cpp | 357 ++
lib/Support/DeltaAlgorithm.cpp | 4 +-
lib/Support/Dwarf.cpp | 511 +-
lib/Support/FileUtilities.cpp | 26 +-
lib/Support/FoldingSet.cpp | 20 +-
lib/Support/MemoryBuffer.cpp | 180 +-
lib/Support/PrettyStackTrace.cpp | 23 +-
lib/Support/SmallPtrSet.cpp | 9 +-
lib/Support/SmallVector.cpp | 21 +-
lib/Support/Timer.cpp | 10 +-
lib/Support/Triple.cpp | 5 +-
lib/Support/raw_ostream.cpp | 8 +-
lib/System/Disassembler.cpp | 16 +-
lib/System/Path.cpp | 29 +-
lib/System/Unix/Path.inc | 23 +-
lib/System/Unix/Program.inc | 2 +-
lib/System/Unix/Signals.inc | 17 +-
lib/System/Win32/Path.inc | 6 -
lib/System/Win32/Signals.inc | 2 +-
lib/Target/ARM/ARM.h | 4 -
lib/Target/ARM/ARMAddressingModes.h | 64 +
lib/Target/ARM/ARMBaseInstrInfo.cpp | 629 +-
lib/Target/ARM/ARMBaseInstrInfo.h | 87 +-
lib/Target/ARM/ARMBaseRegisterInfo.cpp | 191 +-
lib/Target/ARM/ARMBaseRegisterInfo.h | 24 +-
lib/Target/ARM/ARMCodeEmitter.cpp | 174 +-
lib/Target/ARM/ARMConstantIslandPass.cpp | 58 +-
lib/Target/ARM/ARMConstantPoolValue.h | 1 +
lib/Target/ARM/ARMExpandPseudoInsts.cpp | 10 +-
lib/Target/ARM/ARMISelDAGToDAG.cpp | 551 +-
lib/Target/ARM/ARMISelLowering.cpp | 893 ++-
lib/Target/ARM/ARMISelLowering.h | 47 +-
lib/Target/ARM/ARMInstrFormats.td | 44 +-
lib/Target/ARM/ARMInstrInfo.cpp | 2 +-
lib/Target/ARM/ARMInstrInfo.h | 2 +-
lib/Target/ARM/ARMInstrInfo.td | 128 +-
lib/Target/ARM/ARMInstrNEON.td | 99 +-
lib/Target/ARM/ARMInstrThumb.td | 23 +-
lib/Target/ARM/ARMInstrThumb2.td | 63 +-
lib/Target/ARM/ARMInstrVFP.td | 8 +-
lib/Target/ARM/ARMJITInfo.h | 3 +-
lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 194 +-
lib/Target/ARM/ARMMachineFunctionInfo.h | 12 +-
lib/Target/ARM/ARMRegisterInfo.td | 87 +-
lib/Target/ARM/ARMScheduleA8.td | 84 +-
lib/Target/ARM/ARMScheduleA9.td | 364 +-
lib/Target/ARM/ARMScheduleV6.td | 2 +-
lib/Target/ARM/ARMTargetMachine.cpp | 30 +-
lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 23 +-
lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 59 +-
lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 24 +-
lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 5 +-
lib/Target/ARM/CMakeLists.txt | 1 +
.../ARM/Disassembler/ARMDisassemblerCore.cpp | 113 +-
lib/Target/ARM/Disassembler/ARMDisassemblerCore.h | 8 +-
.../ARM/Disassembler/ThumbDisassemblerCore.h | 42 +-
lib/Target/ARM/NEONMoveFix.cpp | 4 +-
lib/Target/ARM/NEONPreAllocPass.cpp | 51 +-
lib/Target/ARM/Thumb1InstrInfo.cpp | 125 +-
lib/Target/ARM/Thumb1InstrInfo.h | 24 +-
lib/Target/ARM/Thumb1RegisterInfo.cpp | 17 +-
lib/Target/ARM/Thumb1RegisterInfo.h | 6 +-
lib/Target/ARM/Thumb2HazardRecognizer.cpp | 53 +
lib/Target/ARM/Thumb2HazardRecognizer.h | 40 +
lib/Target/ARM/Thumb2ITBlockPass.cpp | 160 +-
lib/Target/ARM/Thumb2InstrInfo.cpp | 192 +-
lib/Target/ARM/Thumb2InstrInfo.h | 39 +-
lib/Target/ARM/Thumb2SizeReduction.cpp | 16 +-
lib/Target/Alpha/AlphaISelLowering.cpp | 39 +-
lib/Target/Alpha/AlphaISelLowering.h | 2 +
lib/Target/Alpha/AlphaInstrFormats.td | 4 +-
lib/Target/Alpha/AlphaInstrInfo.cpp | 109 +-
lib/Target/Alpha/AlphaInstrInfo.h | 27 +-
lib/Target/Alpha/AlphaInstrInfo.td | 34 +-
lib/Target/Alpha/AlphaRegisterInfo.cpp | 14 -
lib/Target/Alpha/AlphaRegisterInfo.h | 3 -
lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 4 +-
lib/Target/Blackfin/BlackfinISelLowering.cpp | 13 +-
lib/Target/Blackfin/BlackfinISelLowering.h | 2 +
lib/Target/Blackfin/BlackfinInstrInfo.cpp | 108 +-
lib/Target/Blackfin/BlackfinInstrInfo.h | 15 +-
lib/Target/Blackfin/BlackfinInstrInfo.td | 12 +-
lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 30 -
lib/Target/Blackfin/BlackfinRegisterInfo.h | 6 -
lib/Target/CBackend/CBackend.cpp | 50 +-
lib/Target/CellSPU/SPUCallingConv.td | 82 +-
lib/Target/CellSPU/SPUFrameInfo.h | 4 -
lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 15 +-
lib/Target/CellSPU/SPUISelLowering.cpp | 126 +-
lib/Target/CellSPU/SPUISelLowering.h | 3 +-
lib/Target/CellSPU/SPUInstrInfo.cpp | 131 +-
lib/Target/CellSPU/SPUInstrInfo.h | 33 +-
lib/Target/CellSPU/SPUNodes.td | 2 +-
lib/Target/CellSPU/SPURegisterInfo.cpp | 57 -
lib/Target/CellSPU/SPURegisterInfo.h | 13 -
lib/Target/CppBackend/CPPBackend.cpp | 3341 +++++-----
lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp | 5 +-
lib/Target/MBlaze/MBlazeISelLowering.cpp | 88 +-
lib/Target/MBlaze/MBlazeISelLowering.h | 2 +
lib/Target/MBlaze/MBlazeInstrInfo.cpp | 65 +-
lib/Target/MBlaze/MBlazeInstrInfo.h | 25 +-
lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 16 -
lib/Target/MBlaze/MBlazeRegisterInfo.h | 3 -
lib/Target/MSIL/MSILWriter.cpp | 23 +-
lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 3 +-
lib/Target/MSP430/MSP430ISelLowering.cpp | 43 +-
lib/Target/MSP430/MSP430ISelLowering.h | 3 +
lib/Target/MSP430/MSP430InstrInfo.cpp | 39 +-
lib/Target/MSP430/MSP430InstrInfo.h | 12 +-
lib/Target/MSP430/MSP430InstrInfo.td | 455 +-
lib/Target/MSP430/MSP430RegisterInfo.cpp | 46 +-
lib/Target/MSP430/MSP430RegisterInfo.h | 3 -
lib/Target/Mangler.cpp | 2 +-
lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 5 +-
lib/Target/Mips/MipsISelLowering.cpp | 55 +-
lib/Target/Mips/MipsISelLowering.h | 2 +
lib/Target/Mips/MipsInstrInfo.cpp | 213 +-
lib/Target/Mips/MipsInstrInfo.h | 25 +-
lib/Target/Mips/MipsInstrInfo.td | 2 +-
lib/Target/Mips/MipsRegisterInfo.cpp | 35 +-
lib/Target/Mips/MipsRegisterInfo.h | 3 -
lib/Target/PIC16/PIC16ISelLowering.cpp | 46 +-
lib/Target/PIC16/PIC16ISelLowering.h | 4 +
lib/Target/PIC16/PIC16InstrInfo.cpp | 37 +-
lib/Target/PIC16/PIC16InstrInfo.h | 13 +-
lib/Target/PIC16/PIC16InstrInfo.td | 18 +-
lib/Target/PIC16/PIC16MemSelOpt.cpp | 2 +-
lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp | 24 +-
lib/Target/PIC16/PIC16Passes/PIC16Cloner.h | 4 +-
lib/Target/PIC16/PIC16RegisterInfo.cpp | 7 -
lib/Target/PIC16/PIC16RegisterInfo.h | 4 -
lib/Target/PowerPC/PPCHazardRecognizers.cpp | 2 +-
lib/Target/PowerPC/PPCISelLowering.cpp | 124 +-
lib/Target/PowerPC/PPCISelLowering.h | 9 +-
lib/Target/PowerPC/PPCInstrInfo.cpp | 184 +-
lib/Target/PowerPC/PPCInstrInfo.h | 30 +-
lib/Target/PowerPC/PPCRegisterInfo.cpp | 166 +-
lib/Target/PowerPC/PPCRegisterInfo.h | 3 -
lib/Target/README.txt | 134 +-
lib/Target/Sparc/SparcISelLowering.cpp | 61 +-
lib/Target/Sparc/SparcISelLowering.h | 2 +
lib/Target/Sparc/SparcInstrInfo.cpp | 98 +-
lib/Target/Sparc/SparcInstrInfo.h | 25 +-
lib/Target/Sparc/SparcInstrInfo.td | 2 +-
lib/Target/Sparc/SparcRegisterInfo.cpp | 7 -
lib/Target/Sparc/SparcRegisterInfo.h | 3 -
.../SystemZ/AsmPrinter/SystemZAsmPrinter.cpp | 2 +-
lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 4 +-
lib/Target/SystemZ/SystemZISelLowering.cpp | 29 +-
lib/Target/SystemZ/SystemZISelLowering.h | 3 +
lib/Target/SystemZ/SystemZInstrFP.td | 4 +-
lib/Target/SystemZ/SystemZInstrInfo.cpp | 96 +-
lib/Target/SystemZ/SystemZInstrInfo.h | 12 +-
lib/Target/SystemZ/SystemZInstrInfo.td | 17 +-
lib/Target/SystemZ/SystemZRegisterInfo.cpp | 16 -
lib/Target/SystemZ/SystemZRegisterInfo.h | 3 -
lib/Target/SystemZ/SystemZRegisterInfo.td | 10 +-
lib/Target/TargetInstrInfo.cpp | 4 +
lib/Target/TargetLoweringObjectFile.cpp | 2 +-
lib/Target/TargetRegisterInfo.cpp | 12 +-
lib/Target/X86/AsmParser/X86AsmLexer.cpp | 85 +-
lib/Target/X86/AsmParser/X86AsmParser.cpp | 105 +-
lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 21 +-
lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h | 11 +-
lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 7 +
lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp | 23 +-
lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h | 15 +-
lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 74 +-
lib/Target/X86/Disassembler/CMakeLists.txt | 4 +-
lib/Target/X86/Disassembler/X86Disassembler.cpp | 15 +-
lib/Target/X86/README-SSE.txt | 184 +-
lib/Target/X86/README-X86-64.txt | 249 +-
lib/Target/X86/README.txt | 103 +-
lib/Target/X86/X86.h | 4 +
lib/Target/X86/X86AsmBackend.cpp | 8 +-
lib/Target/X86/X86CallingConv.td | 12 +-
lib/Target/X86/X86CodeEmitter.cpp | 20 +-
lib/Target/X86/X86FastISel.cpp | 441 +-
lib/Target/X86/X86FixupKinds.h | 1 +
lib/Target/X86/X86FloatingPoint.cpp | 111 +-
lib/Target/X86/X86FloatingPointRegKill.cpp | 17 +-
lib/Target/X86/X86ISelDAGToDAG.cpp | 196 +-
lib/Target/X86/X86ISelLowering.cpp | 902 +--
lib/Target/X86/X86ISelLowering.h | 43 +-
lib/Target/X86/X86Instr64bit.td | 238 +-
lib/Target/X86/X86InstrBuilder.h | 39 +-
lib/Target/X86/X86InstrFPStack.td | 16 +-
lib/Target/X86/X86InstrFormats.td | 90 +-
lib/Target/X86/X86InstrFragmentsSIMD.td | 336 +
lib/Target/X86/X86InstrInfo.cpp | 642 +-
lib/Target/X86/X86InstrInfo.h | 164 +-
lib/Target/X86/X86InstrInfo.td | 369 +-
lib/Target/X86/X86InstrMMX.td | 14 -
lib/Target/X86/X86InstrSSE.td | 6505 +++++++++++---------
lib/Target/X86/X86MCCodeEmitter.cpp | 543 +-
lib/Target/X86/X86RegisterInfo.cpp | 117 +-
lib/Target/X86/X86RegisterInfo.h | 6 -
lib/Target/X86/X86RegisterInfo.td | 32 +-
lib/Target/X86/X86Subtarget.cpp | 69 +-
lib/Target/X86/X86Subtarget.h | 47 +-
lib/Target/X86/X86TargetMachine.cpp | 8 +-
lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 1 -
lib/Target/XCore/XCoreISelLowering.cpp | 61 +-
lib/Target/XCore/XCoreISelLowering.h | 8 +-
lib/Target/XCore/XCoreInstrInfo.cpp | 70 +-
lib/Target/XCore/XCoreInstrInfo.h | 15 +-
lib/Target/XCore/XCoreInstrInfo.td | 2 +-
lib/Target/XCore/XCoreRegisterInfo.cpp | 14 +-
lib/Target/XCore/XCoreRegisterInfo.h | 3 -
lib/Transforms/Hello/Hello.cpp | 4 +-
lib/Transforms/Hello/Hello.exports | 0
lib/Transforms/Hello/Makefile | 8 +
lib/Transforms/IPO/ArgumentPromotion.cpp | 15 +-
lib/Transforms/IPO/DeadArgumentElimination.cpp | 7 +
lib/Transforms/IPO/GlobalOpt.cpp | 51 +-
lib/Transforms/IPO/IPConstantPropagation.cpp | 7 +-
lib/Transforms/IPO/Inliner.cpp | 2 +-
lib/Transforms/IPO/LowerSetJmp.cpp | 18 +-
lib/Transforms/IPO/MergeFunctions.cpp | 3 +-
lib/Transforms/IPO/PartialInlining.cpp | 10 +-
lib/Transforms/IPO/PartialSpecialization.cpp | 48 +-
lib/Transforms/IPO/StripSymbols.cpp | 133 +-
lib/Transforms/IPO/StructRetPromotion.cpp | 10 +-
lib/Transforms/InstCombine/InstCombine.h | 3 +-
lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 26 +-
lib/Transforms/InstCombine/InstCombineCalls.cpp | 156 +-
lib/Transforms/InstCombine/InstCombineCasts.cpp | 15 +-
lib/Transforms/InstCombine/InstCombineCompares.cpp | 37 +-
.../InstCombine/InstCombineLoadStoreAlloca.cpp | 35 +-
lib/Transforms/InstCombine/InstCombinePHI.cpp | 5 +-
lib/Transforms/InstCombine/InstCombineSelect.cpp | 31 +
lib/Transforms/InstCombine/InstCombineShifts.cpp | 2 +-
.../InstCombine/InstCombineSimplifyDemanded.cpp | 16 +-
.../InstCombine/InstructionCombining.cpp | 76 +-
.../Instrumentation/OptimalEdgeProfiling.cpp | 8 +-
lib/Transforms/Instrumentation/ProfilingUtils.cpp | 12 +-
lib/Transforms/Scalar/ABCD.cpp | 6 +-
lib/Transforms/Scalar/ADCE.cpp | 2 +-
lib/Transforms/Scalar/CodeGenPrepare.cpp | 8 +-
lib/Transforms/Scalar/DeadStoreElimination.cpp | 59 +-
lib/Transforms/Scalar/GVN.cpp | 61 +-
lib/Transforms/Scalar/IndVarSimplify.cpp | 16 +-
lib/Transforms/Scalar/JumpThreading.cpp | 73 +-
lib/Transforms/Scalar/LoopDeletion.cpp | 6 +-
lib/Transforms/Scalar/LoopIndexSplit.cpp | 28 +-
lib/Transforms/Scalar/LoopRotation.cpp | 4 +-
lib/Transforms/Scalar/LoopStrengthReduce.cpp | 143 +-
lib/Transforms/Scalar/LoopUnswitch.cpp | 28 +-
lib/Transforms/Scalar/MemCpyOptimizer.cpp | 4 +-
lib/Transforms/Scalar/Reassociate.cpp | 5 +-
lib/Transforms/Scalar/ScalarReplAggregates.cpp | 53 +-
lib/Transforms/Scalar/SimplifyCFGPass.cpp | 3 +
lib/Transforms/Scalar/SimplifyLibCalls.cpp | 213 +-
lib/Transforms/Scalar/TailDuplication.cpp | 5 +-
lib/Transforms/Scalar/TailRecursionElimination.cpp | 96 +-
lib/Transforms/Utils/AddrModeMatcher.cpp | 21 +-
lib/Transforms/Utils/BasicBlockUtils.cpp | 118 -
lib/Transforms/Utils/BreakCriticalEdges.cpp | 23 +-
lib/Transforms/Utils/BuildLibCalls.cpp | 67 +-
lib/Transforms/Utils/CloneFunction.cpp | 100 +-
lib/Transforms/Utils/CloneLoop.cpp | 33 +-
lib/Transforms/Utils/CloneModule.cpp | 56 +-
lib/Transforms/Utils/DemoteRegToStack.cpp | 22 +-
lib/Transforms/Utils/InlineFunction.cpp | 23 +-
lib/Transforms/Utils/LCSSA.cpp | 7 +-
lib/Transforms/Utils/Local.cpp | 113 +-
lib/Transforms/Utils/LoopSimplify.cpp | 49 +-
lib/Transforms/Utils/LoopUnroll.cpp | 20 +-
lib/Transforms/Utils/LowerInvoke.cpp | 140 +-
lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 16 +-
lib/Transforms/Utils/SimplifyCFG.cpp | 15 +-
lib/Transforms/Utils/ValueMapper.cpp | 10 +-
lib/Transforms/Utils/ValueMapper.h | 4 +-
lib/VMCore/AsmWriter.cpp | 33 +-
lib/VMCore/AutoUpgrade.cpp | 48 +-
lib/VMCore/ConstantFold.cpp | 13 +-
lib/VMCore/Core.cpp | 18 +-
lib/VMCore/Instruction.cpp | 20 +-
lib/VMCore/Instructions.cpp | 65 +-
lib/VMCore/IntrinsicInst.cpp | 6 +-
lib/VMCore/Metadata.cpp | 1 +
lib/VMCore/Module.cpp | 9 +-
lib/VMCore/Pass.cpp | 45 +
lib/VMCore/PassManager.cpp | 5 +
lib/VMCore/Value.cpp | 10 +-
lib/VMCore/Verifier.cpp | 44 +-
test/Analysis/BasicAA/args-rets-allocas-loads.ll | 310 +
test/Analysis/BasicAA/unreachable-block.ll | 16 +
.../ScalarEvolution/2008-07-29-SMinExpr.ll | 2 +-
test/Analysis/ScalarEvolution/scev-aa.ll | 28 +-
test/Analysis/ScalarEvolution/trip-count10.ll | 50 +
test/BugPoint/remove_arguments_test.ll | 10 +-
test/CMakeLists.txt | 1 +
test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll | 1 -
test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll | 1 -
test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll | 1 -
test/CodeGen/ARM/2009-06-22-CoalescerBug.ll | 2 +-
test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll | 14 +-
test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll | 14 +-
test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll | 2 +-
test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll | 6 +-
test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll | 2 +-
test/CodeGen/ARM/2009-07-01-CommuteBug.ll | 2 +-
test/CodeGen/ARM/2009-07-18-RewriterBug.ll | 28 +-
test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll | 6 +-
test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll | 2 +-
test/CodeGen/ARM/2009-07-29-VFP3Registers.ll | 2 +-
.../ARM/2009-08-02-RegScavengerAssert-Neon.ll | 2 +-
.../CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll | 8 +-
test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll | 8 +-
.../ARM/2009-08-15-RegScavenger-EarlyClobber.ll | 4 +-
test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll | 8 +-
test/CodeGen/ARM/2009-08-21-PostRAKill.ll | 2 +-
test/CodeGen/ARM/2009-08-21-PostRAKill2.ll | 4 +-
test/CodeGen/ARM/2009-08-21-PostRAKill3.ll | 2 +-
test/CodeGen/ARM/2009-08-21-PostRAKill4.ll | 14 +-
test/CodeGen/ARM/2009-08-23-linkerprivate.ll | 2 +-
test/CodeGen/ARM/2009-08-26-ScalarToVector.ll | 2 +-
test/CodeGen/ARM/2009-08-27-ScalarToVector.ll | 2 +-
test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll | 2 +-
test/CodeGen/ARM/2009-08-29-TooLongSplat.ll | 2 +-
test/CodeGen/ARM/2009-08-31-LSDA-Name.ll | 30 +-
test/CodeGen/ARM/2009-09-01-PostRAProlog.ll | 8 +-
test/CodeGen/ARM/2009-09-09-AllOnes.ll | 2 +-
test/CodeGen/ARM/2009-09-24-spill-align.ll | 2 +-
test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll | 2 +-
test/CodeGen/ARM/2009-10-27-double-align.ll | 6 +-
test/CodeGen/ARM/2009-11-01-NeonMoves.ll | 4 +-
test/CodeGen/ARM/2009-12-02-vtrn-undef.ll | 2 +-
test/CodeGen/ARM/2010-04-09-NeonSelect.ll | 2 +-
test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll | 2 +-
test/CodeGen/ARM/2010-04-14-SplitVector.ll | 2 +-
test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll | 2 +-
test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll | 13 +-
test/CodeGen/ARM/2010-05-18-PostIndexBug.ll | 2 +-
test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll | 2 +-
test/CodeGen/ARM/2010-05-21-BuildVector.ll | 2 +-
test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll | 19 +
test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll | 148 +
test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll | 145 +
.../ARM/2010-06-25-Thumb2ITInvalidIterator.ll | 75 +
test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll | 10 +
.../ARM/2010-06-29-PartialRedefFastAlloc.ll | 25 +
test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll | 15 +
test/CodeGen/ARM/alloca.ll | 4 +-
test/CodeGen/ARM/arm-frameaddr.ll | 2 +-
test/CodeGen/ARM/arm-returnaddr.ll | 8 +-
test/CodeGen/ARM/armv4.ll | 2 +-
test/CodeGen/ARM/call-tc.ll | 48 +
test/CodeGen/ARM/call.ll | 6 +-
test/CodeGen/ARM/crash-O0.ll | 28 +
test/CodeGen/ARM/flag-crash.ll | 27 +
test/CodeGen/ARM/fpcmp-opt.ll | 29 +
test/CodeGen/ARM/fpconsts.ll | 8 +-
test/CodeGen/ARM/ifcvt2.ll | 15 +-
test/CodeGen/ARM/ifcvt6.ll | 2 +-
test/CodeGen/ARM/indirectbr.ll | 2 +-
test/CodeGen/ARM/inlineasm.ll | 8 -
test/CodeGen/ARM/inlineasm3.ll | 4 +-
test/CodeGen/ARM/insn-sched1.ll | 2 +-
test/CodeGen/ARM/ldm.ll | 2 +-
test/CodeGen/ARM/long_shift.ll | 8 +-
test/CodeGen/ARM/lsr-code-insertion.ll | 2 +-
test/CodeGen/ARM/lsr-on-unrolled-loops.ll | 28 +-
test/CodeGen/ARM/machine-cse-cmp.ll | 18 +
test/CodeGen/ARM/reg_sequence.ll | 22 +-
test/CodeGen/ARM/remat.ll | 6 +-
test/CodeGen/ARM/select-imm.ll | 6 +-
test/CodeGen/ARM/spill-q.ll | 2 +-
test/CodeGen/ARM/trap.ll | 2 +-
test/CodeGen/ARM/unaligned_load_store.ll | 2 +-
test/CodeGen/ARM/va_arg.ll | 41 +
test/CodeGen/ARM/vdup.ll | 8 +-
test/CodeGen/ARM/vext.ll | 12 +-
test/CodeGen/ARM/vget_lane.ll | 4 +-
test/CodeGen/ARM/vmov.ll | 58 +-
test/CodeGen/ARM/vrev.ll | 28 +-
test/CodeGen/Blackfin/cmp64.ll | 4 +-
test/CodeGen/CellSPU/call.ll | 5 +-
test/CodeGen/CellSPU/call_indirect.ll | 4 +-
test/CodeGen/CellSPU/jumptable.ll | 6 +-
test/CodeGen/CellSPU/loads.ll | 20 +
test/CodeGen/CellSPU/shuffles.ll | 18 +
test/CodeGen/CellSPU/vecinsert.ll | 15 +-
test/CodeGen/Generic/2006-09-02-LocalAllocCrash.ll | 1 -
test/CodeGen/Generic/2010-ZeroSizedArg.ll | 4 +-
test/CodeGen/Generic/add-with-overflow-128.ll | 42 +
test/CodeGen/Generic/stack-protector.ll | 25 -
.../PowerPC/2007-04-30-InlineAsmEarlyClobber.ll | 1 -
.../PowerPC/2007-10-21-LocalRegAllocAssert.ll | 1 -
.../PowerPC/2007-10-21-LocalRegAllocAssert2.ll | 1 -
.../PowerPC/2008-02-09-LocalRegAllocAssert.ll | 1 -
test/CodeGen/PowerPC/2008-03-06-KillInfo.ll | 21 -
.../2009-08-17-inline-asm-addr-mode-breakage.ll | 4 +-
test/CodeGen/PowerPC/2009-08-23-linkerprivate.ll | 2 +-
test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll | 433 --
test/CodeGen/PowerPC/cr_spilling.ll | 1 -
test/CodeGen/PowerPC/stack-protector.ll | 25 +
test/CodeGen/Thumb/2009-07-19-SPDecBug.ll | 6 +-
test/CodeGen/Thumb/2009-07-20-TwoAddrBug.ll | 2 +-
test/CodeGen/Thumb/2009-07-27-PEIAssert.ll | 6 +-
test/CodeGen/Thumb/2009-08-12-ConstIslandAssert.ll | 2 +-
test/CodeGen/Thumb/2009-08-12-RegInfoAssert.ll | 10 +-
test/CodeGen/Thumb/2009-08-20-ISelBug.ll | 14 +-
.../Thumb/2009-12-17-pre-regalloc-taildup.ll | 2 +-
.../Thumb/2010-01-15-local-alloc-spill-physical.ll | 7 +-
test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll | 8 +
test/CodeGen/Thumb/2010-07-01-FuncAlign.ll | 6 +
test/CodeGen/Thumb/asmprinter-bug.ll | 24 +-
test/CodeGen/Thumb/machine-licm.ll | 2 +-
test/CodeGen/Thumb/pop.ll | 2 +-
test/CodeGen/Thumb/push.ll | 8 +-
test/CodeGen/Thumb/trap.ll | 2 +-
.../CodeGen/Thumb2/2009-07-17-CrossRegClassCopy.ll | 6 +-
test/CodeGen/Thumb2/2009-07-21-ISelBug.ll | 6 +-
test/CodeGen/Thumb2/2009-07-23-CPIslandBug.ll | 2 +-
test/CodeGen/Thumb2/2009-07-30-PEICrash.ll | 2 +-
test/CodeGen/Thumb2/2009-08-01-WrongLDRBOpc.ll | 2 +-
test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll | 10 +-
test/CodeGen/Thumb2/2009-08-04-CoalescerAssert.ll | 6 +-
test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll | 26 +-
test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll | 66 +-
.../CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll | 2 +-
.../Thumb2/2009-08-04-SubregLoweringBug2.ll | 2 +-
.../Thumb2/2009-08-04-SubregLoweringBug3.ll | 2 +-
test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll | 10 +-
test/CodeGen/Thumb2/2009-08-07-NeonFPBug.ll | 2 +-
test/CodeGen/Thumb2/2009-08-08-ScavengerAssert.ll | 2 +-
test/CodeGen/Thumb2/2009-08-10-ISelBug.ll | 2 +-
test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll | 10 +-
test/CodeGen/Thumb2/2009-11-01-CopyReg2RegBug.ll | 2 +-
test/CodeGen/Thumb2/2009-11-11-ScavengerAssert.ll | 18 +-
test/CodeGen/Thumb2/2009-11-13-STRDBug.ll | 2 +-
test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll | 10 +-
.../Thumb2/2010-01-06-TailDuplicateLabels.ll | 26 +-
test/CodeGen/Thumb2/2010-01-19-RemovePredicates.ll | 8 +-
test/CodeGen/Thumb2/2010-02-11-phi-cycle.ll | 16 +-
test/CodeGen/Thumb2/2010-02-24-BigStack.ll | 2 +-
test/CodeGen/Thumb2/2010-03-08-addi12-ccout.ll | 16 +-
test/CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll | 10 +-
test/CodeGen/Thumb2/2010-04-15-DynAllocBug.ll | 2 +-
test/CodeGen/Thumb2/2010-04-26-CopyRegCrash.ll | 2 +-
test/CodeGen/Thumb2/2010-05-24-rsbs.ll | 2 +-
test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll | 41 +
test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll | 35 +
test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll | 127 +
test/CodeGen/Thumb2/crash.ll | 49 +
test/CodeGen/Thumb2/cross-rc-coalescing-1.ll | 6 +-
test/CodeGen/Thumb2/cross-rc-coalescing-2.ll | 2 +-
test/CodeGen/Thumb2/frameless.ll | 2 +-
test/CodeGen/Thumb2/frameless2.ll | 2 +-
test/CodeGen/Thumb2/ifcvt-neon.ll | 2 +-
test/CodeGen/Thumb2/ldr-str-imm12.ll | 9 +-
test/CodeGen/Thumb2/lsr-deficiency.ll | 2 +-
test/CodeGen/Thumb2/machine-licm.ll | 6 +-
test/CodeGen/Thumb2/pic-load.ll | 6 +-
test/CodeGen/Thumb2/sign_extend_inreg.ll | 22 -
test/CodeGen/Thumb2/thumb2-call-tc.ll | 37 +
test/CodeGen/Thumb2/thumb2-call.ll | 2 +-
test/CodeGen/Thumb2/thumb2-cbnz.ll | 6 +-
test/CodeGen/Thumb2/thumb2-eor.ll | 9 +-
test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll | 86 +
test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 2 +-
test/CodeGen/Thumb2/thumb2-ifcvt2.ll | 3 +-
test/CodeGen/Thumb2/thumb2-ifcvt3.ll | 2 +-
test/CodeGen/Thumb2/thumb2-ldm.ll | 6 +-
test/CodeGen/Thumb2/thumb2-select_xform.ll | 2 +-
test/CodeGen/Thumb2/thumb2-spill-q.ll | 2 +-
test/CodeGen/Thumb2/thumb2-tbh.ll | 26 +-
test/CodeGen/Thumb2/thumb2-uxtb.ll | 93 +-
test/CodeGen/X86/2006-11-17-IllegalMove.ll | 6 +-
test/CodeGen/X86/2007-01-08-InstrSched.ll | 8 +-
test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll | 5 +-
.../X86/2007-11-04-rip-immediate-constant.ll | 4 +-
test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll | 68 -
test/CodeGen/X86/2008-01-16-FPStackifierAssert.ll | 1 -
test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll | 1 -
test/CodeGen/X86/2008-03-10-RegAllocInfLoop.ll | 1 -
test/CodeGen/X86/2008-03-18-CoalescerBug.ll | 2 +-
test/CodeGen/X86/2008-04-09-BranchFolding.ll | 2 +-
test/CodeGen/X86/2008-04-15-LiveVariableBug.ll | 1 -
test/CodeGen/X86/2008-05-21-CoalescerBug.ll | 2 +-
test/CodeGen/X86/2008-05-28-LocalRegAllocBug.ll | 1 -
test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll | 8 +-
test/CodeGen/X86/2008-09-17-inline-asm-1.ll | 1 -
test/CodeGen/X86/2008-09-18-inline-asm-2.ll | 5 +-
test/CodeGen/X86/2008-10-16-SpillerBug.ll | 160 -
test/CodeGen/X86/2009-01-12-CoalescerBug.ll | 84 -
test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll | 1 -
test/CodeGen/X86/2009-04-14-IllegalRegs.ll | 1 -
test/CodeGen/X86/2009-04-20-LinearScanOpt.ll | 2 +-
test/CodeGen/X86/2009-04-24.ll | 4 +-
test/CodeGen/X86/2009-08-23-linkerprivate.ll | 2 +-
test/CodeGen/X86/2009-09-07-CoalescerBug.ll | 47 -
.../X86/2009-09-19-SchedCustomLoweringBug.ll | 29 -
test/CodeGen/X86/2009-12-12-CoalescerBug.ll | 40 -
test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll | 6 +-
test/CodeGen/X86/2010-03-17-ISelBug.ll | 28 +
.../X86/2010-04-30-LocalAlloc-LandingPad.ll | 1 -
.../X86/2010-05-05-LocalAllocEarlyClobber.ll | 1 -
.../X86/2010-05-06-LocalInlineAsmClobber.ll | 1 -
test/CodeGen/X86/2010-06-09-FastAllocRegisters.ll | 17 +
test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll | 6 +
.../X86/2010-06-15-FastAllocEarlyCLobber.ll | 29 +
test/CodeGen/X86/2010-06-24-g-constraint-crash.ll | 15 +
.../X86/2010-06-25-CoalescerSubRegDefDead.ll | 39 +
test/CodeGen/X86/2010-06-25-asm-RA-crash.ll | 19 +
.../CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll | 22 +
.../CodeGen/X86/2010-06-28-matched-g-constraint.ll | 11 +
test/CodeGen/X86/2010-07-02-UnfoldBug.ll | 99 +
test/CodeGen/X86/2010-07-02-asm-alignstack.ll | 31 +
test/CodeGen/X86/2010-07-06-DbgCrash.ll | 29 +
test/CodeGen/X86/2010-07-06-asm-RIP.ll | 21 +
test/CodeGen/X86/alloca-align-rounding-32.ll | 15 +
test/CodeGen/X86/alloca-align-rounding.ll | 9 +-
test/CodeGen/X86/break-sse-dep.ll | 45 +-
test/CodeGen/X86/crash-O0.ll | 31 +
test/CodeGen/X86/crash.ll | 11 +
test/CodeGen/X86/fast-isel-bc.ll | 2 +-
test/CodeGen/X86/fast-isel-gep.ll | 19 +
test/CodeGen/X86/fast-isel-loads.ll | 23 +
test/CodeGen/X86/fast-isel-shift-imm.ll | 5 +-
test/CodeGen/X86/fast-isel-x86.ll | 33 +
test/CodeGen/X86/fast-isel.ll | 14 +-
test/CodeGen/X86/fp-stack-O0-crash.ll | 1 -
test/CodeGen/X86/hidden-vis-5.ll | 30 -
test/CodeGen/X86/hidden-vis-pic.ll | 55 +
test/CodeGen/X86/imp-def-copies.ll | 29 -
test/CodeGen/X86/inline-asm-fpstack.ll | 57 +-
test/CodeGen/X86/inline-asm-fpstack2.ll | 15 +-
test/CodeGen/X86/inline-asm-fpstack3.ll | 17 +-
test/CodeGen/X86/inline-asm-fpstack4.ll | 19 +-
test/CodeGen/X86/inline-asm-tied.ll | 2 +-
test/CodeGen/X86/ins_subreg_coalesce-3.ll | 3 +-
test/CodeGen/X86/iv-users-in-other-loops.ll | 2 +-
test/CodeGen/X86/leaf-fp-elim.ll | 30 +
test/CodeGen/X86/licm-nested.ll | 89 +
test/CodeGen/X86/liveness-local-regalloc.ll | 1 -
test/CodeGen/X86/local-liveness.ll | 31 -
test/CodeGen/X86/loop-strength-reduce6.ll | 18 +-
test/CodeGen/X86/lsr-delayed-fold.ll | 44 +
test/CodeGen/X86/lsr-loop-exit-cond.ll | 2 +-
test/CodeGen/X86/lsr-nonaffine.ll | 23 +
test/CodeGen/X86/lsr-reuse.ll | 309 +
test/CodeGen/X86/memcpy.ll | 52 +-
test/CodeGen/X86/object-size.ll | 2 +-
test/CodeGen/X86/optimize-max-3.ll | 46 +-
test/CodeGen/X86/phys-reg-local-regalloc.ll | 4 +-
test/CodeGen/X86/pic.ll | 6 +-
test/CodeGen/X86/pr2659.ll | 2 +-
test/CodeGen/X86/promote-assert-zext.ll | 22 +
test/CodeGen/X86/shift-folding.ll | 5 +
test/CodeGen/X86/sibcall-3.ll | 16 +
test/CodeGen/X86/sink-hoist.ll | 39 +-
test/CodeGen/X86/sse-commute.ll | 20 +
test/CodeGen/X86/sse-minmax.ll | 120 +-
test/CodeGen/X86/sse3.ll | 6 +-
test/CodeGen/X86/stack-align.ll | 9 +-
test/CodeGen/X86/stack-protector-linux.ll | 28 +
test/CodeGen/X86/store-narrow.ll | 2 +-
test/CodeGen/X86/switch-bt.ll | 51 +
test/CodeGen/X86/tailcallstack64.ll | 4 +-
test/CodeGen/X86/tls-1.ll | 4 +-
test/CodeGen/X86/v-binop-widen.ll | 12 +
test/CodeGen/X86/v-binop-widen2.ll | 40 +
test/CodeGen/X86/v2f32.ll | 39 +
test/CodeGen/X86/vec-trunc-store.ll | 12 +-
test/CodeGen/X86/vec_shuffle-6.ll | 2 +-
test/CodeGen/X86/vector-intrinsics.ll | 27 +-
test/CodeGen/X86/volatile.ll | 2 +-
test/CodeGen/X86/widen_shuffle-1.ll | 39 +-
test/CodeGen/X86/widen_shuffle-2.ll | 13 -
test/CodeGen/X86/x86-64-tls-1.ll | 6 +
test/CodeGen/X86/zext-sext.ll | 53 +
test/DebugInfo/2010-05-25-DotDebugLoc.ll | 2 +-
test/DebugInfo/2010-05-28-Crash.ll | 44 +
test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll | 53 +
test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll | 52 +
test/Feature/linker_private_linkages.ll | 6 +
test/FrontendC++/2010-02-17-DbgArtificialArg.cpp | 4 +-
.../FrontendC++/2010-04-30-OptimizedMethod-Dbg.cpp | 5 +-
test/FrontendC++/2010-06-21-LocalVarDbg.cpp | 13 +
test/FrontendC++/2010-06-22-BitfieldInit.cpp | 20 +
test/FrontendC++/2010-06-22-ZeroBitfield.cpp | 5 +
test/FrontendC++/thunk-linkonce-odr.cpp | 33 +
test/FrontendC++/thunk-weak-odr.cpp | 33 -
test/FrontendC/2010-05-31-palignr.c | 24 +
test/FrontendC/2010-06-11-SaveExpr.c | 8 +
test/FrontendC/2010-06-17-asmcrash.c | 16 +
test/FrontendC/2010-06-28-DbgEntryPC.c | 50 +
test/FrontendC/2010-06-28-DbgLocalVar.c | 14 +
test/FrontendC/2010-06-28-nowarn.c | 21 +
test/FrontendC/2010-07-08-DeclDebugLineNo.c | 10 +
test/FrontendC/pr5406.c | 2 +-
test/FrontendObjC/2010-06-04-UnnamedCFString-dbg.m | 6 +
test/LLVMC/C++/filelist.cpp | 3 +
test/MC/AsmParser/X86/x86_32-encoding.s | 2558 +++++++-
test/MC/AsmParser/X86/x86_32-new-encoder.s | 84 +
test/MC/AsmParser/X86/x86_64-encoding.s | 2569 ++++++++
test/MC/AsmParser/X86/x86_64-new-encoder.s | 19 +
test/MC/AsmParser/X86/x86_64-operands.s | 10 +-
test/MC/AsmParser/directive_desc.s | 2 +-
.../AsmParser/directive_subsections_via_symbols.s | 2 +-
test/MC/AsmParser/directive_zerofill.s | 2 +-
test/MC/AsmParser/exprs.s | 2 -
test/MC/AsmParser/hello.s | 6 +-
test/MC/Disassembler/simple-tests.txt | 3 +
test/MC/MachO/jcc.s | 106 +
test/Makefile | 3 +-
test/Other/2010-05-06-Printer.ll | 2 +-
test/Other/lint.ll | 74 +-
test/Scripts/macho-dump.bat | 7 +
test/TableGen/DefmInsideMultiClass.td | 25 +
test/TableGen/LetInsideMultiClasses.td | 29 +
test/TableGen/TargetInstrInfo.td | 3 +-
test/TableGen/defmclass.td | 38 +
test/TableGen/eqbit.td | 11 +
test/TableGen/ifbit.td | 11 +
test/TableGen/usevalname.td | 24 +
test/Transforms/GVN/load-pre-align.ll | 2 +-
test/Transforms/IndVarSimplify/indirectbr.ll | 19 +-
.../IndVarSimplify/single-element-range.ll | 2 +-
.../Transforms/IndVarSimplify/tripcount_compute.ll | 66 +-
test/Transforms/Inline/2010-05-31-ByvalTailcall.ll | 24 +
test/Transforms/InstCombine/alloca.ll | 20 +-
test/Transforms/InstCombine/badmalloc.ll | 1 +
test/Transforms/InstCombine/call.ll | 4 +-
test/Transforms/InstCombine/cast.ll | 11 +
test/Transforms/InstCombine/crash.ll | 6 +-
test/Transforms/InstCombine/getelementptr.ll | 9 +
test/Transforms/InstCombine/icmp.ll | 23 +
test/Transforms/InstCombine/load.ll | 11 +
test/Transforms/InstCombine/load2.ll | 11 -
test/Transforms/InstCombine/load3.ll | 33 +-
test/Transforms/InstCombine/malloc-free-delete.ll | 20 +-
test/Transforms/InstCombine/select.ll | 32 +
test/Transforms/JumpThreading/crash.ll | 46 +
test/Transforms/LoopRotate/phi-duplicate.ll | 2 +-
test/Transforms/LoopUnswitch/preserve-analyses.ll | 2 +-
test/Transforms/Mem2Reg/ConvertDebugInfo.ll | 2 +-
test/Transforms/Mem2Reg/ConvertDebugInfo2.ll | 52 +
test/Transforms/PartialSpecialize/dg.exp | 3 +
.../PartialSpecialize/two-specializations.ll | 33 +
test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll | 4 +-
test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll | 2 +-
test/Transforms/ScalarRepl/address-space.ll | 35 +
test/Transforms/ScalarRepl/memcpy-align.ll | 2 +-
.../SimplifyCFG/trapping-load-unreachable.ll | 11 +
.../SimplifyLibCalls/2010-05-30-memcpy-Struct.ll | 20 +
test/Transforms/SimplifyLibCalls/PR7357.ll | 16 +
test/Transforms/SimplifyLibCalls/StrNCmp.ll | 7 +
test/Transforms/SimplifyLibCalls/StrStr.ll | 12 +
.../StripSymbols/2010-06-30-StripDebug.ll | 28 +
.../StripSymbols/2010-07-01-DeadDbgInfo.ll | 47 +
.../2010-06-26-MultipleReturnValues.ll | 20 +
test/lit.cfg | 7 +
test/lit.site.cfg.in | 1 +
tools/bugpoint/BugDriver.h | 6 +-
tools/bugpoint/CrashDebugger.cpp | 31 +-
tools/bugpoint/ExtractFunction.cpp | 28 +-
tools/bugpoint/ListReducer.h | 4 +-
tools/bugpoint/Miscompilation.cpp | 48 +-
tools/bugpoint/ToolRunner.h | 2 +-
tools/edis/EDDisassembler.cpp | 2 +-
tools/gold/gold-plugin.cpp | 155 +-
tools/llc/llc.cpp | 12 +-
tools/llvm-extract/llvm-extract.cpp | 1 +
tools/llvm-link/llvm-link.cpp | 20 +-
tools/llvm-mc/Makefile | 1 -
tools/llvm-mc/llvm-mc.cpp | 5 +-
tools/llvm-nm/llvm-nm.cpp | 3 +-
tools/llvmc/plugins/Base/Base.td.in | 12 +-
tools/lto/LTOCodeGenerator.cpp | 10 +-
tools/opt/GraphPrinters.cpp | 2 +-
tools/opt/PrintSCC.cpp | 2 +-
tools/opt/opt.cpp | 48 +-
unittests/ADT/DAGDeltaAlgorithmTest.cpp | 122 +
unittests/ADT/ValueMapTest.cpp | 6 +-
unittests/Makefile.unittest | 8 +-
utils/FileUpdate/FileUpdate.cpp | 1 +
utils/NewNightlyTest.pl | 4 +-
utils/TableGen/ARMDecoderEmitter.cpp | 9 +
utils/TableGen/ARMDecoderEmitter.h | 2 +-
utils/TableGen/AsmMatcherEmitter.cpp | 4 +-
utils/TableGen/CMakeLists.txt | 4 +-
utils/TableGen/ClangASTNodesEmitter.cpp | 116 +-
utils/TableGen/ClangASTNodesEmitter.h | 58 +-
utils/TableGen/ClangAttrEmitter.cpp | 84 +
utils/TableGen/ClangAttrEmitter.h | 49 +
utils/TableGen/CodeEmitterGen.cpp | 45 +-
utils/TableGen/CodeGenInstruction.cpp | 11 -
utils/TableGen/CodeGenTarget.cpp | 77 +-
utils/TableGen/DAGISelMatcherEmitter.cpp | 5 +-
utils/TableGen/EDEmitter.cpp | 13 +-
utils/TableGen/FastISelEmitter.cpp | 40 +-
utils/TableGen/InstrInfoEmitter.cpp | 5 +-
utils/TableGen/NeonEmitter.cpp | 1202 ++++
utils/TableGen/NeonEmitter.h | 122 +
utils/TableGen/Record.cpp | 29 +-
utils/TableGen/RegisterInfoEmitter.cpp | 134 +-
utils/TableGen/TGParser.cpp | 165 +-
utils/TableGen/TGParser.h | 11 +-
utils/TableGen/TableGen.cpp | 115 +-
utils/TableGen/X86RecognizableInstr.cpp | 19 +-
utils/TableGen/X86RecognizableInstr.h | 2 +
utils/buildit/build_llvm | 49 +-
utils/count/count.c | 6 +-
utils/lit/lit/TestRunner.py | 40 +-
utils/unittest/UnitTestMain/Makefile | 8 +-
utils/unittest/googletest/Makefile | 7 +-
utils/unittest/googletest/README.LLVM | 10 +-
utils/unittest/googletest/gtest-death-test.cc | 849 ++-
utils/unittest/googletest/gtest-filepath.cc | 197 +-
utils/unittest/googletest/gtest-port.cc | 541 +-
utils/unittest/googletest/gtest-test-part.cc | 48 +-
utils/unittest/googletest/gtest-typed-test.cc | 17 +-
utils/unittest/googletest/gtest.cc | 2403 +++++---
.../googletest/include/gtest/gtest-death-test.h | 84 +-
.../googletest/include/gtest/gtest-message.h | 18 +-
.../googletest/include/gtest/gtest-param-test.h | 23 +-
.../unittest/googletest/include/gtest/gtest-spi.h | 43 +-
.../googletest/include/gtest/gtest-test-part.h | 65 +-
.../googletest/include/gtest/gtest-typed-test.h | 14 +-
utils/unittest/googletest/include/gtest/gtest.h | 1097 +++-
.../gtest/internal/gtest-death-test-internal.h | 96 +-
.../include/gtest/internal/gtest-filepath.h | 22 +-
.../include/gtest/internal/gtest-internal-inl.h | 1015 ++-
.../include/gtest/internal/gtest-internal.h | 181 +-
.../include/gtest/internal/gtest-linked_ptr.h | 2 +-
.../gtest/internal/gtest-param-util-generated.h | 310 +-
.../include/gtest/internal/gtest-param-util.h | 68 +-
.../googletest/include/gtest/internal/gtest-port.h | 1212 +++-
.../include/gtest/internal/gtest-string.h | 165 +-
.../include/gtest/internal/gtest-tuple.h | 968 +++
.../include/gtest/internal/gtest-type-util.h | 18 +-
1057 files changed, 48032 insertions(+), 27681 deletions(-)
create mode 100644 include/llvm/ADT/DAGDeltaAlgorithm.h
create mode 100644 include/llvm/Analysis/CodeMetrics.h
create mode 100644 include/llvm/Analysis/Loads.h
create mode 100644 include/llvm/CodeGen/FunctionLoweringInfo.h
create mode 100644 include/llvm/CodeGen/PostRAHazardRecognizer.h
create mode 100644 include/llvm/MC/MCObjectStreamer.h
create mode 100644 include/llvm/MC/MCParser/MCAsmParserExtension.h
create mode 100644 include/llvm/Support/COFF.h
create mode 100644 include/llvm/Target/TargetCallingConv.h
create mode 100644 lib/Analysis/Loads.cpp
create mode 100644 lib/CodeGen/CallingConvLower.cpp
delete mode 100644 lib/CodeGen/ExactHazardRecognizer.cpp
delete mode 100644 lib/CodeGen/ExactHazardRecognizer.h
create mode 100644 lib/CodeGen/InlineSpiller.cpp
create mode 100644 lib/CodeGen/PostRAHazardRecognizer.cpp
delete mode 100644 lib/CodeGen/RegAllocLocal.cpp
delete mode 100644 lib/CodeGen/SelectionDAG/CallingConvLower.cpp
delete mode 100644 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h
delete mode 100644 lib/CodeGen/SimpleHazardRecognizer.h
create mode 100644 lib/MC/MCObjectStreamer.cpp
create mode 100644 lib/MC/MCParser/DarwinAsmParser.cpp
create mode 100644 lib/MC/MCParser/ELFAsmParser.cpp
create mode 100644 lib/MC/MCParser/MCAsmParserExtension.cpp
create mode 100644 lib/MC/WinCOFFObjectWriter.cpp
create mode 100644 lib/MC/WinCOFFStreamer.cpp
create mode 100644 lib/Support/DAGDeltaAlgorithm.cpp
create mode 100644 lib/Target/ARM/Thumb2HazardRecognizer.cpp
create mode 100644 lib/Target/ARM/Thumb2HazardRecognizer.h
create mode 100644 lib/Transforms/Hello/Hello.exports
create mode 100644 test/Analysis/BasicAA/args-rets-allocas-loads.ll
create mode 100644 test/Analysis/BasicAA/unreachable-block.ll
create mode 100644 test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
create mode 100644 test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
create mode 100755 test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
create mode 100644 test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
create mode 100644 test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll
create mode 100644 test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
create mode 100644 test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
create mode 100644 test/CodeGen/ARM/call-tc.ll
create mode 100644 test/CodeGen/ARM/crash-O0.ll
create mode 100644 test/CodeGen/ARM/flag-crash.ll
create mode 100644 test/CodeGen/ARM/fpcmp-opt.ll
create mode 100644 test/CodeGen/ARM/machine-cse-cmp.ll
create mode 100644 test/CodeGen/ARM/va_arg.ll
create mode 100644 test/CodeGen/CellSPU/shuffles.ll
create mode 100644 test/CodeGen/Generic/add-with-overflow-128.ll
delete mode 100644 test/CodeGen/Generic/stack-protector.ll
delete mode 100644 test/CodeGen/PowerPC/2008-03-06-KillInfo.ll
delete mode 100644 test/CodeGen/PowerPC/2010-02-26-FoldFloats.ll
create mode 100644 test/CodeGen/PowerPC/stack-protector.ll
create mode 100644 test/CodeGen/Thumb/2010-06-18-SibCallCrash.ll
create mode 100644 test/CodeGen/Thumb/2010-07-01-FuncAlign.ll
create mode 100644 test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
create mode 100644 test/CodeGen/Thumb2/2010-06-19-ITBlockCrash.ll
create mode 100644 test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll
create mode 100644 test/CodeGen/Thumb2/crash.ll
delete mode 100644 test/CodeGen/Thumb2/sign_extend_inreg.ll
create mode 100644 test/CodeGen/Thumb2/thumb2-call-tc.ll
create mode 100644 test/CodeGen/Thumb2/thumb2-ifcvt1-tc.ll
delete mode 100644 test/CodeGen/X86/2007-11-14-Coalescer-Bug.ll
delete mode 100644 test/CodeGen/X86/2008-10-16-SpillerBug.ll
delete mode 100644 test/CodeGen/X86/2009-01-12-CoalescerBug.ll
delete mode 100644 test/CodeGen/X86/2009-09-07-CoalescerBug.ll
delete mode 100644 test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
delete mode 100644 test/CodeGen/X86/2009-12-12-CoalescerBug.ll
create mode 100644 test/CodeGen/X86/2010-06-09-FastAllocRegisters.ll
create mode 100644 test/CodeGen/X86/2010-06-14-fast-isel-fs-load.ll
create mode 100644 test/CodeGen/X86/2010-06-15-FastAllocEarlyCLobber.ll
create mode 100644 test/CodeGen/X86/2010-06-24-g-constraint-crash.ll
create mode 100644 test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
create mode 100644 test/CodeGen/X86/2010-06-25-asm-RA-crash.ll
create mode 100644 test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll
create mode 100644 test/CodeGen/X86/2010-06-28-matched-g-constraint.ll
create mode 100644 test/CodeGen/X86/2010-07-02-UnfoldBug.ll
create mode 100644 test/CodeGen/X86/2010-07-02-asm-alignstack.ll
create mode 100644 test/CodeGen/X86/2010-07-06-DbgCrash.ll
create mode 100644 test/CodeGen/X86/2010-07-06-asm-RIP.ll
create mode 100644 test/CodeGen/X86/alloca-align-rounding-32.ll
create mode 100644 test/CodeGen/X86/crash-O0.ll
create mode 100644 test/CodeGen/X86/fast-isel-loads.ll
create mode 100644 test/CodeGen/X86/fast-isel-x86.ll
delete mode 100644 test/CodeGen/X86/hidden-vis-5.ll
create mode 100644 test/CodeGen/X86/hidden-vis-pic.ll
delete mode 100644 test/CodeGen/X86/imp-def-copies.ll
create mode 100644 test/CodeGen/X86/leaf-fp-elim.ll
create mode 100644 test/CodeGen/X86/licm-nested.ll
delete mode 100644 test/CodeGen/X86/local-liveness.ll
create mode 100644 test/CodeGen/X86/lsr-nonaffine.ll
create mode 100644 test/CodeGen/X86/promote-assert-zext.ll
create mode 100644 test/CodeGen/X86/sibcall-3.ll
create mode 100644 test/CodeGen/X86/sse-commute.ll
create mode 100644 test/CodeGen/X86/stack-protector-linux.ll
create mode 100644 test/CodeGen/X86/switch-bt.ll
create mode 100644 test/CodeGen/X86/v-binop-widen.ll
create mode 100644 test/CodeGen/X86/v-binop-widen2.ll
create mode 100644 test/CodeGen/X86/v2f32.ll
delete mode 100644 test/CodeGen/X86/widen_shuffle-2.ll
create mode 100644 test/CodeGen/X86/x86-64-tls-1.ll
create mode 100644 test/CodeGen/X86/zext-sext.ll
create mode 100644 test/DebugInfo/2010-05-28-Crash.ll
create mode 100644 test/DebugInfo/2010-06-01-DeadArg-DbgInfo.ll
create mode 100644 test/DebugInfo/2010-06-29-InlinedFnLocalVar.ll
create mode 100644 test/Feature/linker_private_linkages.ll
create mode 100644 test/FrontendC++/2010-06-21-LocalVarDbg.cpp
create mode 100644 test/FrontendC++/2010-06-22-BitfieldInit.cpp
create mode 100644 test/FrontendC++/2010-06-22-ZeroBitfield.cpp
create mode 100644 test/FrontendC++/thunk-linkonce-odr.cpp
delete mode 100644 test/FrontendC++/thunk-weak-odr.cpp
create mode 100644 test/FrontendC/2010-05-31-palignr.c
create mode 100644 test/FrontendC/2010-06-11-SaveExpr.c
create mode 100644 test/FrontendC/2010-06-17-asmcrash.c
create mode 100644 test/FrontendC/2010-06-28-DbgEntryPC.c
create mode 100644 test/FrontendC/2010-06-28-DbgLocalVar.c
create mode 100644 test/FrontendC/2010-06-28-nowarn.c
create mode 100644 test/FrontendC/2010-07-08-DeclDebugLineNo.c
create mode 100644 test/FrontendObjC/2010-06-04-UnnamedCFString-dbg.m
create mode 100644 test/LLVMC/C++/filelist.cpp
create mode 100644 test/MC/MachO/jcc.s
create mode 100644 test/Scripts/macho-dump.bat
create mode 100644 test/TableGen/DefmInsideMultiClass.td
create mode 100644 test/TableGen/LetInsideMultiClasses.td
create mode 100644 test/TableGen/defmclass.td
create mode 100644 test/TableGen/eqbit.td
create mode 100644 test/TableGen/ifbit.td
create mode 100644 test/TableGen/usevalname.td
create mode 100644 test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
delete mode 100644 test/Transforms/InstCombine/load2.ll
create mode 100644 test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
create mode 100644 test/Transforms/PartialSpecialize/dg.exp
create mode 100644 test/Transforms/PartialSpecialize/two-specializations.ll
create mode 100644 test/Transforms/ScalarRepl/address-space.ll
create mode 100644 test/Transforms/SimplifyLibCalls/2010-05-30-memcpy-Struct.ll
create mode 100644 test/Transforms/SimplifyLibCalls/PR7357.ll
create mode 100644 test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
create mode 100644 test/Transforms/StripSymbols/2010-07-01-DeadDbgInfo.ll
create mode 100644 test/Transforms/TailCallElim/2010-06-26-MultipleReturnValues.ll
create mode 100644 unittests/ADT/DAGDeltaAlgorithmTest.cpp
create mode 100644 utils/TableGen/ClangAttrEmitter.cpp
create mode 100644 utils/TableGen/ClangAttrEmitter.h
create mode 100644 utils/TableGen/NeonEmitter.cpp
create mode 100644 utils/TableGen/NeonEmitter.h
create mode 100644 utils/unittest/googletest/include/gtest/internal/gtest-tuple.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9d0180b..d4f2221 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@ project(LLVM)
 cmake_minimum_required(VERSION 2.6.1)
 
 set(PACKAGE_NAME llvm)
-set(PACKAGE_VERSION 2.7svn)
+set(PACKAGE_VERSION 2.8svn)
 set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
 set(PACKAGE_BUGREPORT "llvmbugs@cs.uiuc.edu")
 
diff --git a/Makefile b/Makefile
index 670c174..d42f887 100644
--- a/Makefile
+++ b/Makefile
@@ -64,7 +64,7 @@ endif
 
 ifeq ($(MAKECMDGOALS),install-clang)
   DIRS := tools/clang/tools/driver tools/clang/lib/Headers \
-          tools/clang/lib/Runtime tools/clang/docs
+          tools/clang/runtime tools/clang/docs
   OPTIONAL_DIRS :=
   NO_INSTALL = 1
 endif
@@ -180,8 +180,8 @@ $(FilesToConfigPATH) : $(LLVM_OBJ_ROOT)/% : $(LLVM_SRC_ROOT)/%.in
 # that it gets executed last.
 ifneq ($(BUILD_DIRS_ONLY),1)
 all::
-	$(Echo) '*****' Completed $(BuildMode)$(AssertMode) Build
-ifeq ($(BuildMode),Debug)
+	$(Echo) '*****' Completed $(BuildMode) Build
+ifneq ($(ENABLE_OPTIMIZED),1)
 	$(Echo) '*****' Note: Debug build can be 10 times slower than an
 	$(Echo) '*****' optimized build. Use 'make ENABLE_OPTIMIZED=1' to
 	$(Echo) '*****' make an optimized build. Alternatively you can
diff --git a/Makefile.config.in b/Makefile.config.in
index ec11bb3..1d54b31 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -222,8 +222,8 @@ RDYNAMIC := @RDYNAMIC@
 # When ENABLE_PROFILING is enabled, profile instrumentation is done
 # and output is put into the "+Profile" directories, where
-# is either Debug or Release depending on how other builkd
-# flags are set.. Otherwise, output is put in the
+# is either Debug or Release depending on how other build
+# flags are set. Otherwise, output is put in the
 # directories.
 #ENABLE_PROFILING = 1 @ENABLE_PROFILING@
@@ -320,12 +320,6 @@ endif
 # Location of the plugin header file for gold.
 BINUTILS_INCDIR := @BINUTILS_INCDIR@
 
-C_INCLUDE_DIRS := @C_INCLUDE_DIRS@
-CXX_INCLUDE_ROOT := @CXX_INCLUDE_ROOT@
-CXX_INCLUDE_ARCH := @CXX_INCLUDE_ARCH@
-CXX_INCLUDE_32BIT_DIR = @CXX_INCLUDE_32BIT_DIR@
-CXX_INCLUDE_64BIT_DIR = @CXX_INCLUDE_64BIT_DIR@
-
 # When ENABLE_LLVMC_DYNAMIC is enabled, LLVMC will link libCompilerDriver
 # dynamically. This is needed to make dynamic plugins work on some targets
 # (Windows).
@@ -344,5 +338,5 @@ NO_MISSING_FIELD_INITIALIZERS = @NO_MISSING_FIELD_INITIALIZERS@
 NO_VARIADIC_MACROS = @NO_VARIADIC_MACROS@
 
 # Flags supported by the linker.
-# bfd ld / gold -retain-symbols-file file
-HAVE_LINK_RETAIN_SYMBOLS_FILE = @HAVE_LINK_RETAIN_SYMBOLS_FILE@
+# bfd ld / gold --version-script=file
+HAVE_LINK_VERSION_SCRIPT = @HAVE_LINK_VERSION_SCRIPT@
diff --git a/Makefile.rules b/Makefile.rules
index 4085881..dc15c92 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -398,12 +398,11 @@ endif
 # If DISABLE_ASSERTIONS=1 is specified (make command line or configured),
 # then disable assertions by defining the appropriate preprocessor symbols.
-ifdef DISABLE_ASSERTIONS
-  # Indicate that assertions are turned off using a minus sign
-  BuildMode := $(BuildMode)-Asserts
-  CPP.Defines += -DNDEBUG
-else
+ifndef DISABLE_ASSERTIONS
+  BuildMode := $(BuildMode)+Asserts
   CPP.Defines += -D_DEBUG
+else
+  CPP.Defines += -DNDEBUG
 endif
@@ -807,7 +806,8 @@ SubDirs += $(DIRS)
 ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT))
 $(RecursiveTargets)::
 	$(Verb) for dir in $(DIRS); do \
-	  if [ ! -f $$dir/Makefile ]; then \
+	  if ([ ! -f $$dir/Makefile ] || \
+	      command test $$dir/Makefile -ot $(PROJ_SRC_DIR)/$$dir/Makefile ); then \
 	    $(MKDIR) $$dir; \
 	    $(CP) $(PROJ_SRC_DIR)/$$dir/Makefile $$dir/Makefile; \
 	  fi; \
@@ -829,7 +829,8 @@ endif
 ifdef EXPERIMENTAL_DIRS
 $(RecursiveTargets)::
 	$(Verb) for dir in $(EXPERIMENTAL_DIRS); do \
-	  if [ ! -f $$dir/Makefile ]; then \
+	  if ([ ! -f $$dir/Makefile ] || \
+	      command test $$dir/Makefile -ot $(PROJ_SRC_DIR)/$$dir/Makefile ); then \
 	    $(MKDIR) $$dir; \
 	    $(CP) $(PROJ_SRC_DIR)/$$dir/Makefile $$dir/Makefile; \
 	  fi; \
@@ -863,7 +864,9 @@ unitcheck:: $(addsuffix /.makeunitcheck,$(PARALLEL_DIRS))
 ParallelTargets := $(foreach T,$(RecursiveTargets),%/.make$(T))
 
 $(ParallelTargets) :
-	$(Verb) if [ ! -f $(@D)/Makefile ]; then \
+	$(Verb) if ([ ! -f $(@D)/Makefile ] || \
+	            command test $(@D)/Makefile -ot \
+	                    $(PROJ_SRC_DIR)/$(@D)/Makefile ); then \
 	  $(MKDIR) $(@D); \
 	  $(CP) $(PROJ_SRC_DIR)/$(@D)/Makefile $(@D)/Makefile; \
 	fi; \
@@ -882,7 +885,8 @@ ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT))
 $(RecursiveTargets)::
 	$(Verb) for dir in $(OPTIONAL_DIRS); do \
 	  if [ -d $(PROJ_SRC_DIR)/$$dir ]; then\
-	    if [ ! -f $$dir/Makefile ]; then \
+	    if ([ ! -f $$dir/Makefile ] || \
+	        command test $$dir/Makefile -ot $(PROJ_SRC_DIR)/$$dir/Makefile ); then \
 	      $(MKDIR) $$dir; \
 	      $(CP) $(PROJ_SRC_DIR)/$$dir/Makefile $$dir/Makefile; \
 	    fi; \
@@ -930,7 +934,7 @@ endif
 endif
 
 ###############################################################################
-# Set up variables for building libararies
+# Set up variables for building libraries
 ###############################################################################
 
 #---------------------------------------------------------
@@ -986,12 +990,25 @@ ifeq ($(HOST_OS),Darwin)
 # Darwin convention prefixes symbols with underscores.
 NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).sed
 $(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
-	$(Verb) sed -e 's/[[:<:]]/_/' < $< > $@
+	$(Verb) sed -e 's/^/_/' < $< > $@
+clean-local::
+	-$(Verb) $(RM) -f $(NativeExportsFile)
+else
+ifeq ($(HAVE_LINK_VERSION_SCRIPT),1)
+# Gold and BFD ld require a version script rather than a plain list.
+NativeExportsFile := $(ObjDir)/$(notdir $(EXPORTED_SYMBOL_FILE)).map
+$(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir
+	$(Verb) echo "{" > $@
+	$(Verb) grep -q "\<" $< && echo " global:" >> $@ || :
+	$(Verb) sed -e 's/$$/;/' -e 's/^/ /' < $< >> $@
+	$(Verb) echo " local: *;" >> $@
+	$(Verb) echo "};" >> $@
 clean-local::
 	-$(Verb) $(RM) -f $(NativeExportsFile)
 else
 NativeExportsFile := $(EXPORTED_SYMBOL_FILE)
 endif
+endif
 
 # Now add the linker command-line options to use the native export file.
@@ -1000,8 +1017,8 @@
 LLVMLibsOptions += -Wl,-exported_symbols_list,$(NativeExportsFile)
 endif
 
 # gold, bfd ld, etc.
-ifeq ($(HAVE_LINK_RETAIN_SYMBOLS_FILE),1)
-LLVMLibsOptions += -Wl,-retain-symbols-file,$(NativeExportsFile)
+ifeq ($(HAVE_LINK_VERSION_SCRIPT),1)
+LLVMLibsOptions += -Wl,--version-script,$(NativeExportsFile)
 endif
 endif
@@ -1113,7 +1130,7 @@ $(LibName.SO): $(ObjectsO) $(ProjLibsPaths) $(LLVMLibsPaths) $(LibDir)/.dir
 	  $(ProjLibsOptions) $(LLVMLibsOptions) $(LIBS)
 else
 $(LibName.SO): $(ObjectsO) $(LibDir)/.dir
-	$(Echo) Linking $(BuildMode) Shared Library $(LIBRARYNAME)$(SHLIBEXT)
+	$(Echo) Linking $(BuildMode) Shared Library $(basename $@)
 	$(Verb) $(Link) $(SharedLinkOptions) -o $@ $(ObjectsO)
 endif
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 8487d94..be320cf 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -1039,8 +1039,8 @@ AC_LINK_USE_R
 dnl Determine whether the linker supports the -export-dynamic option.
 AC_LINK_EXPORT_DYNAMIC
 
-dnl Determine whether the linker supports the -retain-symbols-file option.
-AC_LINK_RETAIN_SYMBOLS_FILE
+dnl Determine whether the linker supports the --version-script option.
+AC_LINK_VERSION_SCRIPT
 
 dnl Check for libtool and the library that has dlopen function (which must come
 dnl before the AC_PROG_LIBTOOL check in order to enable dlopening libraries with
@@ -1284,6 +1284,9 @@ if test "$llvm_cv_enable_libffi" = "yes" ; then
   AC_CHECK_HEADERS([ffi.h ffi/ffi.h])
 fi
 
+dnl Try to find Darwin specific crash reporting library.
+AC_CHECK_HEADERS([CrashReporterClient.h]) + dnl===-----------------------------------------------------------------------=== dnl=== dnl=== SECTION 7: Check for types and structures diff --git a/autoconf/m4/link_options.m4 b/autoconf/m4/link_options.m4 index 697abab..b48710c 100644 --- a/autoconf/m4/link_options.m4 +++ b/autoconf/m4/link_options.m4 @@ -40,14 +40,14 @@ if test "$llvm_cv_link_use_export_dynamic" = yes ; then ]) # -# Determine if the system can handle the -retain-symbols-file option being +# Determine if the system can handle the --version-script option being # passed to the linker. # # This macro is specific to LLVM. # -AC_DEFUN([AC_LINK_RETAIN_SYMBOLS_FILE], -[AC_CACHE_CHECK([for compiler -Wl,-retain-symbols-file option], - [llvm_cv_link_use_retain_symbols_file], +AC_DEFUN([AC_LINK_VERSION_SCRIPT], +[AC_CACHE_CHECK([for compiler -Wl,--version-script option], + [llvm_cv_link_use_version_script], [ AC_LANG_PUSH([C]) oldcflags="$CFLAGS" @@ -67,18 +67,21 @@ AC_DEFUN([AC_LINK_RETAIN_SYMBOLS_FILE], (umask 077 && mkdir "$tmp") } || exit $? - echo "main" > "$tmp/exports" + echo "{" > "$tmp/export.map" + echo " global: main;" >> "$tmp/export.map" + echo " local: *;" >> "$tmp/export.map" + echo "};" >> "$tmp/export.map" - CFLAGS="$CFLAGS -Wl,-retain-symbols-file=$tmp/exports" + CFLAGS="$CFLAGS -Wl,--version-script=$tmp/export.map" AC_LINK_IFELSE([AC_LANG_PROGRAM([[]],[[]])], - [llvm_cv_link_use_retain_symbols_file=yes],[llvm_cv_link_use_retain_symbols_file=no]) - rm "$tmp/exports" + [llvm_cv_link_use_version_script=yes],[llvm_cv_link_use_version_script=no]) + rm "$tmp/export.map" rmdir "$tmp" CFLAGS="$oldcflags" AC_LANG_POP([C]) ]) -if test "$llvm_cv_link_use_retain_symbols_file" = yes ; then - AC_SUBST(HAVE_LINK_RETAIN_SYMBOLS_FILE,1) +if test "$llvm_cv_link_use_version_script" = yes ; then + AC_SUBST(HAVE_LINK_VERSION_SCRIPT,1) fi ]) diff --git a/bindings/ada/llvm/llvm.ads b/bindings/ada/llvm/llvm.ads index d9820f1..ce74e67 100644 --- a/bindings/ada/llvm/llvm.ads +++ b/bindings/ada/llvm/llvm.ads @@ -316,7 +316,8 @@ package llvm is LLVMExternalWeakLinkage, LLVMGhostLinkage, LLVMCommonLinkage, - LLVMLinkerPrivateLinkage); + LLVMLinkerPrivateLinkage, + LLVMLinkerPrivateWeakLinkage); for LLVMLinkage use (LLVMExternalLinkage => 0, @@ -333,7 +334,8 @@ package llvm is LLVMExternalWeakLinkage => 11, LLVMGhostLinkage => 12, LLVMCommonLinkage => 13, - LLVMLinkerPrivateLinkage => 14); + LLVMLinkerPrivateLinkage => 14, + LLVMLinkerPrivateWeakLinkage => 15); pragma Convention (C, LLVMLinkage); diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index e72560b..d9450d9 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -4,7 +4,7 @@ include(CheckSymbolExists) include(CheckFunctionExists) include(CheckCXXSourceCompiles) -if( UNIX ) +if( UNIX AND NOT BEOS ) # Used by check_symbol_exists: set(CMAKE_REQUIRED_LIBRARIES m) endif() diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake index f6da1b8..cf7cd1f 100644 --- a/cmake/modules/TableGen.cmake +++ b/cmake/modules/TableGen.cmake @@ -6,10 +6,16 @@ macro(tablegen ofn) file(GLOB local_tds "*.td") file(GLOB_RECURSE global_tds "${LLVM_MAIN_SRC_DIR}/include/llvm/*.td") + if (IS_ABSOLUTE ${LLVM_TARGET_DEFINITIONS}) + set(LLVM_TARGET_DEFINITIONS_ABSOLUTE ${LLVM_TARGET_DEFINITIONS}) + else() + set(LLVM_TARGET_DEFINITIONS_ABSOLUTE + ${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS}) + endif() add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn} COMMAND ${LLVM_TABLEGEN_EXE} ${ARGN} -I 
${CMAKE_CURRENT_SOURCE_DIR} -I ${LLVM_MAIN_SRC_DIR}/lib/Target -I ${LLVM_MAIN_INCLUDE_DIR} - ${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS} + ${LLVM_TARGET_DEFINITIONS_ABSOLUTE} -o ${CMAKE_CURRENT_BINARY_DIR}/${ofn} DEPENDS tblgen ${local_tds} ${global_tds} COMMENT "Building ${ofn}..." diff --git a/configure b/configure index 755746f..dc1b5b3 100755 --- a/configure +++ b/configure @@ -752,7 +752,7 @@ OCAMLOPT OCAMLDEP OCAMLDOC GAS -HAVE_LINK_RETAIN_SYMBOLS_FILE +HAVE_LINK_VERSION_SCRIPT INSTALL_LTDL_TRUE INSTALL_LTDL_FALSE CONVENIENCE_LTDL_TRUE @@ -8905,9 +8905,9 @@ _ACEOF fi -{ echo "$as_me:$LINENO: checking for compiler -Wl,-retain-symbols-file option" >&5 -echo $ECHO_N "checking for compiler -Wl,-retain-symbols-file option... $ECHO_C" >&6; } -if test "${llvm_cv_link_use_retain_symbols_file+set}" = set; then +{ echo "$as_me:$LINENO: checking for compiler -Wl,--version-script option" >&5 +echo $ECHO_N "checking for compiler -Wl,--version-script option... $ECHO_C" >&6; } +if test "${llvm_cv_link_use_version_script+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else ac_ext=c @@ -8934,9 +8934,12 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu (umask 077 && mkdir "$tmp") } || exit $? - echo "main" > "$tmp/exports" + echo "{" > "$tmp/export.map" + echo " global: main;" >> "$tmp/export.map" + echo " local: *;" >> "$tmp/export.map" + echo "};" >> "$tmp/export.map" - CFLAGS="$CFLAGS -Wl,-retain-symbols-file=$tmp/exports" + CFLAGS="$CFLAGS -Wl,--version-script=$tmp/export.map" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -8986,17 +8989,17 @@ eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then - llvm_cv_link_use_retain_symbols_file=yes + llvm_cv_link_use_version_script=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 - llvm_cv_link_use_retain_symbols_file=no + llvm_cv_link_use_version_script=no fi rm -f core conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext - rm "$tmp/exports" + rm "$tmp/export.map" rmdir "$tmp" CFLAGS="$oldcflags" ac_ext=c @@ -9007,10 +9010,10 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu fi -{ echo "$as_me:$LINENO: result: $llvm_cv_link_use_retain_symbols_file" >&5 -echo "${ECHO_T}$llvm_cv_link_use_retain_symbols_file" >&6; } -if test "$llvm_cv_link_use_retain_symbols_file" = yes ; then - HAVE_LINK_RETAIN_SYMBOLS_FILE=1 +{ echo "$as_me:$LINENO: result: $llvm_cv_link_use_version_script" >&5 +echo "${ECHO_T}$llvm_cv_link_use_version_script" >&6; } +if test "$llvm_cv_link_use_version_script" = yes ; then + HAVE_LINK_VERSION_SCRIPT=1 fi @@ -11384,7 +11387,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } +else + # Is the header compilable? +{ echo "$as_me:$LINENO: checking $ac_header usability" >&5 +echo $ECHO_N "checking $ac_header usability... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. 
*/ +$ac_includes_default +#include <$ac_header> +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_header_compiler=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_compiler=no +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_compiler" >&5 +echo "${ECHO_T}$ac_header_compiler" >&6; } + +# Is the header present? +{ echo "$as_me:$LINENO: checking $ac_header presence" >&5 +echo $ECHO_N "checking $ac_header presence... $ECHO_C" >&6; } +cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +#include <$ac_header> +_ACEOF +if { (ac_try="$ac_cpp conftest.$ac_ext" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_cpp conftest.$ac_ext") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } >/dev/null; then + if test -s conftest.err; then + ac_cpp_err=$ac_c_preproc_warn_flag + ac_cpp_err=$ac_cpp_err$ac_c_werror_flag + else + ac_cpp_err= + fi +else + ac_cpp_err=yes +fi +if test -z "$ac_cpp_err"; then + ac_header_preproc=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + ac_header_preproc=no +fi + +rm -f conftest.err conftest.$ac_ext +{ echo "$as_me:$LINENO: result: $ac_header_preproc" >&5 +echo "${ECHO_T}$ac_header_preproc" >&6; } + +# So? What about this header? +case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in + yes:no: ) + { echo "$as_me:$LINENO: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&5 +echo "$as_me: WARNING: $ac_header: accepted by the compiler, rejected by the preprocessor!" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the compiler's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the compiler's result" >&2;} + ac_header_preproc=yes + ;; + no:yes:* ) + { echo "$as_me:$LINENO: WARNING: $ac_header: present but cannot be compiled" >&5 +echo "$as_me: WARNING: $ac_header: present but cannot be compiled" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: check for missing prerequisite headers?" >&5 +echo "$as_me: WARNING: $ac_header: check for missing prerequisite headers?" 
>&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: see the Autoconf documentation" >&5 +echo "$as_me: WARNING: $ac_header: see the Autoconf documentation" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&5 +echo "$as_me: WARNING: $ac_header: section \"Present But Cannot Be Compiled\"" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: proceeding with the preprocessor's result" >&5 +echo "$as_me: WARNING: $ac_header: proceeding with the preprocessor's result" >&2;} + { echo "$as_me:$LINENO: WARNING: $ac_header: in the future, the compiler will take precedence" >&5 +echo "$as_me: WARNING: $ac_header: in the future, the compiler will take precedence" >&2;} + ( cat <<\_ASBOX +## ----------------------------------- ## +## Report this to llvmbugs@cs.uiuc.edu ## +## ----------------------------------- ## +_ASBOX + ) | sed "s/^/$as_me: WARNING: /" >&2 + ;; +esac +{ echo "$as_me:$LINENO: checking for $ac_header" >&5 +echo $ECHO_N "checking for $ac_header... $ECHO_C" >&6; } +if { as_var=$as_ac_Header; eval "test \"\${$as_var+set}\" = set"; }; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + eval "$as_ac_Header=\$ac_header_preproc" +fi +ac_res=`eval echo '${'$as_ac_Header'}'` + { echo "$as_me:$LINENO: result: $ac_res" >&5 +echo "${ECHO_T}$ac_res" >&6; } + +fi +if test `eval echo '${'$as_ac_Header'}'` = yes; then + cat >>confdefs.h <<_ACEOF +#define `echo "HAVE_$ac_header" | $as_tr_cpp` 1 +_ACEOF + +fi + +done + + + { echo "$as_me:$LINENO: checking for HUGE_VAL sanity" >&5 @@ -21297,7 +21470,7 @@ OCAMLOPT!$OCAMLOPT$ac_delim OCAMLDEP!$OCAMLDEP$ac_delim OCAMLDOC!$OCAMLDOC$ac_delim GAS!$GAS$ac_delim -HAVE_LINK_RETAIN_SYMBOLS_FILE!$HAVE_LINK_RETAIN_SYMBOLS_FILE$ac_delim +HAVE_LINK_VERSION_SCRIPT!$HAVE_LINK_VERSION_SCRIPT$ac_delim INSTALL_LTDL_TRUE!$INSTALL_LTDL_TRUE$ac_delim INSTALL_LTDL_FALSE!$INSTALL_LTDL_FALSE$ac_delim CONVENIENCE_LTDL_TRUE!$CONVENIENCE_LTDL_TRUE$ac_delim diff --git a/docs/AliasAnalysis.html b/docs/AliasAnalysis.html index 0413622..a23d908 100644 --- a/docs/AliasAnalysis.html +++ b/docs/AliasAnalysis.html @@ -31,6 +31,7 @@
  • AliasAnalysis chaining behavior
  • Updating analysis results for transformations
  • Efficiency Issues
  • +
  • Pass Manager Issues
• @@ -116,6 +117,11 @@ as the actual call or invoke instructions that perform the call. The AliasAnalysis interface also exposes some helper methods which allow you to get mod/ref information for arbitrary instructions.</p>

    +

    All AliasAnalysis interfaces require that in queries involving +multiple values, values which are not +constants are all defined within the +same function.

    + @@ -180,9 +186,13 @@ that the accesses alias.

    -The alias method is the primary interface used to determine whether or -not two memory objects alias each other. It takes two memory objects as input -and returns MustAlias, MayAlias, or NoAlias as appropriate. +

    The alias method is the primary interface used to determine whether +or not two memory objects alias each other. It takes two memory objects as +input and returns MustAlias, MayAlias, or NoAlias as appropriate.

    + +

    Like all AliasAnalysis interfaces, the alias method requires +that either the two pointer values be defined within the same function, or at +least one of the values is a constant.

    @@ -191,12 +201,18 @@ and returns MustAlias, MayAlias, or NoAlias as appropriate.
    -

    The NoAlias response is used when the two pointers refer to distinct objects, -regardless of whether the pointers compare equal. For example, freed pointers -don't alias any pointers that were allocated afterwards. As a degenerate case, -pointers returned by malloc(0) have no bytes for an object, and are considered -NoAlias even when malloc returns the same pointer. The same rule applies to -NULL pointers.

    +

The NoAlias response may be used when there is never an immediate dependence +between any memory reference based on one pointer and any memory +reference based on the other. The most obvious example is when the two +pointers point to non-overlapping memory ranges. Another is when the two +pointers are only ever used for reading memory. Another is when the memory is +freed and reallocated between accesses through one pointer and accesses through +the other -- in this case, there is a dependence, but it's mediated by the free +and reallocation.</p>

    + +

An exception to this is the +noalias keyword; the "irrelevant" +dependencies are ignored.</p>
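As a minimal illustration (the function below is invented for this example, not taken from the LLVM sources), two distinct stack allocations can never overlap, so an implementation may answer NoAlias for the pair of pointers:

define void @distinct_objects() {
  ; %a and %b address separate stack objects, so no memory access
  ; based on one pointer depends on an access based on the other.
  %a = alloca i32
  %b = alloca i32
  store i32 1, i32* %a
  store i32 2, i32* %b
  ret void
}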

    The MayAlias response is used whenever the two pointers might refer to the same object. If the two memory objects overlap, but do not start at the same @@ -502,6 +518,45 @@ method as possible (within reason).

    + + + +
    + +

PassManager support for alternative AliasAnalysis implementations +has some issues.</p>

    + +

    There is no way to override the default alias analysis. It would +be very useful to be able to do something like "opt -my-aa -O2" and +have it use -my-aa for all passes which need AliasAnalysis, but there +is currently no support for that, short of changing the source code +and recompiling. Similarly, there is also no way of setting a chain +of analyses as the default.

    + +

There is no way for transform passes to declare that they preserve +AliasAnalysis implementations. The AliasAnalysis +interface includes deleteValue and copyValue methods +which are intended to allow a pass to keep an AliasAnalysis consistent; +however, there's no way for a pass to declare in its +getAnalysisUsage that it does so. Some passes attempt to use +AU.addPreserved&lt;AliasAnalysis&gt;; however, this doesn't +actually have any effect. + +

    AliasAnalysisCounter (-count-aa) and AliasDebugger +(-debug-aa) are implemented as ModulePass classes, so if your +alias analysis uses FunctionPass, it won't be able to use +these utilities. If you try to use them, the pass manager will +silently route alias analysis queries directly to +BasicAliasAnalysis instead.

    + +

    Similarly, the opt -p option introduces ModulePass +passes between each pass, which prevents the use of FunctionPass +alias analysis passes.

    + +
    +
    Using alias analysis results @@ -749,6 +804,19 @@ module, it is not part of the LLVM core.

    + + + +
    + +

    The -scev-aa pass implements AliasAnalysis queries by +translating them into ScalarEvolution queries. This gives it a +more complete understanding of getelementptr instructions +and loop induction variables than other alias analyses have.
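For instance (a sketch; the array, function, and value names are invented for illustration), ScalarEvolution can prove that two getelementptrs whose indices always differ by exactly one element never compute the same address:

@A = global [16 x i32] zeroinitializer

define void @f(i64 %i) {
  ; ScalarEvolution models %i and %i + 1 as distinct affine
  ; expressions, so -scev-aa may answer NoAlias for %p and %q.
  %i.next = add i64 %i, 1
  %p = getelementptr [16 x i32]* @A, i64 0, i64 %i
  %q = getelementptr [16 x i32]* @A, i64 0, i64 %i.next
  store i32 0, i32* %p
  store i32 1, i32* %q
  ret void
}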

    + +
    @@ -930,7 +998,7 @@ analysis directly.

    Chris Lattner
    LLVM Compiler Infrastructure
    - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + Last modified: $Date: 2010-07-07 16:27:09 +0200 (Wed, 07 Jul 2010) $ diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html index ec050bf..4071787 100644 --- a/docs/CodeGenerator.html +++ b/docs/CodeGenerator.html @@ -1594,22 +1594,22 @@ bool RegMapping_Fer::compatible_class(MachineFunction &mf, different register allocators:

      -
    • Simple — This is a very simple implementation that does not - keep values in registers across instructions. This register allocator - immediately spills every value right after it is computed, and reloads all - used operands from memory to temporary registers before each - instruction.
    • - -
    • Local — This register allocator is an improvement on the - Simple implementation. It allocates registers on a basic block - level, attempting to keep values in registers and reusing registers as - appropriate.
    • -
• Linear Scan — The default allocator. This is the well-known linear scan register allocator. Whereas the Simple and Local algorithms use a direct mapping implementation technique, the Linear Scan implementation uses a spiller in order to place loads and stores.
    • + +
    • Fast — This register allocator is the default for debug + builds. It allocates registers on a basic block level, attempting to keep + values in registers and reusing registers as appropriate.
    • + +
    • PBQP — A Partitioned Boolean Quadratic Programming (PBQP) + based register allocator. This allocator works by constructing a PBQP + problem representing the register allocation problem under consideration, + solving this using a PBQP solver, and mapping the solution back to a + register assignment.
    • +

    The type of register allocator used in llc can be chosen with the @@ -1617,9 +1617,9 @@ bool RegMapping_Fer::compatible_class(MachineFunction &mf,

    -$ llc -regalloc=simple file.bc -o sp.s;
    -$ llc -regalloc=local file.bc -o lc.s;
     $ llc -regalloc=linearscan file.bc -o ln.s;
    +$ llc -regalloc=fast file.bc -o fa.s;
    +$ llc -regalloc=pbqp file.bc -o pbqp.s;
     
    @@ -2162,7 +2162,7 @@ MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory Chris Lattner
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + Last modified: $Date: 2010-06-15 23:58:33 +0200 (Tue, 15 Jun 2010) $ diff --git a/docs/DebuggingJITedCode.html b/docs/DebuggingJITedCode.html index 92570f4..a9193f2 100644 --- a/docs/DebuggingJITedCode.html +++ b/docs/DebuggingJITedCode.html @@ -9,87 +9,24 @@
    Debugging JITed Code With GDB
      -
    1. Introduction
    2. -
    3. Quickstart
    4. -
    5. Example with clang and lli
    6. +
    7. Example usage
    8. +
    9. Background
    Written by Reid Kleckner
    - +
    -

    Without special runtime support, debugging dynamically generated code with -GDB (as well as most debuggers) can be quite painful. Debuggers generally read -debug information from the object file of the code, but for JITed code, there is -no such file to look for. -

    - -

    Depending on the architecture, this can impact the debugging experience in -different ways. For example, on most 32-bit x86 architectures, you can simply -compile with -fno-omit-framepointer for GCC and -fdisable-fp-elim for LLVM. -When GDB creates a backtrace, it can properly unwind the stack, but the stack -frames owned by JITed code have ??'s instead of the appropriate symbol name. -However, on Linux x86_64 in particular, GDB relies on the DWARF CFA debug -information to unwind the stack, so even if you compile your program to leave -the frame pointer untouched, GDB will usually be unable to unwind the stack past -any JITed code stack frames. -

    - -

    In order to communicate the necessary debug info to GDB, an interface for -registering JITed code with debuggers has been designed and implemented for -GDB and LLVM. At a high level, whenever LLVM generates new machine code, it -also generates an object file in memory containing the debug information. LLVM -then adds the object file to the global list of object files and calls a special -function (__jit_debug_register_code) marked noinline that GDB knows about. When -GDB attaches to a process, it puts a breakpoint in this function and loads all -of the object files in the global list. When LLVM calls the registration -function, GDB catches the breakpoint signal, loads the new object file from -LLVM's memory, and resumes the execution. In this way, GDB can get the -necessary debug information. +

In order to debug code JITed by LLVM, you need GDB 7.0 or newer, which is +available on most modern distributions of Linux. The version of GDB that Apple +ships with Xcode has been frozen at 6.3 for a while. LLDB may be a better +option for debugging JITed code on Mac OS X.</p>

    -

    At the time of this writing, LLVM only supports architectures that use ELF -object files and it only generates symbols and DWARF CFA information. However, -it would be easy to add more information to the object file, so we don't need to -coordinate with GDB to get better debug information. -

    -
    - - - - -
    - -

    In order to debug code JITed by LLVM, you need to install a recent version -of GDB. The interface was added on 2009-08-19, so you need a snapshot of GDB -more recent than that. Either download a snapshot of GDB or checkout CVS as -instructed here. Here -are the commands for doing a checkout and building the code: -

    - -
    -$ cvs -z 3 -d :pserver:anoncvs@sourceware.org:/cvs/src co gdb
    -$ mv src gdb   # You probably don't want this checkout called "src".
    -$ cd gdb
    -$ ./configure --prefix="$GDB_INSTALL"
    -$ make
    -$ make install
    -
    - -

    You can then use -jit-emit-debug in the LLVM command line arguments to enable -the interface. -

    -
    - - - - -
    - -

    For example, consider debugging running lli on the following C code in -foo.c: +

    Consider debugging the following code compiled with clang and run through +lli:

    @@ -119,7 +56,9 @@ trace at the crash:
     
     # Compile foo.c to bitcode.  You can use either clang or llvm-gcc with this
     # command line.  Both require -fexceptions, or the calls are all marked
    -# 'nounwind' which disables DWARF CFA info.
    +# 'nounwind' which disables DWARF exception handling info.  Custom frontends
    +# should avoid adding this attribute to JITed code, since it interferes with
    +# DWARF CFA generation at the moment.
     $ clang foo.c -fexceptions -emit-llvm -c -o foo.bc
     
     # Run foo.bc under lli with -jit-emit-debug.  If you built lli in debug mode,
    @@ -143,18 +82,60 @@ Program received signal SIGSEGV, Segmentation fault.
     #3  0x00007ffff7f5502a in main ()
     #4  0x00000000007c0225 in llvm::JIT::runFunction(llvm::Function*,
         std::vector<llvm::GenericValue,
    -    std::allocator<llvm::GenericValue> > const&) ()
    +    std::allocator<llvm::GenericValue> > const&) ()
     #5  0x00000000007d6d98 in
         llvm::ExecutionEngine::runFunctionAsMain(llvm::Function*,
         std::vector<std::string,
    -    std::allocator<std::string> > const&, char const* const*) ()
    +    std::allocator<std::string> > const&, char const* const*) ()
     #6  0x00000000004dab76 in main ()
     
    -

    As you can see, GDB can correctly unwind the stack and has the appropriate function names.

    +
    + + + + +
    + +

    Without special runtime support, debugging dynamically generated code with +GDB (as well as most debuggers) can be quite painful. Debuggers generally read +debug information from the object file of the code, but for JITed code, there is +no such file to look for. +

    + +

    Depending on the architecture, this can impact the debugging experience in +different ways. For example, on most 32-bit x86 architectures, you can simply +compile with -fno-omit-frame-pointer for GCC and -disable-fp-elim for LLVM. +When GDB creates a backtrace, it can properly unwind the stack, but the stack +frames owned by JITed code have ??'s instead of the appropriate symbol name. +However, on Linux x86_64 in particular, GDB relies on the DWARF call frame +address (CFA) debug information to unwind the stack, so even if you compile +your program to leave the frame pointer untouched, GDB will usually be unable +to unwind the stack past any JITed code stack frames. +

    + +

    In order to communicate the necessary debug info to GDB, an interface for +registering JITed code with debuggers has been designed and implemented for +GDB and LLVM. At a high level, whenever LLVM generates new machine code, it +also generates an object file in memory containing the debug information. LLVM +then adds the object file to the global list of object files and calls a special +function (__jit_debug_register_code) marked noinline that GDB knows about. When +GDB attaches to a process, it puts a breakpoint in this function and loads all +of the object files in the global list. When LLVM calls the registration +function, GDB catches the breakpoint signal, loads the new object file from +LLVM's memory, and resumes the execution. In this way, GDB can get the +necessary debug information. +

    + +

    At the time of this writing, LLVM only supports architectures that use ELF +object files and it only generates symbols and DWARF CFA information. However, +it would be easy to add more information to the object file, so we don't need to +coordinate with GDB to get better debug information. +

    +

    @@ -165,7 +146,7 @@ function names. src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Reid Kleckner
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-01-01 23:10:51 -0800 (Thu, 01 Jan 2009) $ + Last modified: $Date: 2010-07-07 22:16:45 +0200 (Wed, 07 Jul 2010) $ diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html index 2c85574..d324c15 100644 --- a/docs/ExceptionHandling.html +++ b/docs/ExceptionHandling.html @@ -404,7 +404,7 @@
    -  i8* %llvm.eh.exception( )
    +  i8* %llvm.eh.exception()
     

    This intrinsic returns a pointer to the exception structure.

    @@ -518,7 +518,7 @@
    -  i8* %llvm.eh.sjlj.lsda( )
    +  i8* %llvm.eh.sjlj.lsda()
     

    Used for SJLJ based exception handling, the @@ -619,7 +619,7 @@ Chris Lattner
    LLVM Compiler Infrastructure
    - Last modified: $Date: 2010-05-26 18:21:41 +0200 (Wed, 26 May 2010) $ + Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $ diff --git a/docs/FAQ.html b/docs/FAQ.html index 6e560f5..9415a90 100644 --- a/docs/FAQ.html +++ b/docs/FAQ.html @@ -803,7 +803,7 @@ define fastcc void @foo() { ret void } define void @bar() { - call void @foo( ) + call void @foo() ret void } @@ -931,7 +931,7 @@ F.i: src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> LLVM Compiler Infrastructure
    - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $ diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html index 9e1c8b8..aa874ae 100644 --- a/docs/GetElementPtr.html +++ b/docs/GetElementPtr.html @@ -64,7 +64,8 @@

    This document seeks to dispel the mystery and confusion surrounding LLVM's - GetElementPtr (GEP) instruction. Questions about the wily GEP instruction are + GetElementPtr (GEP) instruction. + Questions about the wily GEP instruction are probably the most frequently occurring questions once a developer gets down to coding with LLVM. Here we lay out the sources of confusion and show that the GEP instruction is really quite simple. @@ -653,7 +654,8 @@ idx3 = (char*) &MyVar + 8

  • Support C, C-like languages, and languages which can be conceptually lowered into C (this covers a lot).
  • Support optimizations such as those that are common in - C compilers.
  • + C compilers. In particular, GEP is a cornerstone of LLVM's + pointer aliasing model.
  • Provide a consistent method for computing addresses so that address computations don't need to be a part of load and store instructions in the IR.
  • @@ -728,7 +730,7 @@ idx3 = (char*) &MyVar + 8 Valid HTML 4.01 The LLVM Compiler Infrastructure
    - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + Last modified: $Date: 2010-07-06 17:26:33 +0200 (Tue, 06 Jul 2010) $ diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html index 7103d13..7c105fe 100644 --- a/docs/GettingStarted.html +++ b/docs/GettingStarted.html @@ -1137,13 +1137,13 @@ platforms or configurations using the same source tree.

    named after the build type:

    -
    Debug Builds +
    Debug Builds with assertions enabled (the default)
    Tools -
    OBJ_ROOT/Debug/bin +
    OBJ_ROOT/Debug+Asserts/bin
    Libraries -
    OBJ_ROOT/Debug/lib +
    OBJ_ROOT/Debug+Asserts/lib


    @@ -1673,7 +1673,7 @@ out:

    Chris Lattner
    Reid Spencer
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2010-05-19 09:00:17 +0200 (Wed, 19 May 2010) $ + Last modified: $Date: 2010-07-08 10:27:18 +0200 (Thu, 08 Jul 2010) $ diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html index b966f45..7663321 100644 --- a/docs/HowToReleaseLLVM.html +++ b/docs/HowToReleaseLLVM.html @@ -213,13 +213,13 @@ Building the Release
The build of llvm, llvm-gcc, and clang must be free -of errors and warnings in both debug, release, and release-asserts builds. +of errors and warnings in debug, release+asserts, and release builds. If all builds are clean, then the release passes build qualification.</p>
    1. debug: ENABLE_OPTIMIZED=0
    2. -
    3. release: ENABLE_OPTIMIZED=1
    4. -
    5. release-asserts: ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1
    6. +
    7. release+asserts: ENABLE_OPTIMIZED=1
    8. +
    9. release: ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1
    @@ -227,7 +227,7 @@ If all builds are clean, then the release passes build qualification.

- Build both debug, release (optimized), and release-asserts versions of + Build debug, release+asserts (optimized), and release versions of LLVM on all supported platforms. Directions to build llvm are here.</p>

    @@ -264,7 +264,7 @@ If all builds are clean, then the release passes build qualification. Binary Distribution

- Creating the Clang binary distribution (debug/release/release-asserts) requires + Creating the Clang binary distribution (debug/release+asserts/release) requires performing the following steps for each supported platform:

    @@ -429,7 +429,7 @@ Qualification Details

- The final stages of the release process involving taging the release branch, + The final stages of the release process involve tagging the release branch, updating documentation that refers to the release, and updating the demo page.</p>

    FIXME: Add a note if anything needs to be done to the clang website. @@ -517,7 +517,7 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> The LLVM Compiler Infrastructure
    - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + Last modified: $Date: 2010-07-07 09:48:00 +0200 (Wed, 07 Jul 2010) $ diff --git a/docs/LangRef.html b/docs/LangRef.html index 119436a..ca988b7 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -24,6 +24,7 @@

    1. 'private' Linkage
    2. 'linker_private' Linkage
    3. +
    4. 'linker_private_weak' Linkage
    5. 'internal' Linkage
    6. 'available_externally' Linkage
    7. 'linkonce' Linkage
    8. @@ -369,11 +370,9 @@ what is considered 'well formed'. For example, the following instruction is syntactically okay, but not well formed:

      -
      -
      +
       %x = add i32 1, %x
       
      -

      because the definition of %x does not dominate all of its uses. The LLVM infrastructure provides a verification pass that may be used to verify @@ -436,29 +435,23 @@

      The easy way:

      -
      -
      +
       %result = mul i32 %X, 8
       
      -

      After strength reduction:

      -
      -
      +
       %result = shl i32 %X, i8 3
       
      -

      And the hard way:

      -
      -
      +
       %0 = add i32 %X, %X           ; yields {i32}:%0
       %1 = add i32 %0, %0           ; yields {i32}:%1
       %result = add i32 %1, %1
       
      -

      This last way of multiplying %X by 8 illustrates several important lexical features of LLVM:

      @@ -497,28 +490,26 @@ forward declarations, and merges symbol table entries. Here is an example of the "hello world" module:

      -
      -
      +
       ; Declare the string constant as a global constant.
       @.LC0 = internal constant [13 x i8] c"hello world\0A\00"    ; [13 x i8]*
       
       ; External declaration of the puts function
      -declare i32 @puts(i8 *)                                     ; i32(i8 *)* 
      +declare i32 @puts(i8*)                                     ; i32 (i8*)* 
       
       ; Definition of main function
       define i32 @main() {                                        ; i32()* 
         ; Convert [13 x i8]* to i8  *...
      -  %cast210 = getelementptr [13 x i8]* @.LC0, i64 0, i64 0   ; i8 *
      +  %cast210 = getelementptr [13 x i8]* @.LC0, i64 0, i64 0   ; i8*
       
         ; Call puts function to write out the string to stdout.
      -  call i32 @puts(i8 * %cast210)                             ; i32
      +  call i32 @puts(i8* %cast210)                             ; i32
         ret i32 0
      } ; Named metadata !1 = metadata !{i32 41} !foo = !{!1, null}
      -

      This example is made up of a global variable named ".LC0", an external declaration of the "puts" function, @@ -546,20 +537,24 @@ define i32 @main() { ; i32()*

      private
      -
      Global values with private linkage are only directly accessible by objects - in the current module. In particular, linking code into a module with an - private global value may cause the private to be renamed as necessary to - avoid collisions. Because the symbol is private to the module, all - references can be updated. This doesn't show up in any symbol table in the - object file.
      +
      Global values with "private" linkage are only directly accessible + by objects in the current module. In particular, linking code into a + module with an private global value may cause the private to be renamed as + necessary to avoid collisions. Because the symbol is private to the + module, all references can be updated. This doesn't show up in any symbol + table in the object file.
      linker_private
      -
      Similar to private, but the symbol is passed through the assembler and - removed by the linker after evaluation. Note that (unlike private - symbols) linker_private symbols are subject to coalescing by the linker: - weak symbols get merged and redefinitions are rejected. However, unlike - normal strong symbols, they are removed by the linker from the final - linked image (executable or dynamic library).
      +
Similar to private, but the symbol is passed through the + assembler and evaluated by the linker. Unlike a normal strong symbol, it + is removed by the linker from the final linked image (executable or + dynamic library).</dd>
      + +
      linker_private_weak
      +
      Similar to "linker_private", but the symbol is weak. Note that + linker_private_weak symbols are subject to coalescing by the + linker. The symbols are removed by the linker from the final linked image + (executable or dynamic library).
      internal
      Similar to private, but the value shows as a local symbol @@ -623,8 +618,8 @@ define i32 @main() { ; i32()*
      weak_odr
      Some languages allow differing globals to be merged, such as two functions with different semantics. Other languages, such as C++, ensure - that only equivalent globals are ever merged (the "one definition rule" - - "ODR"). Such languages can use the linkonce_odr + that only equivalent globals are ever merged (the "one definition rule" + — "ODR"). Such languages can use the linkonce_odr and weak_odr linkage types to indicate that the global will only be merged with equivalent globals. These linkage types are otherwise the same as their non-odr versions.
      @@ -788,11 +783,9 @@ define i32 @main() { ; i32()* it easier to read the IR and make the IR more condensed (particularly when recursive types are involved). An example of a name specification is:

      -
      -
      +
       %mytype = type { %mytype*, i32 }
       
      -

      You may give a name to any type except "void". Type name aliases may be used anywhere a type @@ -864,11 +857,9 @@ define i32 @main() { ; i32()*

      For example, the following defines a global in a numbered address space with an initializer, section, and alignment:

      -
      -
      +
       @G = addrspace(5) constant float 1.0, section "foo", align 4
       
      -
    @@ -921,15 +912,13 @@ define i32 @main() { ; i32()* alignments must be a power of 2.

    Syntax:
    -
    -
    +
     define [linkage] [visibility]
            [cconv] [ret attrs]
            <ResultType> @<FunctionName> ([argument list])
            [fn Attrs] [section "name"] [align N]
            [gc] { ... }
     
    -
    @@ -946,11 +935,9 @@ define [linkage] [visibility] optional visibility style.

    Syntax:
    -
    -
    +
     @<Name> = alias [Linkage] [Visibility] <AliaseeTy> @<Aliasee>
     
    -
    @@ -966,12 +953,10 @@ define [linkage] [visibility] a named metadata.

    Syntax:
    -
    -
    +
     !1 = metadata !{metadata !"one"}
     !name = !{null, !1}
     
    -
    @@ -991,13 +976,11 @@ define [linkage] [visibility] multiple parameter attributes are needed, they are space separated. For example:

    -
    -
    +
     declare i32 @printf(i8* noalias nocapture, ...)
     declare i32 @atoi(i8 zeroext)
     declare signext i8 @returns_signed_char()
     
    -

    Note that any attributes for the function result (nounwind, readonly) come immediately after the argument list.

    @@ -1037,7 +1020,7 @@ declare signext i8 @returns_signed_char() generator that usually indicates a desired alignment for the synthesized stack slot. -
    sret
    +
    sret
    This indicates that the pointer parameter specifies the address of a structure that is the return value of the function in the source program. This pointer must be guaranteed by the caller to be valid: loads and @@ -1045,22 +1028,34 @@ declare signext i8 @returns_signed_char() may only be applied to the first parameter. This is not a valid attribute for return values.
    -
    noalias
    -
    This indicates that the pointer does not alias any global or any other - parameter. The caller is responsible for ensuring that this is the - case. On a function return value, noalias additionally indicates - that the pointer does not alias any other pointers visible to the - caller. For further details, please see the discussion of the NoAlias - response in - alias - analysis.
    - -
    nocapture
    +
    noalias
    +
    This indicates that pointer values + based on the argument or return + value do not alias pointer values which are not based on it, + ignoring certain "irrelevant" dependencies. + For a call to the parent function, dependencies between memory + references from before or after the call and from those during the call + are "irrelevant" to the noalias keyword for the arguments and + return value used in that call. + The caller shares the responsibility with the callee for ensuring that + these requirements are met. + For further details, please see the discussion of the NoAlias response in + alias analysis.
    +
    + Note that this definition of noalias is intentionally + similar to the definition of restrict in C99 for function + arguments, though it is slightly weaker. +
+ For function return values, C99's restrict is not meaningful, + while LLVM's noalias is, as illustrated in the sketch + following this attribute list. +</dd>
    + +
    nocapture
    This indicates that the callee does not make any copies of the pointer that outlive the callee itself. This is not a valid attribute for return values.
    -
    nest
    +
    nest
    This indicates that the pointer parameter can be excised using the trampoline intrinsics. This is not a valid attribute for return values.
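As promised above, here is a minimal sketch of the noalias attribute in use (the function names are invented for illustration): an allocator-style declaration whose return value aliases nothing else visible to the caller, and a copy routine whose two arguments are asserted not to alias each other.

; Hypothetical allocator: the returned pointer is not aliased by any
; other pointer visible to the caller, much like C99 restrict.
declare noalias i8* @my_alloc(i32)

; The caller guarantees %dst and %src never alias, so the load and
; store below may be freely reordered by the optimizer.
define void @copy_byte(i8* noalias %dst, i8* noalias %src) {
  %v = load i8* %src
  store i8 %v, i8* %dst
  ret void
}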
    @@ -1078,11 +1073,9 @@ declare signext i8 @returns_signed_char()

    Each function may specify a garbage collector name, which is simply a string:

    -
    -
    +
     define void @f() gc "name" { ... }
     
    -

    The compiler declares the supported values of name. Specifying a collector which will cause the compiler to alter its output in order to @@ -1105,14 +1098,12 @@ define void @f() gc "name" { ... }

    Function attributes are simple keywords that follow the type specified. If multiple attributes are needed, they are space separated. For example:

    -
    -
    +
     define void @f() noinline { ... }
     define void @f() alwaysinline { ... }
     define void @f() alwaysinline optsize { ... }
     define void @f() optsize { ... }
     
    -
    alignstack(<n>)
    @@ -1130,15 +1121,21 @@ define void @f() optsize { ... } this function is desirable (such as the "inline" keyword in C/C++). It is just a hint; it imposes no requirements on the inliner. +
    naked
    +
    This attribute disables prologue / epilogue emission for the function. + This can have very system-specific consequences.
    + +
    noimplicitfloat
    +
This attribute disables implicit floating point instructions.</dd>
    +
    noinline
    This attribute indicates that the inliner should never inline this function in any situation. This attribute may not be used together with the alwaysinline attribute.
    -
    optsize
    -
    This attribute suggests that optimization passes and code generator passes - make choices that keep the code size of this function low, and otherwise - do optimizations specifically to reduce code size.
    +
    noredzone
    +
    This attribute indicates that the code generator should not use a red + zone, even if the target-specific ABI normally permits it.
    noreturn
    This function attribute indicates that the function never returns @@ -1150,6 +1147,11 @@ define void @f() optsize { ... } unwind or exceptional control flow. If the function does unwind, its runtime behavior is undefined.
    +
    optsize
    +
    This attribute suggests that optimization passes and code generator passes + make choices that keep the code size of this function low, and otherwise + do optimizations specifically to reduce code size.
    +
    readnone
    This attribute indicates that the function computes its result (or decides to unwind an exception) based strictly on its arguments, without @@ -1192,17 +1194,6 @@ define void @f() optsize { ... } function that doesn't have an sspreq attribute or which has an ssp attribute, then the resulting function will have an sspreq attribute.
    - -
    noredzone
    -
    This attribute indicates that the code generator should not use a red - zone, even if the target-specific ABI normally permits it.
    - -
    noimplicitfloat
    -
    This attributes disables implicit floating point instructions.
    - -
    naked
    -
    This attribute disables prologue / epilogue emission for the function. - This can have very system-specific consequences.
    @@ -1219,12 +1210,10 @@ define void @f() optsize { ... } concatenated by LLVM and treated as a single unit, but may be separated in the .ll file if desired. The syntax is very simple:

    -
    -
    +
     module asm "inline asm code goes here"
     module asm "more can go here"
     
    -

    The strings can contain any character by escaping non-printable characters. The escape sequence used is simply "\xx" where "xx" is the two digit hex code @@ -1246,11 +1235,9 @@ module asm "more can go here" data is to be laid out in memory. The syntax for the data layout is simply:

    -
    -
    +
     target datalayout = "layout specification"
     
    -

    The layout specification consists of a list of specifications separated by the minus sign character ('-'). Each specification starts with @@ -1283,8 +1270,10 @@ target datalayout = "layout specification"

    fsize:abi:pref
    This specifies the alignment for a floating point type of a given bit - size. The value of size must be either 32 (float) or 64 - (double).
    + size. Only values of size that are supported by the target + will work. 32 (float) and 64 (double) are supported on all targets; + 80 or 128 (different flavors of long double) are also supported on some + targets.
    asize:abi:pref
    This specifies the alignment for an aggregate type of a given bit @@ -1360,34 +1349,46 @@ is undefined. Pointer values are associated with address ranges according to the following rules:

      -
    • A pointer value formed from a - getelementptr instruction - is associated with the addresses associated with the first operand - of the getelementptr.
    • +
    • A pointer value is associated with the addresses associated with + any value it is based on.
    • An address of a global variable is associated with the address range of the variable's storage.
    • The result value of an allocation instruction is associated with the address range of the allocated storage.
    • A null pointer in the default address-space is associated with no address.
    • -
    • A pointer value formed by an - inttoptr is associated with all - address ranges of all pointer values that contribute (directly or - indirectly) to the computation of the pointer's value.
    • -
    • The result value of a - bitcast is associated with all - addresses associated with the operand of the bitcast.
    • An integer constant other than zero or a pointer value returned from a function not defined within LLVM may be associated with address ranges allocated through mechanisms other than those provided by LLVM. Such ranges shall not overlap with any ranges of addresses allocated by mechanisms provided by LLVM.
    • -
    + + +

    A pointer value is based on another pointer value according + to the following rules:

    + +
      +
    • A pointer value formed from a + getelementptr operation + is based on the first operand of the getelementptr.
    • +
    • The result value of a + bitcast is based on the operand + of the bitcast.
    • +
    • A pointer value formed by an + inttoptr is based on all + pointer values that contribute (directly or indirectly) to the + computation of the pointer's value.
    • +
    • The "based on" relationship is transitive.
    • +
    + +

    Note that this definition of "based" is intentionally + similar to the definition of "based" in C99, though it is + slightly weaker.

    LLVM IR does not associate types with memory. The result type of a load merely indicates the size and alignment of the memory from which to load, as well as the -interpretation of the value. The first operand of a +interpretation of the value. The first operand type of a store similarly only indicates the size and alignment of the store.
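For example (a minimal sketch with invented names), the same memory can be written through a pointer of one type and read back through a pointer of another; the types only describe the individual accesses:

define float @reinterpret(i32 %x) {
  ; The bytes of %x are stored as an i32 and loaded back as a float;
  ; the memory itself carries no type.
  %mem = alloca i32
  store i32 %x, i32* %mem
  %fp = bitcast i32* %mem to float*
  %f = load float* %fp
  ret float %f
}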

    @@ -1632,8 +1633,6 @@ Classifications - - @@ -1900,7 +1899,7 @@ Classifications href="#t_array">array of four i32 values. - i32 (i32 *) * + i32 (i32*) * A pointer to a function that takes an i32*, returning an i32. @@ -2167,13 +2166,11 @@ Classifications have pointer type. For example, the following is a legal LLVM file:

    -
    -
    +
     @X = global i32 17
     @Y = global i32 42
     @Z = global [2 x i32*] [ i32* @X, i32* @Y ]
     
    -
    @@ -2192,8 +2189,7 @@ Classifications surprising) transformations that are valid (in pseudo IR):

    -
    -
    +
       %A = add %X, undef
       %B = sub %X, undef
       %C = xor %X, undef
    @@ -2202,13 +2198,11 @@ Safe:
       %B = undef
       %C = undef
     
    -

    This is safe because all of the output bits are affected by the undef bits. Any output bit can have a zero or one depending on the input bits.

    -
    -
    +
       %A = or %X, undef
       %B = and %X, undef
     Safe:
    @@ -2218,7 +2212,6 @@ Unsafe:
       %A = undef
       %B = undef
     
    -

    These logical operations have bits that are not always affected by the input. For example, if "%X" has a zero bit, then the output of the 'and' operation will @@ -2229,8 +2222,7 @@ optimize the and to 0. Likewise, it is safe to assume that all the bits of the undef operand to the or could be set, allowing the or to be folded to -1.

    -
    -
    +
       %A = select undef, %X, %Y
       %B = select undef, 42, %Y
       %C = select %X, %Y, undef
    @@ -2243,7 +2235,6 @@ Unsafe:
       %B = undef
       %C = undef
     
    -

    This set of examples show that undefined select (and conditional branch) conditions can go "either way" but they have to come from one of the two @@ -2253,8 +2244,7 @@ the optimizer is allowed to assume that the undef operand could be the same as %Y, allowing the whole select to be eliminated.

    -
    -
    +
       %A = xor undef, undef
     
       %B = undef
    @@ -2272,7 +2262,6 @@ Safe:
       %E = undef
       %F = undef
     
    -

    This example points out that two undef operands are not necessarily the same. This can be surprising to people (and also matches C semantics) where they @@ -2285,15 +2274,13 @@ so the value is not necessarily consistent over time. In fact, %A and %C need to have the same semantics or the core LLVM "replace all uses with" concept would not hold.

    -
    -
    +
       %A = fdiv undef, %X
       %B = fdiv %X, undef
     Safe:
       %A = undef
     b: unreachable
     
    -

    These examples show the crucial difference between an undefined value and undefined behavior. An undefined value (like undef) is @@ -2308,15 +2295,13 @@ it: since the undefined operation "can't happen", the optimizer can assume that it occurs in dead code.

    -
    -
    +
     a:  store undef -> %X
     b:  store %X -> undef
     Safe:
     a: <deleted>
     b: unreachable
     
    -

    These examples reiterate the fdiv example: a store "of" an undefined value can be assumed to not have any effect: we can assume that the value is @@ -2342,7 +2327,6 @@ has undefined behavior.

    Trap value behavior is defined in terms of value dependence:

    -

    • Values other than phi nodes depend on their operands.
    • @@ -2374,7 +2358,8 @@ has undefined behavior.

    • An instruction with externally visible side effects depends on the most recent preceding instruction with externally visible side effects, following - the order in the IR. (This includes volatile loads and stores.)
    • + the order in the IR. (This includes + volatile operations.)
    • An instruction control-depends on a terminator instruction @@ -2385,7 +2370,6 @@ has undefined behavior.

    • Dependence is transitive.
    -

Whenever a trap value is generated, all values which depend on it evaluate to trap. If they have side effects, they evoke their side effects as if each @@ -2394,8 +2378,7 @@ has undefined behavior.</p>

    Here are some examples:

    -
    -
    +
     entry:
       %trap = sub nuw i32 0, 1           ; Results in a trap value.
       %still_trap = and i32 %trap, 0     ; Whereas (and i32 undef, 0) would return 0.
    @@ -2430,7 +2413,6 @@ end:
                                          ; so this is defined (ignoring earlier
                                          ; undefined behavior in this example).
     
    -
    @@ -2475,104 +2457,114 @@ end: supported). The following is the syntax for constant expressions:

-trunc ( CST to TYPE )
+trunc (CST to TYPE)
    Truncate a constant to another type. The bit size of CST must be larger than the bit size of TYPE. Both types must be integers.
-zext ( CST to TYPE )
+zext (CST to TYPE)
    Zero extend a constant to another type. The bit size of CST must be
-   smaller or equal to the bit size of TYPE. Both types must be integers.
+   smaller than the bit size of TYPE. Both types must be integers.
-sext ( CST to TYPE )
+sext (CST to TYPE)
    Sign extend a constant to another type. The bit size of CST must be
-   smaller or equal to the bit size of TYPE. Both types must be integers.
+   smaller than the bit size of TYPE. Both types must be integers.
-fptrunc ( CST to TYPE )
+fptrunc (CST to TYPE)
    Truncate a floating point constant to another floating point type. The size of CST must be larger than the size of TYPE. Both types must be floating point.
-fpext ( CST to TYPE )
+fpext (CST to TYPE)
    Floating point extend a constant to another type. The size of CST must be smaller or equal to the size of TYPE. Both types must be floating point.
-fptoui ( CST to TYPE )
+fptoui (CST to TYPE)
    Convert a floating point constant to the corresponding unsigned integer constant. TYPE must be a scalar or vector integer type. CST must be of scalar or vector floating point type. Both CST and TYPE must be scalars, or vectors of the same number of elements. If the value won't fit in the integer type, the results are undefined.
-fptosi ( CST to TYPE )
+fptosi (CST to TYPE)
    Convert a floating point constant to the corresponding signed integer constant. TYPE must be a scalar or vector integer type. CST must be of scalar or vector floating point type. Both CST and TYPE must be scalars, or vectors of the same number of elements. If the value won't fit in the integer type, the results are undefined.
-uitofp ( CST to TYPE )
+uitofp (CST to TYPE)
    Convert an unsigned integer constant to the corresponding floating point constant. TYPE must be a scalar or vector floating point type. CST must be of scalar or vector integer type. Both CST and TYPE must be scalars, or vectors of the same number of elements. If the value won't fit in the floating point type, the results are undefined.
-sitofp ( CST to TYPE )
+sitofp (CST to TYPE)
    Convert a signed integer constant to the corresponding floating point constant. TYPE must be a scalar or vector floating point type. CST must be of scalar or vector integer type. Both CST and TYPE must be scalars, or vectors of the same number of elements. If the value won't fit in the floating point type, the results are undefined.
-ptrtoint ( CST to TYPE )
+ptrtoint (CST to TYPE)
    Convert a pointer typed constant to the corresponding integer constant. TYPE must be an integer type. CST must be of pointer type. The CST value is zero extended, truncated, or unchanged to make it fit in TYPE.
-inttoptr ( CST to TYPE )
+inttoptr (CST to TYPE)
    Convert an integer constant to a pointer constant. TYPE must be a pointer type. CST must be of integer type. The CST value is zero extended, truncated, or unchanged to make it fit in a pointer size. This one is really dangerous!
-bitcast ( CST to TYPE )
+bitcast (CST to TYPE)
    Convert a constant, CST, to another TYPE. The constraints of the operands are the same as those for the bitcast instruction.
-getelementptr ( CSTPTR, IDX0, IDX1, ... )
-getelementptr inbounds ( CSTPTR, IDX0, IDX1, ... )
+getelementptr (CSTPTR, IDX0, IDX1, ...)
+getelementptr inbounds (CSTPTR, IDX0, IDX1, ...)
    Perform the getelementptr operation on constants. As with the getelementptr instruction, the index list may have zero or more indexes, which are required to make sense for the type of "CSTPTR".
-select ( COND, VAL1, VAL2 )
+select (COND, VAL1, VAL2)
    Perform the select operation on constants.
-icmp COND ( VAL1, VAL2 )
+icmp COND (VAL1, VAL2)
    Performs the icmp operation on constants.
-fcmp COND ( VAL1, VAL2 )
+fcmp COND (VAL1, VAL2)
    Performs the fcmp operation on constants.
-extractelement ( VAL, IDX )
+extractelement (VAL, IDX)
    Perform the extractelement operation on constants.
-insertelement ( VAL, ELT, IDX )
+insertelement (VAL, ELT, IDX)
    Perform the insertelement operation on constants.
-shufflevector ( VEC1, VEC2, IDXMASK )
+shufflevector (VEC1, VEC2, IDXMASK)
    Perform the shufflevector operation on constants.
-OPCODE ( LHS, RHS )
+extractvalue (VAL, IDX0, IDX1, ...)
+   Perform the extractvalue operation on constants. The index list is interpreted in a similar manner as indices in a 'getelementptr' operation. At least one index value must be specified.
+insertvalue (VAL, ELT, IDX0, IDX1, ...)
+   Perform the insertvalue operation on constants. The index list is interpreted in a similar manner as indices in a 'getelementptr' operation. At least one index value must be specified.
+OPCODE (LHS, RHS)
    Perform the specified operation of the LHS and RHS constants. OPCODE may be any of the binary or bitwise binary operations. The constraints ...
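To make these forms concrete, here is a small illustrative sketch (not part of the original patch; the globals @gbl, @third and @small are hypothetical) showing constant expressions in global initializers:

  @gbl   = global [8 x i32] zeroinitializer
  @third = global i32* getelementptr ([8 x i32]* @gbl, i32 0, i32 2)  ; address computed at compile time
  @small = global i16 trunc (i32 70000 to i16)                        ; yields i16 4464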

@@ -2602,31 +2594,25 @@ end:
 ... containing the asm needs to align its stack conservatively. An example inline assembler expression is:
     i32 (i32) asm "bswap $0", "=r,r"
     

    Inline assembler expressions may only be used as the callee operand of a call instruction. Thus, typically we have:

     %X = call i32 asm "bswap $0", "=r,r"(i32 %Y)
     

    Inline asms with side effects not visible in the constraint list must be marked as having side effects. This is done through the use of the 'sideeffect' keyword, like so:

     call void asm sideeffect "eieio", ""()
     

In some cases inline asms will contain code that will not work unless the stack is aligned in some way, such as calls or SSE instructions on x86, ...
@@ -2635,11 +2621,9 @@ call void asm sideeffect "eieio", ""()
... contain and should generate its usual stack alignment code in the prologue if the 'alignstack' keyword is present:

     call void asm alignstack "eieio", ""()
     

    If both keywords appear the 'sideeffect' keyword must come first.
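Putting the two rules together, an asm needing both markers would be written as follows (an illustrative sketch, not from the original text):

  call void asm sideeffect alignstack "eieio", ""()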

@@ -2663,13 +2647,11 @@ call void asm alignstack "eieio", ""()
... front-end to correlate backend errors that occur with inline asm back to the source code that produced it. For example:

     call void asm sideeffect "something bad", ""(), !srcloc !42
     ...
     !42 = !{ i32 1234567 }
     

    It is up to the front-end to make sense of the magic numbers it places in the IR.

@@ -2704,22 +2686,18 @@ call void asm sideeffect "something bad", ""(), !srcloc !42
... example: "!foo = metadata !{!4, !3}".

Metadata can be used as function arguments. Here the llvm.dbg.value function is using two metadata arguments:

            call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
          

Metadata can be attached to an instruction. Here metadata !21 is attached to the add instruction using the !dbg identifier:

           %indvar.next = add i64 %indvar, 1, !dbg !21
         

    @@ -3520,7 +3498,7 @@ Instruction

 If the exact keyword is present, the result value of the sdiv is a trap value if the result would
-be rounded or if overflow would occur.
+be rounded.

    Example:
    @@ -4239,7 +4217,7 @@ Instruction 
     
     
    Syntax:
    -  <result> = alloca <type>[, i32 <NumElements>][, align <alignment>]     ; yields {type*}:result
    +  <result> = alloca <type>[, <ty> <NumElements>][, align <alignment>]     ; yields {type*}:result
     
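With this change NumElements may be given as any integer type, not just i32. A hypothetical illustration (not from the original patch):

  %buf = alloca i32, i64 4, align 8    ; four i32 slots, count expressed as an i64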
    Overview:
    @@ -4347,8 +4325,8 @@ Instruction
    Syntax:
    -  store <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !]                   ; yields {void}
    -  volatile store <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !]          ; yields {void}
    +  store <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>]                   ; yields {void}
    +  volatile store <ty> <value>, <ty>* <pointer>[, align <alignment>][, !nontemporal !<index>]          ; yields {void}
     
    Overview:
@@ -4373,7 +4351,7 @@ Instruction
 ... produce less efficient code. An alignment of 1 is always safe.

 The optional !nontemporal metadata must reference a single metadata
-name corresponding to a metadata node with one i32 entry of
+name <index> corresponding to a metadata node with one i32 entry of
 value 1. The existence of the !nontemporal metadata on the instruction tells the optimizer and code generator that this load is not expected to be reused in the cache. The code generator may ...
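For instance (an illustrative sketch, not part of the original patch; %val, %ptr and !0 are hypothetical), a nontemporal store and its metadata node could be written:

  store i32 %val, i32* %ptr, !nontemporal !0
  ...
  !0 = !{ i32 1 }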
@@ -4440,8 +4418,7 @@ Instruction
    For example, let's consider a C code fragment and how it gets compiled to LLVM:

     struct RT {
       char A;
       int B[10][20];
    @@ -4457,12 +4434,10 @@ int *foo(struct ST *s) {
       return &s[1].Z.B[5][13];
     }
     

    The LLVM code generated by the GCC frontend is:

     %RT = type { i8 , [10 x [20 x i32]], i8  }
     %ST = type { i32, double, %RT }
     
    @@ -4472,7 +4447,6 @@ entry:
       ret i32* %reg
     }
     
    Semantics:

In the example above, the first index is indexing into the '%ST*' ...
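To make the semantics concrete, the single getelementptr above can be decomposed into one index per step; the following equivalent sketch is offered for illustration (not part of the original patch):

  define i32* @foo(%ST* %s) {
    %t1 = getelementptr %ST* %s, i32 1                        ; yields %ST*:%t1
    %t2 = getelementptr %ST* %t1, i32 0, i32 2                ; yields %RT*:%t2
    %t3 = getelementptr %RT* %t2, i32 0, i32 1                ; yields [10 x [20 x i32]]*:%t3
    %t4 = getelementptr [10 x [20 x i32]]* %t3, i32 0, i32 5  ; yields [20 x i32]*:%t4
    %t5 = getelementptr [20 x i32]* %t4, i32 0, i32 13        ; yields i32*:%t5
    ret i32* %t5
  }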
@@ -5406,7 +5380,7 @@ Loop:       ; Infinite loop that counts from 0 on up...
    Example:
       %retval = call i32 @test(i32 %argc)
    -  call i32 (i8 *, ...)* @printf(i8 * %msg, i32 12, i8 42)      ; yields i32
    +  call i32 (i8*, ...)* @printf(i8* %msg, i32 12, i8 42)        ; yields i32
       %X = tail call i32 @foo()                                    ; yields i32
       %Y = tail call fastcc i32 @foo()  ; yields i32
       call void %foo(i8 97 signext)
    @@ -5543,8 +5517,7 @@ freestanding environments and non-C-based languages.

    instruction and the variable argument handling intrinsic functions are used.

     define i32 @test(i32 %X, ...) {
       ; Initialize variable argument processing
       %ap = alloca i8*
    @@ -5569,7 +5542,6 @@ declare void @llvm.va_start(i8*)
     declare void @llvm.va_copy(i8*, i8*)
     declare void @llvm.va_end(i8*)
     
    @@ -5839,7 +5811,7 @@ LLVM.

    Syntax:
    -  declare i8 *@llvm.frameaddress(i32 <level>)
    +  declare i8* @llvm.frameaddress(i32 <level>)
     
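A hedged usage sketch (not from the original patch; %fp is hypothetical):

  %fp = call i8* @llvm.frameaddress(i32 0)   ; frame pointer of the current function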
    Overview:
    @@ -5873,7 +5845,7 @@ LLVM.

    Syntax:
    -  declare i8 *@llvm.stacksave()
    +  declare i8* @llvm.stacksave()
     
    Overview:
    @@ -5903,7 +5875,7 @@ LLVM.

    Syntax:
    -  declare void @llvm.stackrestore(i8 * %ptr)
    +  declare void @llvm.stackrestore(i8* %ptr)
     
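These two intrinsics are typically paired around a temporary dynamic alloca; a minimal sketch (not from the original patch; %sp, %tmp and %len are hypothetical):

  %sp  = call i8* @llvm.stacksave()
  %tmp = alloca i8, i32 %len             ; scratch space that is only needed briefly
  ; ... use %tmp ...
  call void @llvm.stackrestore(i8* %sp)  ; frees %tmp by resetting the stack pointer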
    Overview:
    @@ -5992,7 +5964,7 @@ LLVM.

    Syntax:
    -  declare i64 @llvm.readcyclecounter( )
    +  declare i64 @llvm.readcyclecounter()
     
    Overview:
    @@ -6037,9 +6009,9 @@ LLVM.

    all bit widths however.

    -  declare void @llvm.memcpy.p0i8.p0i8.i32(i8 * <dest>, i8 * <src>,
    +  declare void @llvm.memcpy.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
                                               i32 <len>, i32 <align>, i1 <isvolatile>)
    -  declare void @llvm.memcpy.p0i8.p0i8.i64(i8 * <dest>, i8 * <src>,
    +  declare void @llvm.memcpy.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
                                               i64 <len>, i32 <align>, i1 <isvolatile>)
     
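A hypothetical call (not from the original patch) copying 16 bytes with 4-byte alignment, non-volatile:

  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 16, i32 4, i1 false)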
    @@ -6091,9 +6063,9 @@ LLVM.

    widths however.

    -  declare void @llvm.memmove.p0i8.p0i8.i32(i8 * <dest>, i8 * <src>,
    +  declare void @llvm.memmove.p0i8.p0i8.i32(i8* <dest>, i8* <src>,
                                                i32 <len>, i32 <align>, i1 <isvolatile>)
    -  declare void @llvm.memmove.p0i8.p0i8.i64(i8 * <dest>, i8 * <src>,
    +  declare void @llvm.memmove.p0i8.p0i8.i64(i8* <dest>, i8* <src>,
                                                i64 <len>, i32 <align>, i1 <isvolatile>)
     
    @@ -6147,9 +6119,9 @@ LLVM.

    widths however.

    -  declare void @llvm.memset.p0i8.i32(i8 * <dest>, i8 <val>,
    +  declare void @llvm.memset.p0i8.i32(i8* <dest>, i8 <val>,
                                          i32 <len>, i32 <align>, i1 <isvolatile>)
    -  declare void @llvm.memset.p0i8.i64(i8 * <dest>, i8 <val>,
    +  declare void @llvm.memset.p0i8.i64(i8* <dest>, i8 <val>,
                                          i64 <len>, i32 <align>, i1 <isvolatile>)
     
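Analogously for memset, a hypothetical call (not from the original patch) zeroing 64 bytes:

  call void @llvm.memset.p0i8.i32(i8* %dst, i8 0, i32 64, i32 4, i1 false)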
    @@ -6922,7 +6894,8 @@ LLVM.

This intrinsic makes it possible to excise one parameter, marked with the nest attribute, from a function. The result is a callable function pointer lacking the nest parameter - the caller does not need to provide a value for it. Instead, the value to use is stored in advance in a "trampoline", a block of memory usually allocated on the stack, which also ...
@@ -6934,17 +6907,15 @@ LLVM.

    pointer has signature i32 (i32, i32)*. It can be created as follows:

       %tramp = alloca [10 x i8], align 4 ; size and alignment only correct for X86
       %tramp1 = getelementptr [10 x i8]* %tramp, i32 0, i32 0
    -  %p = call i8* @llvm.init.trampoline( i8* %tramp1, i8* bitcast (i32 (i8* nest , i32, i32)* @f to i8*), i8* %nval )
    +  %p = call i8* @llvm.init.trampoline(i8* %tramp1, i8* bitcast (i32 (i8* nest , i32, i32)* @f to i8*), i8* %nval)
       %fp = bitcast i8* %p to i32 (i32, i32)*
     
-The call %val = call i32 %fp( i32 %x, i32 %y ) is then equivalent to %val = call i32 %f( i8* %nval, i32 %x, i32 %y ).
+The call %val = call i32 %fp(i32 %x, i32 %y) is then equivalent to %val = call i32 %f(i8* %nval, i32 %x, i32 %y).

    @@ -7024,7 +6995,7 @@ LLVM.

    Syntax:
    -  declare void @llvm.memory.barrier( i1 <ll>, i1 <ls>, i1 <sl>, i1 <ss>, i1 <device> )
    +  declare void @llvm.memory.barrier(i1 <ll>, i1 <ls>, i1 <sl>, i1 <ss>, i1 <device>)
     
    Overview:
    @@ -7081,7 +7052,7 @@ LLVM.

 store i32 4, %ptr
 %result1 = load i32* %ptr     ; yields {i32}:result1 = 4
-call void @llvm.memory.barrier( i1 false, i1 true, i1 false, i1 false )
+call void @llvm.memory.barrier(i1 false, i1 true, i1 false, i1 false)
                               ; guarantee the above finishes
 store i32 8, %ptr             ; before this begins
    @@ -7101,10 +7072,10 @@ LLVM.

    support all bit widths however.

    -  declare i8 @llvm.atomic.cmp.swap.i8.p0i8( i8* <ptr>, i8 <cmp>, i8 <val> )
    -  declare i16 @llvm.atomic.cmp.swap.i16.p0i16( i16* <ptr>, i16 <cmp>, i16 <val> )
    -  declare i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* <ptr>, i32 <cmp>, i32 <val> )
    -  declare i64 @llvm.atomic.cmp.swap.i64.p0i64( i64* <ptr>, i64 <cmp>, i64 <val> )
    +  declare i8 @llvm.atomic.cmp.swap.i8.p0i8(i8* <ptr>, i8 <cmp>, i8 <val>)
    +  declare i16 @llvm.atomic.cmp.swap.i16.p0i16(i16* <ptr>, i16 <cmp>, i16 <val>)
    +  declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* <ptr>, i32 <cmp>, i32 <val>)
    +  declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* <ptr>, i64 <cmp>, i64 <val>)
     
    Overview:
    @@ -7133,13 +7104,13 @@ LLVM.

 store i32 4, %ptr
 %val1 = add i32 4, 4
-%result1 = call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %ptr, i32 4, %val1 )
+%result1 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %ptr, i32 4, %val1)
                                        ; yields {i32}:result1 = 4
 %stored1 = icmp eq i32 %result1, 4     ; yields {i1}:stored1 = true
 %memval1 = load i32* %ptr              ; yields {i32}:memval1 = 8
 %val2 = add i32 1, 1
-%result2 = call i32 @llvm.atomic.cmp.swap.i32.p0i32( i32* %ptr, i32 5, %val2 )
+%result2 = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %ptr, i32 5, %val2)
                                        ; yields {i32}:result2 = 8
 %stored2 = icmp eq i32 %result2, 5     ; yields {i1}:stored2 = false
@@ -7159,10 +7130,10 @@ LLVM.

    integer bit width. Not all targets support all bit widths however.

    -  declare i8 @llvm.atomic.swap.i8.p0i8( i8* <ptr>, i8 <val> )
    -  declare i16 @llvm.atomic.swap.i16.p0i16( i16* <ptr>, i16 <val> )
    -  declare i32 @llvm.atomic.swap.i32.p0i32( i32* <ptr>, i32 <val> )
    -  declare i64 @llvm.atomic.swap.i64.p0i64( i64* <ptr>, i64 <val> )
    +  declare i8 @llvm.atomic.swap.i8.p0i8(i8* <ptr>, i8 <val>)
    +  declare i16 @llvm.atomic.swap.i16.p0i16(i16* <ptr>, i16 <val>)
    +  declare i32 @llvm.atomic.swap.i32.p0i32(i32* <ptr>, i32 <val>)
    +  declare i64 @llvm.atomic.swap.i64.p0i64(i64* <ptr>, i64 <val>)
     
    Overview:
    @@ -7189,13 +7160,13 @@ LLVM.

 store i32 4, %ptr
 %val1 = add i32 4, 4
-%result1 = call i32 @llvm.atomic.swap.i32.p0i32( i32* %ptr, i32 %val1 )
+%result1 = call i32 @llvm.atomic.swap.i32.p0i32(i32* %ptr, i32 %val1)
                                        ; yields {i32}:result1 = 4
 %stored1 = icmp eq i32 %result1, 4     ; yields {i1}:stored1 = true
 %memval1 = load i32* %ptr              ; yields {i32}:memval1 = 8
 %val2 = add i32 1, 1
-%result2 = call i32 @llvm.atomic.swap.i32.p0i32( i32* %ptr, i32 %val2 )
+%result2 = call i32 @llvm.atomic.swap.i32.p0i32(i32* %ptr, i32 %val2)
                                        ; yields {i32}:result2 = 8
 %stored2 = icmp eq i32 %result2, 8     ; yields {i1}:stored2 = true
@@ -7217,10 +7188,10 @@ LLVM.

    any integer bit width. Not all targets support all bit widths however.

    -  declare i8 @llvm.atomic.load.add.i8.p0i8( i8* <ptr>, i8 <delta> )
    -  declare i16 @llvm.atomic.load.add.i16.p0i16( i16* <ptr>, i16 <delta> )
    -  declare i32 @llvm.atomic.load.add.i32.p0i32( i32* <ptr>, i32 <delta> )
    -  declare i64 @llvm.atomic.load.add.i64.p0i64( i64* <ptr>, i64 <delta> )
    +  declare i8 @llvm.atomic.load.add.i8.p0i8(i8* <ptr>, i8 <delta>)
    +  declare i16 @llvm.atomic.load.add.i16.p0i16(i16* <ptr>, i16 <delta>)
    +  declare i32 @llvm.atomic.load.add.i32.p0i32(i32* <ptr>, i32 <delta>)
    +  declare i64 @llvm.atomic.load.add.i64.p0i64(i64* <ptr>, i64 <delta>)
     
    Overview:
    @@ -7243,11 +7214,11 @@ LLVM.

 %mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
 %ptr     = bitcast i8* %mallocP to i32*
            store i32 4, %ptr
-%result1 = call i32 @llvm.atomic.load.add.i32.p0i32( i32* %ptr, i32 4 )
+%result1 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 4)
                                        ; yields {i32}:result1 = 4
-%result2 = call i32 @llvm.atomic.load.add.i32.p0i32( i32* %ptr, i32 2 )
+%result2 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 2)
                                        ; yields {i32}:result2 = 8
-%result3 = call i32 @llvm.atomic.load.add.i32.p0i32( i32* %ptr, i32 5 )
+%result3 = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %ptr, i32 5)
                                        ; yields {i32}:result3 = 10
 %memval1 = load i32* %ptr              ; yields {i32}:memval1 = 15
    @@ -7268,10 +7239,10 @@ LLVM.

    support all bit widths however.

    -  declare i8 @llvm.atomic.load.sub.i8.p0i32( i8* <ptr>, i8 <delta> )
    -  declare i16 @llvm.atomic.load.sub.i16.p0i32( i16* <ptr>, i16 <delta> )
    -  declare i32 @llvm.atomic.load.sub.i32.p0i32( i32* <ptr>, i32 <delta> )
    -  declare i64 @llvm.atomic.load.sub.i64.p0i32( i64* <ptr>, i64 <delta> )
    +  declare i8 @llvm.atomic.load.sub.i8.p0i32(i8* <ptr>, i8 <delta>)
    +  declare i16 @llvm.atomic.load.sub.i16.p0i32(i16* <ptr>, i16 <delta>)
    +  declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* <ptr>, i32 <delta>)
    +  declare i64 @llvm.atomic.load.sub.i64.p0i32(i64* <ptr>, i64 <delta>)
     
    Overview:
    @@ -7295,11 +7266,11 @@ LLVM.

 %mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
 %ptr     = bitcast i8* %mallocP to i32*
            store i32 8, %ptr
-%result1 = call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %ptr, i32 4 )
+%result1 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %ptr, i32 4)
                                        ; yields {i32}:result1 = 8
-%result2 = call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %ptr, i32 2 )
+%result2 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %ptr, i32 2)
                                        ; yields {i32}:result2 = 4
-%result3 = call i32 @llvm.atomic.load.sub.i32.p0i32( i32* %ptr, i32 5 )
+%result3 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %ptr, i32 5)
                                        ; yields {i32}:result3 = 2
 %memval1 = load i32* %ptr              ; yields {i32}:memval1 = -3
@@ -7324,31 +7295,31 @@ LLVM.

    widths however.

    -  declare i8 @llvm.atomic.load.and.i8.p0i8( i8* <ptr>, i8 <delta> )
    -  declare i16 @llvm.atomic.load.and.i16.p0i16( i16* <ptr>, i16 <delta> )
    -  declare i32 @llvm.atomic.load.and.i32.p0i32( i32* <ptr>, i32 <delta> )
    -  declare i64 @llvm.atomic.load.and.i64.p0i64( i64* <ptr>, i64 <delta> )
    +  declare i8 @llvm.atomic.load.and.i8.p0i8(i8* <ptr>, i8 <delta>)
    +  declare i16 @llvm.atomic.load.and.i16.p0i16(i16* <ptr>, i16 <delta>)
    +  declare i32 @llvm.atomic.load.and.i32.p0i32(i32* <ptr>, i32 <delta>)
    +  declare i64 @llvm.atomic.load.and.i64.p0i64(i64* <ptr>, i64 <delta>)
     
    -  declare i8 @llvm.atomic.load.or.i8.p0i8( i8* <ptr>, i8 <delta> )
    -  declare i16 @llvm.atomic.load.or.i16.p0i16( i16* <ptr>, i16 <delta> )
    -  declare i32 @llvm.atomic.load.or.i32.p0i32( i32* <ptr>, i32 <delta> )
    -  declare i64 @llvm.atomic.load.or.i64.p0i64( i64* <ptr>, i64 <delta> )
    +  declare i8 @llvm.atomic.load.or.i8.p0i8(i8* <ptr>, i8 <delta>)
    +  declare i16 @llvm.atomic.load.or.i16.p0i16(i16* <ptr>, i16 <delta>)
    +  declare i32 @llvm.atomic.load.or.i32.p0i32(i32* <ptr>, i32 <delta>)
    +  declare i64 @llvm.atomic.load.or.i64.p0i64(i64* <ptr>, i64 <delta>)
     
    -  declare i8 @llvm.atomic.load.nand.i8.p0i32( i8* <ptr>, i8 <delta> )
    -  declare i16 @llvm.atomic.load.nand.i16.p0i32( i16* <ptr>, i16 <delta> )
    -  declare i32 @llvm.atomic.load.nand.i32.p0i32( i32* <ptr>, i32 <delta> )
    -  declare i64 @llvm.atomic.load.nand.i64.p0i32( i64* <ptr>, i64 <delta> )
    +  declare i8 @llvm.atomic.load.nand.i8.p0i32(i8* <ptr>, i8 <delta>)
    +  declare i16 @llvm.atomic.load.nand.i16.p0i32(i16* <ptr>, i16 <delta>)
    +  declare i32 @llvm.atomic.load.nand.i32.p0i32(i32* <ptr>, i32 <delta>)
    +  declare i64 @llvm.atomic.load.nand.i64.p0i32(i64* <ptr>, i64 <delta>)
     
    -  declare i8 @llvm.atomic.load.xor.i8.p0i32( i8* <ptr>, i8 <delta> )
    -  declare i16 @llvm.atomic.load.xor.i16.p0i32( i16* <ptr>, i16 <delta> )
    -  declare i32 @llvm.atomic.load.xor.i32.p0i32( i32* <ptr>, i32 <delta> )
    -  declare i64 @llvm.atomic.load.xor.i64.p0i32( i64* <ptr>, i64 <delta> )
    +  declare i8 @llvm.atomic.load.xor.i8.p0i32(i8* <ptr>, i8 <delta>)
    +  declare i16 @llvm.atomic.load.xor.i16.p0i32(i16* <ptr>, i16 <delta>)
    +  declare i32 @llvm.atomic.load.xor.i32.p0i32(i32* <ptr>, i32 <delta>)
    +  declare i64 @llvm.atomic.load.xor.i64.p0i32(i64* <ptr>, i64 <delta>)
     
    Overview:
    @@ -7373,13 +7344,13 @@ LLVM.

 %mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
 %ptr     = bitcast i8* %mallocP to i32*
            store i32 0x0F0F, %ptr
-%result0 = call i32 @llvm.atomic.load.nand.i32.p0i32( i32* %ptr, i32 0xFF )
+%result0 = call i32 @llvm.atomic.load.nand.i32.p0i32(i32* %ptr, i32 0xFF)
                                        ; yields {i32}:result0 = 0x0F0F
-%result1 = call i32 @llvm.atomic.load.and.i32.p0i32( i32* %ptr, i32 0xFF )
+%result1 = call i32 @llvm.atomic.load.and.i32.p0i32(i32* %ptr, i32 0xFF)
                                        ; yields {i32}:result1 = 0xFFFFFFF0
-%result2 = call i32 @llvm.atomic.load.or.i32.p0i32( i32* %ptr, i32 0F )
+%result2 = call i32 @llvm.atomic.load.or.i32.p0i32(i32* %ptr, i32 0F)
                                        ; yields {i32}:result2 = 0xF0
-%result3 = call i32 @llvm.atomic.load.xor.i32.p0i32( i32* %ptr, i32 0F )
+%result3 = call i32 @llvm.atomic.load.xor.i32.p0i32(i32* %ptr, i32 0F)
                                        ; yields {i32}:result3 = FF
 %memval1 = load i32* %ptr              ; yields {i32}:memval1 = F0
@@ -7403,31 +7374,31 @@ LLVM.

    address spaces. Not all targets support all bit widths however.

    -  declare i8 @llvm.atomic.load.max.i8.p0i8( i8* <ptr>, i8 <delta> )
    -  declare i16 @llvm.atomic.load.max.i16.p0i16( i16* <ptr>, i16 <delta> )
    -  declare i32 @llvm.atomic.load.max.i32.p0i32( i32* <ptr>, i32 <delta> )
    -  declare i64 @llvm.atomic.load.max.i64.p0i64( i64* <ptr>, i64 <delta> )
    +  declare i8 @llvm.atomic.load.max.i8.p0i8(i8* <ptr>, i8 <delta>)
    +  declare i16 @llvm.atomic.load.max.i16.p0i16(i16* <ptr>, i16 <delta>)
    +  declare i32 @llvm.atomic.load.max.i32.p0i32(i32* <ptr>, i32 <delta>)
    +  declare i64 @llvm.atomic.load.max.i64.p0i64(i64* <ptr>, i64 <delta>)
     
    -  declare i8 @llvm.atomic.load.min.i8.p0i8( i8* <ptr>, i8 <delta> )
    -  declare i16 @llvm.atomic.load.min.i16.p0i16( i16* <ptr>, i16 <delta> )
    -  declare i32 @llvm.atomic.load.min.i32.p0i32( i32* <ptr>, i32 <delta> )
    -  declare i64 @llvm.atomic.load.min.i64.p0i64( i64* <ptr>, i64 <delta> )
    +  declare i8 @llvm.atomic.load.min.i8.p0i8(i8* <ptr>, i8 <delta>)
    +  declare i16 @llvm.atomic.load.min.i16.p0i16(i16* <ptr>, i16 <delta>)
    +  declare i32 @llvm.atomic.load.min.i32.p0i32(i32* <ptr>, i32 <delta>)
    +  declare i64 @llvm.atomic.load.min.i64.p0i64(i64* <ptr>, i64 <delta>)
     
    -  declare i8 @llvm.atomic.load.umax.i8.p0i8( i8* <ptr>, i8 <delta> )
    -  declare i16 @llvm.atomic.load.umax.i16.p0i16( i16* <ptr>, i16 <delta> )
    -  declare i32 @llvm.atomic.load.umax.i32.p0i32( i32* <ptr>, i32 <delta> )
    -  declare i64 @llvm.atomic.load.umax.i64.p0i64( i64* <ptr>, i64 <delta> )
    +  declare i8 @llvm.atomic.load.umax.i8.p0i8(i8* <ptr>, i8 <delta>)
    +  declare i16 @llvm.atomic.load.umax.i16.p0i16(i16* <ptr>, i16 <delta>)
    +  declare i32 @llvm.atomic.load.umax.i32.p0i32(i32* <ptr>, i32 <delta>)
    +  declare i64 @llvm.atomic.load.umax.i64.p0i64(i64* <ptr>, i64 <delta>)
     
    -  declare i8 @llvm.atomic.load.umin.i8.p0i8( i8* <ptr>, i8 <delta> )
    -  declare i16 @llvm.atomic.load.umin.i16.p0i16( i16* <ptr>, i16 <delta> )
    -  declare i32 @llvm.atomic.load.umin.i32.p0i32( i32* <ptr>, i32 <delta> )
    -  declare i64 @llvm.atomic.load.umin.i64.p0i64( i64* <ptr>, i64 <delta> )
    +  declare i8 @llvm.atomic.load.umin.i8.p0i8(i8* <ptr>, i8 <delta>)
    +  declare i16 @llvm.atomic.load.umin.i16.p0i16(i16* <ptr>, i16 <delta>)
    +  declare i32 @llvm.atomic.load.umin.i32.p0i32(i32* <ptr>, i32 <delta>)
    +  declare i64 @llvm.atomic.load.umin.i64.p0i64(i64* <ptr>, i64 <delta>)
     
    Overview:
    @@ -7452,13 +7423,13 @@ LLVM.

 %mallocP = tail call i8* @malloc(i32 ptrtoint (i32* getelementptr (i32* null, i32 1) to i32))
 %ptr     = bitcast i8* %mallocP to i32*
            store i32 7, %ptr
-%result0 = call i32 @llvm.atomic.load.min.i32.p0i32( i32* %ptr, i32 -2 )
+%result0 = call i32 @llvm.atomic.load.min.i32.p0i32(i32* %ptr, i32 -2)
                                        ; yields {i32}:result0 = 7
-%result1 = call i32 @llvm.atomic.load.max.i32.p0i32( i32* %ptr, i32 8 )
+%result1 = call i32 @llvm.atomic.load.max.i32.p0i32(i32* %ptr, i32 8)
                                        ; yields {i32}:result1 = -2
-%result2 = call i32 @llvm.atomic.load.umin.i32.p0i32( i32* %ptr, i32 10 )
+%result2 = call i32 @llvm.atomic.load.umin.i32.p0i32(i32* %ptr, i32 10)
                                        ; yields {i32}:result2 = 8
-%result3 = call i32 @llvm.atomic.load.umax.i32.p0i32( i32* %ptr, i32 30 )
+%result3 = call i32 @llvm.atomic.load.umax.i32.p0i32(i32* %ptr, i32 30)
                                        ; yields {i32}:result3 = 8
 %memval1 = load i32* %ptr              ; yields {i32}:memval1 = 30
@@ -7613,7 +7584,7 @@ LLVM.

    Syntax:
    -  declare void @llvm.var.annotation(i8* <val>, i8* <str>, i8* <str>, i32  <int> )
    +  declare void @llvm.var.annotation(i8* <val>, i8* <str>, i8* <str>, i32  <int>)
     
    Overview:
    @@ -7644,11 +7615,11 @@ LLVM.

    any integer bit width.

    -  declare i8 @llvm.annotation.i8(i8 <val>, i8* <str>, i8* <str>, i32  <int> )
    -  declare i16 @llvm.annotation.i16(i16 <val>, i8* <str>, i8* <str>, i32  <int> )
    -  declare i32 @llvm.annotation.i32(i32 <val>, i8* <str>, i8* <str>, i32  <int> )
    -  declare i64 @llvm.annotation.i64(i64 <val>, i8* <str>, i8* <str>, i32  <int> )
    -  declare i256 @llvm.annotation.i256(i256 <val>, i8* <str>, i8* <str>, i32  <int> )
    +  declare i8 @llvm.annotation.i8(i8 <val>, i8* <str>, i8* <str>, i32  <int>)
    +  declare i16 @llvm.annotation.i16(i16 <val>, i8* <str>, i8* <str>, i32  <int>)
    +  declare i32 @llvm.annotation.i32(i32 <val>, i8* <str>, i8* <str>, i32  <int>)
    +  declare i64 @llvm.annotation.i64(i64 <val>, i8* <str>, i8* <str>, i32  <int>)
    +  declare i256 @llvm.annotation.i256(i256 <val>, i8* <str>, i8* <str>, i32  <int>)
     
    Overview:
    @@ -7702,7 +7673,7 @@ LLVM.

    Syntax:
    -  declare void @llvm.stackprotector( i8* <guard>, i8** <slot> )
    +  declare void @llvm.stackprotector(i8* <guard>, i8** <slot>)
     
    Overview:
    @@ -7736,8 +7707,8 @@ LLVM.

    Syntax:
    -  declare i32 @llvm.objectsize.i32( i8* <object>, i1 <type> )
    -  declare i64 @llvm.objectsize.i64( i8* <object>, i1 <type> )
    +  declare i32 @llvm.objectsize.i32(i8* <object>, i1 <type>)
    +  declare i64 @llvm.objectsize.i64(i8* <object>, i1 <type>)
     
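A hedged usage sketch (not from the original patch; %obj and %size are hypothetical):

  %size = call i32 @llvm.objectsize.i32(i8* %obj, i1 false)  ; with i1 false, returns -1 when the size is unknown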
    Overview:
    @@ -7773,7 +7744,7 @@ LLVM.

    Chris Lattner
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-26 23:56:15 +0200 (Wed, 26 May 2010) $
+Last modified: $Date: 2010-07-13 14:26:09 +0200 (Tue, 13 Jul 2010) $
diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html
index 2e2fdc5..dd90478 100644
--- a/docs/MakefileGuide.html
+++ b/docs/MakefileGuide.html
@@ -652,7 +652,7 @@ the profiled tools (gmon.out).
    DISABLE_ASSERTIONS
 If set to any value, causes the build to disable assertions, even if
-building a release or profile build. This will exclude all assertion check
+building a debug or profile build. This will exclude all assertion check
 code from the build. LLVM will execute faster, but with little help when things go wrong.
    EXPERIMENTAL_DIRS
    @@ -1025,7 +1025,7 @@ Reid Spencer
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-07-07 09:48:00 +0200 (Wed, 07 Jul 2010) $
diff --git a/docs/Passes.html b/docs/Passes.html
index c239e44..70d9097 100644
--- a/docs/Passes.html
+++ b/docs/Passes.html
@@ -27,7 +27,7 @@ while () {
   my $o = $order{$1};
   $o = "000" unless defined $o;
   push @x, "$o-$1$2\n";
-  push @y, "$o $2\n";
+  push @y, "$o -$1: $2\n";
 }
 @x = map { s/^\d\d\d//; $_ } sort @x;
 @y = map { s/^\d\d\d//; $_ } sort @y;
@@ -91,29 +91,46 @@

    \n" if ! -dot-postdom-onlyPrint post dominator tree of function to 'dot' file (with no function bodies) -globalsmodref-aaSimple mod/ref analysis for globals -instcountCounts the various types of Instructions +-interprocedural-aa-evalExhaustive Interprocedural Alias Analysis Precision Evaluator +-interprocedural-basic-aaInterprocedural Basic Alias Analysis -intervalsInterval Partition Construction --loopsNatural Loop Construction +-iv-usersInduction Variable Users +-lazy-value-infoLazy Value Information Analysis +-ldaLoop Dependence Analysis +-libcall-aaLibCall Alias Analysis +-lintCheck for common errors in LLVM IR +-live-valuesValue Liveness Analysis +-loopsNatural Loop Information -memdepMemory Dependence Analysis +-module-debuginfoPrints module debug info metadata -no-aaNo Alias Analysis (always returns 'may' alias) -no-profileNo Profile Information +-pointertrackingTrack pointer bounds -postdomfrontierPost-Dominance Frontier Construction -postdomtreePost-Dominator Tree Construction -print-alias-setsAlias Set Printer -print-callgraphPrint a call graph -print-callgraph-sccsPrint SCCs of the Call Graph -print-cfg-sccsPrint SCCs of each function CFG +-print-dbginfoPrint debug info in human readable form +-print-dom-infoDominator Info Printer -print-externalfnconstantsPrint external fn callsites passed constants -print-functionPrint function to stderr -print-modulePrint module to stderr -print-used-typesFind Used Types +-profile-estimatorEstimate profiling information -profile-loaderLoad profile information from llvmprof.out +-profile-verifierVerify profiling information -scalar-evolutionScalar Evolution Analysis +-scev-aaScalarEvolution-based Alias Analysis -targetdataTarget Data Layout TRANSFORM PASSES OptionName +-abcdRemove redundant conditional branches -adceAggressive Dead Code Elimination +-always-inlineInliner for always_inline functions -argpromotionPromote 'by reference' arguments to scalars -block-placementProfile Guided Basic Block Placement -break-crit-edgesBreak critical edges in CFG @@ -125,17 +142,14 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if ! -deadtypeelimDead Type Elimination -dieDead Instruction Elimination -dseDead Store Elimination +-functionattrsDeduce function attributes -globaldceDead Global Elimination -globaloptGlobal Variable Optimizer -gvnGlobal Value Numbering --indmemremIndirect Malloc and Free Removal -indvarsCanonicalize Induction Variables -inlineFunction Integration/Inlining --insert-block-profilingInsert instrumentation for block profiling -insert-edge-profilingInsert instrumentation for edge profiling --insert-function-profilingInsert instrumentation for function profiling --insert-null-profiling-rsMeasure profiling framework overhead --insert-rs-profiling-frameworkInsert random sampling instrumentation framework +-insert-optimal-edge-profilingInsert optimal instrumentation for edge profiling -instcombineCombine redundant instructions -internalizeInternalize Global Symbols -ipconstpropInterprocedural constant propagation @@ -152,22 +166,32 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if ! -loop-unrollUnroll loops -loop-unswitchUnswitch loops -loopsimplifyCanonicalize natural loops --lowerallocsLower allocations from instructions to calls -lowerinvokeLower invoke and unwind, for unwindless code generators -lowersetjmpLower Set Jump -lowerswitchLower SwitchInst's to branches -mem2regPromote Memory to Register -memcpyoptOptimize use of memcpy and friends +-mergefuncMerge Functions -mergereturnUnify function exit nodes +-partial-inlinerPartial Inliner +-partialspecializationPartial Specialization -prune-ehRemove unused exception handling info -reassociateReassociate expressions -reg2memDemote all values to stack slots -scalarreplScalar Replacement of Aggregates -sccpSparse Conditional Constant Propagation +-sinkCode Sinking -simplify-libcallsSimplify well-known library calls +-simplify-libcalls-halfpowrSimplify half_powr library calls -simplifycfgSimplify the CFG +-split-gepsSplit complex GEPs into simple GEPs +-ssiStatic Single Information Construction +-ssi-everythingStatic Single Information Construction (everything, intended for debugging) -stripStrip all symbols from a module +-strip-dead-debug-infoStrip debug info for unused symbols -strip-dead-prototypesRemove unused function declarations +-strip-debug-declareStrip all llvm.dbg.declare intrinsics +-strip-nondebugStrip all symbols, except dbg symbols, from a module -sretpromotionPromote sret arguments -tailcallelimTail Call Elimination -tailduplicateTail Duplication @@ -177,6 +201,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if ! OptionName -deadarghaX0rDead Argument Hacking (BUGPOINT USE ONLY; DO NOT USE) -extract-blocksExtract Basic Blocks From Module (for bugpoint use) +-instnamerAssign names to anonymous instructions -preverifyPreliminary module verification -verifyModule Verifier -view-cfgView CFG of function @@ -196,7 +221,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if !

    This is a simple N^2 alias analysis accuracy evaluator. @@ -210,7 +235,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if !

    @@ -222,7 +247,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

    \n" if !

    Yet to be written.

@@ -230,7 +255,7 @@
@@ -242,7 +267,7 @@
@@ -253,7 +278,7 @@
@@ -270,7 +295,7 @@
@@ -281,7 +306,7 @@
@@ -292,7 +317,7 @@
@@ -304,7 +329,7 @@
@@ -316,7 +341,7 @@
@@ -329,7 +354,7 @@
@@ -341,7 +366,7 @@
@@ -355,7 +380,7 @@
@@ -367,7 +392,7 @@
@@ -381,7 +406,7 @@
@@ -394,7 +419,7 @@
@@ -404,7 +429,30 @@

+This pass implements a simple N^2 alias analysis accuracy evaluator. Basically, for each function in the program, it simply queries to see how the alias analysis implementation answers alias queries between each pair of pointers in the function.

+This pass defines the default implementation of the Alias Analysis interface that simply implements a few identities (two different globals cannot alias, etc), but otherwise does no analysis.

@@ -420,7 +468,80 @@

+Bookkeeping for "interesting" users of expressions computed from induction variables.

+Interface for lazy computation of value constraint information.

+Loop dependence analysis framework, which is used to detect dependences in memory accesses in loops.

+LibCall Alias Analysis.

+This pass statically checks for common and easily-identified constructs which produce undefined or likely unintended behavior in LLVM IR.

+It is not a guarantee of correctness, in two ways. First, it isn't comprehensive. There are checks which could be done statically which are not yet implemented. Some of these are indicated by TODO comments, but those aren't comprehensive either. Second, many conditions cannot be checked statically. This pass does no dynamic instrumentation, so it can't check for all possible problems.

+Another limitation is that it assumes all code will be executed. A store through a null pointer in a basic block which is never reached is harmless, but this pass will warn about it anyway.

+Optimization passes may make conditions that this pass checks for more or less obvious. If an optimization pass appears to be introducing a warning, it may be that the optimization pass is merely exposing an existing condition in the code.

+This code may be run before instcombine. In many cases, instcombine checks for the same kinds of things and turns instructions with undefined behavior into unreachable (or equivalent). Because of this, this pass makes some effort to look through bitcasts and so on.

+LLVM IR Value liveness analysis pass.

@@ -433,7 +554,7 @@
@@ -446,7 +567,20 @@

+This pass decodes the debug info metadata in a module and prints it in a (sufficiently-prepared-) human-readable form.

+For example, run this pass from opt along with the -analyze option, and it'll print to standard output.

@@ -458,7 +592,7 @@
@@ -469,7 +603,16 @@

+Tracking of pointer bounds.

@@ -480,7 +623,7 @@
@@ -491,7 +634,7 @@

    Yet to be written.

@@ -499,7 +642,7 @@
@@ -510,7 +653,7 @@
@@ -521,7 +664,7 @@
@@ -532,7 +675,31 @@

+Pass that prints instructions, and associated debug info:
+  • source/line/col information
+  • original variable name
+  • original type name

+Dominator Info Printer.

@@ -545,7 +712,7 @@
@@ -557,7 +724,7 @@
@@ -567,7 +734,7 @@
@@ -578,7 +745,17 @@

+Profiling information that estimates the profiling information in a very crude and unimaginative way.

@@ -589,7 +766,15 @@

+Pass that checks profiling information for plausibility.

@@ -608,7 +793,45 @@

+Simple alias analysis implemented in terms of ScalarEvolution queries.

+This differs from traditional loop dependence analysis in that it tests for dependencies within a single iteration of a loop, rather than dependencies between different iterations.

+ScalarEvolution has a more complete understanding of pointer arithmetic than BasicAliasAnalysis' collection of ad-hoc analyses.

+performs code stripping. this transformation can delete:
+  1. names for virtual registers
+  2. symbols for internal globals and functions
+  3. debug information

+note that this transformation makes code much less readable, so it should only be used in situations where the strip utility would be used, such as reducing code size or making it harder to reverse engineer code.

Provides other passes access to information on how the size and alignment ...
@@ -623,7 +846,22 @@

+ABCD removes conditional branch instructions that can be proved redundant. With the SSI representation, each variable has a constraint. By analyzing these constraints we can prove that a branch is redundant. When a branch is proved redundant it means that one direction will always be taken; thus, we can change this branch into an unconditional jump.

+It is advisable to run SimplifyCFG and Aggressive Dead Code Elimination after ABCD to clean up the code.

ADCE aggressively tries to eliminate code. This pass is similar to ...
@@ -634,7 +872,16 @@

+A custom inliner that handles only functions that are marked as "always inline".

@@ -665,7 +912,7 @@

This pass is a very simple profile guided basic block placement algorithm. ...
@@ -677,7 +924,7 @@

@@ -690,7 +937,7 @@

This pass munges the code in the input function to better prepare it for ...
@@ -700,7 +947,7 @@

@@ -713,7 +960,7 @@

This file implements constant propagation and merging. It looks for ...
@@ -729,7 +976,7 @@

@@ -741,7 +988,7 @@
@@ -759,7 +1006,7 @@
@@ -771,7 +1018,7 @@
@@ -782,7 +1029,7 @@
@@ -793,7 +1040,22 @@

+A simple interprocedural pass which walks the call-graph, looking for functions which do not access or only read non-local memory, and marking them readnone/readonly. In addition, it marks function arguments (of pointer type) 'nocapture' if a call to the function does not create any copies of the pointer value that outlive the call. This more or less means that the pointer is only dereferenced, and not returned from the function or stored in a global. This pass is implemented as a bottom-up traversal of the call-graph.

@@ -807,7 +1069,7 @@
@@ -819,7 +1081,7 @@
@@ -828,27 +1090,9 @@

-This pass finds places where memory allocation functions may escape into indirect land. Some transforms are much easier (aka possible) only if free or malloc are not called indirectly.

-Thus find places where the address of memory functions are taken and construct bounce functions with direct calls of those functions.

@@ -899,7 +1143,7 @@
@@ -909,26 +1153,7 @@

-This pass instruments the specified program with counters for basic block profiling, which counts the number of times each basic block executes. This is the most basic form of profiling, which can tell which blocks are hot, but cannot reliably detect hot paths through the CFG.

-Note that this implementation is very naïve. Control equivalent regions of the CFG should not require duplicate counters, but it does put duplicate counters in.

@@ -946,51 +1171,18 @@

-This pass instruments the specified program with counters for function profiling, which counts the number of times each function is called.

-The basic profiler that does nothing. It is the default profiler and thus terminates RSProfiler chains. It is useful for measuring framework overhead.

-The second stage of the random-sampling instrumentation framework, duplicates all instructions in a function, ignoring the profiling code, then connects the two versions together at the entry and at backedges. At each connection point a choice is made as to whether to jump to the profiled code (take a sample) or execute the unprofiled code.

-After this pass, it is highly recommended to run mem2reg and adce. instcombine, load-vn, gdce, and dse also are good to run afterwards.

 This pass instruments the specified program with counters for edge profiling.
+Edge profiling can give a reasonable approximation of the hot paths through a
+program, and is used for a wide variety of program transformations.

@@ -1044,7 +1236,7 @@
@@ -1056,7 +1248,7 @@
@@ -1070,7 +1262,7 @@
@@ -1081,7 +1273,7 @@

    @@ -1110,7 +1302,7 @@ if (X < 3) {

    @@ -1139,7 +1331,7 @@ if (X < 3) {

    @@ -1175,7 +1367,7 @@ if (X < 3) {

    @@ -1188,7 +1380,7 @@ if (X < 3) {

    @@ -1201,7 +1393,7 @@ if (X < 3) {

    @@ -1213,7 +1405,7 @@ if (X < 3) {

    @@ -1224,7 +1416,7 @@ if (X < 3) {

    @@ -1238,7 +1430,7 @@ if (X < 3) {

    A simple loop rotation transformation.

    @@ -1246,7 +1438,7 @@ if (X < 3) {

    @@ -1258,7 +1450,7 @@ if (X < 3) {

    @@ -1288,7 +1480,7 @@ if (X < 3) {

    @@ -1329,7 +1521,7 @@ if (X < 3) {

    @@ -1345,7 +1537,7 @@ if (X < 3) {

    @@ -1386,7 +1578,7 @@ if (X < 3) {

    @@ -1415,7 +1607,7 @@ if (X < 3) {

    @@ -1427,7 +1619,7 @@ if (X < 3) {

    @@ -1443,7 +1635,7 @@ if (X < 3) {

    @@ -1454,7 +1646,28 @@ if (X < 3) {

+This pass looks for equivalent functions that are mergeable and folds them.

+A hash is computed from the function, based on its type and number of basic blocks. Once all hashes are computed, we perform an expensive equality comparison on each function pair. This takes n^2/2 comparisons per bucket, so it's important that the hash function be high quality. The equality comparison iterates through each instruction in each basic block.

+When a match is found the functions are folded. If both functions are overridable, we move the functionality into a new internal function and leave two overridable thunks to it.

    @@ -1465,7 +1678,33 @@ if (X < 3) {

+This pass performs partial inlining, typically by inlining an if statement that surrounds the body of the function.

+This pass finds function arguments that are often a common constant and specializes a version of the called function for that constant.

+This pass simply does the cloning for functions it specializes. It depends on IPSCCP and DAE to clean up the results.

+The initial heuristic favors constant arguments that are used in control flow.

    @@ -1478,7 +1717,7 @@ if (X < 3) {

    @@ -1501,7 +1740,7 @@ if (X < 3) {

    @@ -1518,7 +1757,7 @@ if (X < 3) {

    @@ -1540,7 +1779,7 @@ if (X < 3) {

    @@ -1563,7 +1802,17 @@ if (X < 3) {

+This pass moves instructions into successor blocks, when possible, so that they aren't executed on paths where their results aren't needed.

    @@ -1576,7 +1825,17 @@ if (X < 3) {

+Simple pass that applies an experimental transformation on calls to specific functions.

    @@ -1595,11 +1854,48 @@ if (X < 3) {

+This function breaks GEPs with more than 2 non-zero operands into smaller GEPs each with no more than 2 non-zero operands. This exposes redundancy between GEPs with common initial operand sequences.

+This pass converts a list of variables to the Static Single Information form.

+We are building an on-demand representation, that is, we do not convert every single variable in the target function to SSI form. Rather, we receive a list of target variables that must be converted. We also do not completely convert a target variable to the SSI format. Instead, we only change the variable in the points where new information can be attached to its live range, that is, at branch points.

+A pass that runs SSI on every non-void variable, intended for debugging.

-Performs code stripping. This transformation can delete:
+performs code stripping. this transformation can delete:
@@ -1609,7 +1905,7 @@ if (X < 3) {
-Note that this transformation makes code much less readable, so it should
+note that this transformation makes code much less readable, so it should
 only be used in situations where the strip utility would be used, such as reducing code size or making it harder to reverse engineer code.

    @@ -1617,7 +1913,7 @@ if (X < 3) {

    @@ -1630,7 +1926,41 @@ if (X < 3) {

+This pass implements code stripping. Specifically, it can delete:
+  • names for virtual registers
+  • symbols for internal globals and functions
+  • debug information
+Note that this transformation makes code much less readable, so it should only be used in situations where the 'strip' utility would be used, such as reducing code size or making it harder to reverse engineer code.

+This pass implements code stripping. Specifically, it can delete:
+  • names for virtual registers
+  • symbols for internal globals and functions
+  • debug information
+Note that this transformation makes code much less readable, so it should only be used in situations where the 'strip' utility would be used, such as reducing code size or making it harder to reverse engineer code.

    @@ -1653,7 +1983,7 @@ if (X < 3) {

    @@ -1685,7 +2015,7 @@ if (X < 3) {

    @@ -1705,7 +2035,7 @@ if (X < 3) {

    @@ -1716,7 +2046,7 @@ if (X < 3) {

    @@ -1726,7 +2056,19 @@ if (X < 3) {

+This is a little utility pass that gives instructions names; this is mostly useful when diffing the effect of an optimization because deleting an unnamed instruction can change all other instruction numbering, making the diff very noisy.

    @@ -1742,7 +2084,7 @@ if (X < 3) {

    @@ -1793,7 +2135,7 @@ if (X < 3) {

    @@ -1803,7 +2145,7 @@ if (X < 3) {

    @@ -1814,7 +2156,7 @@ if (X < 3) {

    @@ -1824,7 +2166,7 @@ if (X < 3) {

    @@ -1837,7 +2179,7 @@ if (X < 3) {

    @@ -1847,7 +2189,7 @@ if (X < 3) {

@@ -1869,7 +2211,7 @@ if (X < 3) {
 Reid Spencer
    LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 11:33:18 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-07-06 17:52:15 +0200 (Tue, 06 Jul 2010) $
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index e017530..f70a0d3 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -401,7 +401,7 @@ height="369">
   metadata, ;; Reference to type descriptor
   i1,       ;; True if the global is local to compile unit (static)
   i1,       ;; True if the global is defined in the compile unit (not extern)
-  { }*      ;; Reference to the global variable
+  {}*       ;; Reference to the global variable
 }
    @@ -433,6 +433,13 @@ provide details such as name, type and where the variable is defined.

   metadata, ;; Reference to type descriptor
   i1,       ;; True if the global is local to compile unit (static)
   i1        ;; True if the global is defined in the compile unit (not extern)
+  i32       ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual
+  i32       ;; Index into a virtual function
+  metadata, ;; indicates which base type contains the vtable pointer for the
+            ;; derived class
+  i1        ;; isArtificial
+  i1        ;; isOptimized
+  Function *;; Pointer to LLVM function
 }
    @@ -782,11 +789,11 @@ DW_TAG_return_variable = 258
    -  void %llvm.dbg.declare( { } *, metadata )
    +  void %llvm.dbg.declare({}*, metadata)
     

 This intrinsic provides information about a local element (ex. variable). The
-first argument is the alloca for the variable, cast to a { }*. The
+first argument is the alloca for the variable, cast to a {}*. The
 second argument is the %llvm.dbg.variable containing the description of the variable.

    @@ -800,7 +807,7 @@ DW_TAG_return_variable = 258
    -  void %llvm.dbg.value( metadata, i64, metadata )
    +  void %llvm.dbg.value(metadata, i64, metadata)
     

This intrinsic provides information when a user source variable is set to a ...
@@ -854,14 +861,14 @@ entry:
   %X = alloca i32, align 4                      ; <i32*> [#uses=4]
   %Y = alloca i32, align 4                      ; <i32*> [#uses=4]
   %Z = alloca i32, align 4                      ; <i32*> [#uses=3]
-  %0 = bitcast i32* %X to { }*                  ; <{ }*> [#uses=1]
-  call void @llvm.dbg.declare({ }* %0, metadata !0), !dbg !7
+  %0 = bitcast i32* %X to {}*                   ; <{}*> [#uses=1]
+  call void @llvm.dbg.declare({}* %0, metadata !0), !dbg !7
   store i32 21, i32* %X, !dbg !8
-  %1 = bitcast i32* %Y to { }*                  ; <{ }*> [#uses=1]
-  call void @llvm.dbg.declare({ }* %1, metadata !9), !dbg !10
+  %1 = bitcast i32* %Y to {}*                   ; <{}*> [#uses=1]
+  call void @llvm.dbg.declare({}* %1, metadata !9), !dbg !10
   store i32 22, i32* %Y, !dbg !11
-  %2 = bitcast i32* %Z to { }*                  ; <{ }*> [#uses=1]
-  call void @llvm.dbg.declare({ }* %2, metadata !12), !dbg !14
+  %2 = bitcast i32* %Z to {}*                   ; <{}*> [#uses=1]
+  call void @llvm.dbg.declare({}* %2, metadata !12), !dbg !14
   store i32 23, i32* %Z, !dbg !15
   %tmp = load i32* %X, !dbg !16                 ; <i32> [#uses=1]
   %tmp1 = load i32* %Y, !dbg !16                ; <i32> [#uses=1]
@@ -872,7 +879,7 @@ entry:
   ret void, !dbg !18
 }

-declare void @llvm.dbg.declare({ }*, metadata) nounwind readnone
+declare void @llvm.dbg.declare({}*, metadata) nounwind readnone

 !0 = metadata !{i32 459008, metadata !1, metadata !"X", metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
@@ -914,7 +921,7 @@

    -call void @llvm.dbg.declare({ }* %0, metadata !0), !dbg !7   
    +call void @llvm.dbg.declare({}* %0, metadata !0), !dbg !7   
     
    @@ -949,7 +956,7 @@ call void @llvm.dbg.declare({ }* %0, metadata !0), !dbg !7
    -call void @llvm.dbg.declare({ }* %2, metadata !12), !dbg !14
    +call void @llvm.dbg.declare({}* %2, metadata !12), !dbg !14
     
@@ -1773,7 +1780,7 @@ enum Trees {
 Chris Lattner
 LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-06-05 00:49:55 +0200 (Sat, 05 Jun 2010) $

diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html
index 4ad6191..0bdb6dd 100644
--- a/docs/TableGenFundamentals.html
+++ b/docs/TableGenFundamentals.html
@@ -144,7 +144,6 @@ file prints this (at the time of this writing):

   bit mayLoad = 0;
   bit mayStore = 0;
   bit isImplicitDef = 0;
-  bit isTwoAddress = 1;
   bit isConvertibleToThreeAddress = 1;
   bit isCommutable = 1;
   bit isTerminator = 0;

@@ -422,11 +421,12 @@ class. This operation is analogous to $(foreach) in GNU make.
    !null(a)
    An integer {0,1} indicating whether list 'a' is empty.
 !if(a,b,c)
-  'b' if the result of integer operator 'a' is nonzero, 'c' otherwise.
+  'b' if the result of 'int' or 'bit' operator 'a' is nonzero,
+  'c' otherwise.
 !eq(a,b)
-  Integer one if string a is equal to string b, zero otherwise. This
-  only operates on string objects. Use !cast to compare other
-  types of objects.
+  Integer one if string a is equal to string b, zero otherwise. This
+  only operates on string, int and bit objects. Use !cast to
+  compare other types of objects.

 Note that all of the values have rules specifying how they convert to values
 for different types.

@@ -687,6 +687,91 @@ Here is an example TableGen fragment that shows this idea:

    +

+A defm can also be used inside a multiclass, providing several levels of
+multiclass instantiation.
+

    + +
    +
    +class Instruction<bits<4> opc, string Name> {
    +  bits<4> opcode = opc;
    +  string name = Name;
    +}
    +
    +multiclass basic_r<bits<4> opc> {
    +  def rr : Instruction<opc, "rr">;
    +  def rm : Instruction<opc, "rm">;
    +}
    +
    +multiclass basic_s<bits<4> opc> {
    +  defm SS : basic_r<opc>;
    +  defm SD : basic_r<opc>;
    +  def X : Instruction<opc, "x">;
    +}
    +
    +multiclass basic_p<bits<4> opc> {
    +  defm PS : basic_r<opc>;
    +  defm PD : basic_r<opc>;
    +  def Y : Instruction<opc, "y">;
    +}
    +
    +defm ADD : basic_s<0xf>, basic_p<0xf>;
    +...
    +
    +// Results
    +def ADDPDrm { ...
    +def ADDPDrr { ...
    +def ADDPSrm { ...
    +def ADDPSrr { ...
    +def ADDSDrm { ...
    +def ADDSDrr { ...
    +def ADDY { ...
    +def ADDX { ...
    +
    +
    + +

+defm declarations can inherit from classes too. The
+rule to follow is that the class list must start after the
+last multiclass, and there must be at least one multiclass
+before them.
+

    + +
    +
    +class XD { bits<4> Prefix = 11; }
    +class XS { bits<4> Prefix = 12; }
    +
    +class I<bits<4> op> {
    +  bits<4> opcode = op;
    +}
    +
    +multiclass R {
    +  def rr : I<4>;
    +  def rm : I<2>;
    +}
    +
    +multiclass Y {
    +  defm SS : R, XD;
    +  defm SD : R, XS;
    +}
    +
    +defm Instr : Y;
    +
    +// Results
    +def InstrSDrm {
    +  bits<4> opcode = { 0, 0, 1, 0 };
    +  bits<4> Prefix = { 1, 1, 0, 0 };
    +}
    +...
    +def InstrSSrr {
    +  bits<4> opcode = { 0, 1, 0, 0 };
    +  bits<4> Prefix = { 1, 0, 1, 1 };
    +}
    +
    +
    +
    @@ -754,6 +839,32 @@ examples:

    need to be added to several records, and the records do not otherwise need to be opened, as in the case with the CALL* instructions above.

    +

+It's also possible to use "let" expressions inside multiclasses, providing
+more ways to factor out commonality from the records, especially when using
+several levels of multiclass instantiation. This also avoids the need to use
+"let" expressions within subsequent records inside a multiclass.

    + +
    +
    +multiclass basic_r<bits<4> opc> {
    +  let Predicates = [HasSSE2] in {
    +    def rr : Instruction<opc, "rr">;
    +    def rm : Instruction<opc, "rm">;
    +  }
    +  let Predicates = [HasSSE3] in
    +    def rx : Instruction<opc, "rx">;
    +}
    +
    +multiclass basic_ss<bits<4> opc> {
    +  let IsDouble = 0 in
    +    defm SS : basic_r<opc>;
    +
    +  let IsDouble = 1 in
    +    defm SD : basic_r<opc>;
    +}
    +
    +defm ADD : basic_ss<0xf>;
    +
    @@ -795,7 +906,7 @@ This should highlight the APIs in TableGen/Record.h.

    Chris Lattner
    LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-06-21 22:35:09 +0200 (Mon, 21 Jun 2010) $

diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
index 0b9dd9f..aa2612c 100644
--- a/docs/WritingAnLLVMBackend.html
+++ b/docs/WritingAnLLVMBackend.html
@@ -913,9 +913,6 @@ implementation in SparcRegisterInfo.cpp:
 * getCalleeSavedRegs — Returns a list of callee-saved registers in the
   order of the desired callee-save stack frame offset.
-* getCalleeSavedRegClasses — Returns a list of preferred
-  register classes with which to spill each callee saved register.
 * getReservedRegs — Returns a bitset indexed by physical register
   numbers, indicating if a particular register is unavailable.

@@ -1313,7 +1310,8 @@ implementation in SparcInstrInfo.cpp:
   a direct store to a stack slot, return the register number of the
   destination and the FrameIndex of the stack slot.
-* copyRegToReg — Copy values between a pair of registers.
+* copyPhysReg — Copy values between a pair of physical
+  registers.
 * storeRegToStackSlot — Store a register value to a stack slot.

@@ -2554,7 +2552,7 @@ with assembler.
 Mason Woo and Misha Brukman
 The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-07-11 19:01:17 +0200 (Sun, 11 Jul 2010) $

diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html
index e353c2e..94c5ceb 100644
--- a/docs/WritingAnLLVMPass.html
+++ b/docs/WritingAnLLVMPass.html
@@ -189,18 +189,13 @@ LIBRARYNAME = Hello
 # dlopen/dlsym on the resulting library.
 LOADABLE_MODULE = 1

-# Tell the build system which LLVM libraries your pass needs. You'll probably
-# need at least LLVMSystem.a, LLVMSupport.a, LLVMCore.a but possibly several
-# others too.
-LLVMLIBS = LLVMCore.a LLVMSupport.a LLVMSystem.a
-
 # Include the makefile implementation stuff
 include $(LEVEL)/Makefile.common

 This makefile specifies that all of the .cpp files in the current directory
 are to be compiled and linked together into a
-Debug/lib/Hello.so shared object that can be dynamically loaded by
+Debug+Asserts/lib/Hello.so shared object that can be dynamically loaded by
 the opt or bugpoint tools via their -load options. If your
 operating system uses a suffix other than .so (such as windows or Mac OS/X),
 the appropriate extension will be used.
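For orientation, here is a minimal sketch of the kind of pass this makefile builds (reconstructed to match the Hello example this section describes; it assumes the 2010-era C++ pass API, so details such as the FunctionPass constructor taking &ID may differ in other releases):

// Hello.cpp: skeleton of the pass compiled into Debug+Asserts/lib/Hello.so.
#include "llvm/Pass.h"
#include "llvm/Function.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

namespace {
  // Everything lives in an anonymous namespace: the pass needs no
  // external interface.
  struct Hello : public FunctionPass {
    static char ID;  // Pass identification, replacement for typeid.
    Hello() : FunctionPass(&ID) {}

    virtual bool runOnFunction(Function &F) {
      errs() << "Hello: " << F.getName() << "\n";
      return false;  // The IR was not modified.
    }
  };
}

char Hello::ID = 0;
static RegisterPass<Hello> X("hello", "Hello World Pass");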

    @@ -337,7 +332,7 @@ is supplied as fourth argument.

 Now that it's all together, compile the file with a simple "gmake" command
 in the local directory and you should get a new
-"Debug/lib/Hello.so" file. Note that everything in this file is
+"Debug+Asserts/lib/Hello.so" file. Note that everything in this file is
 contained in an anonymous namespace: this reflects the fact that passes are
 self contained units that do not need external interfaces (although they can
 have them) to be useful.

@@ -363,7 +358,7 @@ through our transformation like this (of course, any bitcode file will work):

    -$ opt -load ../../../Debug/lib/Hello.so -hello < hello.bc > /dev/null
    +$ opt -load ../../../Debug+Asserts/lib/Hello.so -hello < hello.bc > /dev/null
     Hello: __main
     Hello: puts
     Hello: main
    @@ -380,7 +375,7 @@ interesting way, we just throw away the result of opt (sending it to
     opt with the -help option:

    -$ opt -load ../../../Debug/lib/Hello.so -help
    +$ opt -load ../../../Debug+Asserts/lib/Hello.so -help
     OVERVIEW: llvm .bc -> .bc modular optimizer
     
     USAGE: opt [options] <input bitcode>
    @@ -408,7 +403,7 @@ the execution time of your pass along with the other passes you queue up.  For
     example:

    -$ opt -load ../../../Debug/lib/Hello.so -hello -time-passes < hello.bc > /dev/null
    +$ opt -load ../../../Debug+Asserts/lib/Hello.so -hello -time-passes < hello.bc > /dev/null
     Hello: __main
     Hello: puts
     Hello: main
    @@ -1423,7 +1418,7 @@ how our Hello World pass interacts with other passes.
Let's try it out with the gcse and licm passes:

    -$ opt -load ../../../Debug/lib/Hello.so -gcse -licm --debug-pass=Structure < hello.bc > /dev/null
    +$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -licm --debug-pass=Structure < hello.bc > /dev/null
     Module Pass Manager
       Function Pass Manager
         Dominator Set Construction
    @@ -1460,7 +1455,7 @@ passes.

    World pass in between the two passes:

    -$ opt -load ../../../Debug/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null
    +$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null
     Module Pass Manager
       Function Pass Manager
         Dominator Set Construction
    @@ -1501,7 +1496,7 @@ href="#getAnalysisUsage">getAnalysisUsage method to our pass:

    Now when we run our pass, we get this output:

    -$ opt -load ../../../Debug/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null
    +$ opt -load ../../../Debug+Asserts/lib/Hello.so -gcse -hello -licm --debug-pass=Structure < hello.bc > /dev/null
     Pass Arguments:  -gcse -hello -licm
     Module Pass Manager
       Function Pass Manager
    @@ -1742,8 +1737,8 @@ want:

     (gdb) break llvm::PassManager::run
     Breakpoint 1 at 0x2413bc: file Pass.cpp, line 70.
    -(gdb) run test.bc -load $(LLVMTOP)/llvm/Debug/lib/[libname].so -[passoption]
    -Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug/lib/[libname].so -[passoption]
    +(gdb) run test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
    +Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption]
     Breakpoint 1, PassManager::run (this=0xffbef174, M=@0x70b298) at Pass.cpp:70
     70      bool PassManager::run(Module &M) { return PM->run(M); }
     (gdb)
    @@ -1835,7 +1830,7 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.

    Chris Lattner
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-07-08 10:27:18 +0200 (Thu, 08 Jul 2010) $

diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html
index 602fd7d..783abb3 100644
--- a/docs/tutorial/LangImpl3.html
+++ b/docs/tutorial/LangImpl3.html
@@ -200,9 +200,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -574,8 +574,8 @@ ready> def bar(a) foo(a, 4.0) + bar(31337);
 Read function definition:
 define double @bar(double %a) {
 entry:
-  %calltmp = call double @foo( double %a, double 4.000000e+00 )
-  %calltmp1 = call double @bar( double 3.133700e+04 )
+  %calltmp = call double @foo(double %a, double 4.000000e+00)
+  %calltmp1 = call double @bar(double 3.133700e+04)
   %addtmp = fadd double %calltmp, %calltmp1
   ret double %addtmp
 }
@@ -596,7 +596,7 @@ ready> cos(1.234);
 Read top-level expression:
 define double @""() {
 entry:
-  %calltmp = call double @cos( double 1.234000e+00 )
+  %calltmp = call double @cos(double 1.234000e+00)
   ret double %calltmp
 }
@@ -629,8 +629,8 @@ entry:
 define double @bar(double %a) {
 entry:
-  %calltmp = call double @foo( double %a, double 4.000000e+00 )
-  %calltmp1 = call double @bar( double 3.133700e+04 )
+  %calltmp = call double @foo(double %a, double 4.000000e+00)
+  %calltmp1 = call double @bar(double 3.133700e+04)
   %addtmp = fadd double %calltmp, %calltmp1
   ret double %addtmp
 }
@@ -639,7 +639,7 @@ declare double @cos(double)
 define double @""() {
 entry:
-  %calltmp = call double @cos( double 1.234000e+00 )
+  %calltmp = call double @cos(double 1.234000e+00)
   ret double %calltmp
 }
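The CreateAdd-to-CreateFAdd changes above follow from Kaleidoscope's type system: every value is a double, so codegen must emit the floating-point instructions (fadd, fsub, fmul) rather than the integer forms. The following standalone sketch (illustrative only; it assumes the 2010-era headers, e.g. IRBuilder in llvm/Support/IRBuilder.h, and the hypothetical function name addpair) shows the same builder call producing an fadd:

// fpdemo.cpp: builds double addpair(double, double) with the FP builder call.
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/BasicBlock.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include <vector>
using namespace llvm;

int main() {
  LLVMContext &Ctx = getGlobalContext();
  Module M("fpdemo", Ctx);

  // Function type: double(double, double).
  std::vector<const Type*> Doubles(2, Type::getDoubleTy(Ctx));
  FunctionType *FT = FunctionType::get(Type::getDoubleTy(Ctx), Doubles, false);
  Function *F = Function::Create(FT, Function::ExternalLinkage, "addpair", &M);

  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> Builder(BB);
  Function::arg_iterator AI = F->arg_begin();
  Value *L = &*AI; ++AI;
  Value *R = &*AI;
  // CreateFAdd emits 'fadd double'; CreateAdd would emit an integer add,
  // which is invalid for double operands.
  Builder.CreateRet(Builder.CreateFAdd(L, R, "addtmp"));

  M.print(outs(), 0);  // prints the textual IR, including the fadd
  return 0;
}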
@@ -1263,7 +1263,7 @@ int main() {
 Chris Lattner
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-06-14 08:09:39 +0200 (Mon, 14 Jun 2010) $

diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html
index aaec2b6..d286364 100644
--- a/docs/tutorial/LangImpl4.html
+++ b/docs/tutorial/LangImpl4.html
@@ -371,7 +371,7 @@ entry:
 ready> testfunc(4, 10);
 define double @""() {
 entry:
-  %calltmp = call double @testfunc( double 4.000000e+00, double 1.000000e+01 )
+  %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
   ret double %calltmp
 }

@@ -410,9 +410,9 @@ ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
 Read function definition:
 define double @foo(double %x) {
 entry:
-  %calltmp = call double @sin( double %x )
+  %calltmp = call double @sin(double %x)
   %multmp = fmul double %calltmp, %calltmp
-  %calltmp2 = call double @cos( double %x )
+  %calltmp2 = call double @cos(double %x)
   %multmp4 = fmul double %calltmp2, %calltmp2
   %addtmp = fadd double %multmp, %multmp4
   ret double %addtmp
@@ -876,9 +876,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -1126,7 +1126,7 @@ int main() {
 Chris Lattner
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-06-14 08:09:39 +0200 (Mon, 14 Jun 2010) $

diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html
index 6f1f9df..b6398ca 100644
--- a/docs/tutorial/LangImpl5.html
+++ b/docs/tutorial/LangImpl5.html
@@ -676,7 +676,7 @@ entry:
 loop:       ; preds = %loop, %entry
   %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
   ; body
-  %calltmp = call double @putchard( double 4.200000e+01 )
+  %calltmp = call double @putchard(double 4.200000e+01)
   ; increment
   %nextvar = fadd double %i, 1.000000e+00

@@ -1377,9 +1377,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -1771,7 +1771,7 @@ int main() {
 Chris Lattner
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-06-14 08:09:39 +0200 (Mon, 14 Jun 2010) $

diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html
index 47f08dc..7368ea7 100644
--- a/docs/tutorial/LangImpl6.html
+++ b/docs/tutorial/LangImpl6.html
@@ -277,9 +277,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -531,7 +531,7 @@ def unary!(v)
 def unary-(v)
   0-v;

-# Define > with the same precedence as >.
+# Define > with the same precedence as <.
 def binary> 10 (LHS RHS)
   RHS < LHS;

@@ -1392,9 +1392,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -1808,7 +1808,7 @@ int main() {
 Chris Lattner
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-06-21 22:31:30 +0200 (Mon, 21 Jun 2010) $

diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html
index ddd5a9b..be2d69e 100644
--- a/docs/tutorial/LangImpl7.html
+++ b/docs/tutorial/LangImpl7.html
@@ -558,10 +558,10 @@ then:       ; preds = %entry
 else:       ; preds = %entry
   %x3 = load double* %x1
   %subtmp = fsub double %x3, 1.000000e+00
-  %calltmp = call double @fib( double %subtmp )
+  %calltmp = call double @fib(double %subtmp)
   %x4 = load double* %x1
   %subtmp5 = fsub double %x4, 2.000000e+00
-  %calltmp6 = call double @fib( double %subtmp5 )
+  %calltmp6 = call double @fib(double %subtmp5)
   %addtmp = fadd double %calltmp, %calltmp6
   br label %ifcont

@@ -596,9 +596,9 @@ then:
 else:
   %subtmp = fsub double %x, 1.000000e+00
-  %calltmp = call double @fib( double %subtmp )
+  %calltmp = call double @fib(double %subtmp)
   %subtmp5 = fsub double %x, 2.000000e+00
-  %calltmp6 = call double @fib( double %subtmp5 )
+  %calltmp6 = call double @fib(double %subtmp5)
   %addtmp = fadd double %calltmp, %calltmp6
   br label %ifcont

@@ -626,9 +626,9 @@ entry:
 else:
   %subtmp = fsub double %x, 1.000000e+00
-  %calltmp = call double @fib( double %subtmp )
+  %calltmp = call double @fib(double %subtmp)
   %subtmp5 = fsub double %x, 2.000000e+00
-  %calltmp6 = call double @fib( double %subtmp5 )
+  %calltmp6 = call double @fib(double %subtmp5)
   %addtmp = fadd double %calltmp, %calltmp6
   ret double %addtmp

@@ -1672,9 +1672,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -2158,7 +2158,7 @@ int main() {
 Chris Lattner
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-06-14 08:09:39 +0200 (Mon, 14 Jun 2010) $

diff --git a/docs/tutorial/OCamlLangImpl3.html b/docs/tutorial/OCamlLangImpl3.html
index 06dded1..c7c5370 100644
--- a/docs/tutorial/OCamlLangImpl3.html
+++ b/docs/tutorial/OCamlLangImpl3.html
@@ -524,8 +524,8 @@ ready> def bar(a) foo(a, 4.0) + bar(31337);
 Read function definition:
 define double @bar(double %a) {
 entry:
-  %calltmp = call double @foo( double %a, double 4.000000e+00 )
-  %calltmp1 = call double @bar( double 3.133700e+04 )
+  %calltmp = call double @foo(double %a, double 4.000000e+00)
+  %calltmp1 = call double @bar(double 3.133700e+04)
   %addtmp = fadd double %calltmp, %calltmp1
   ret double %addtmp
 }

@@ -546,7 +546,7 @@ ready> cos(1.234);
 Read top-level expression:
 define double @""() {
 entry:
-  %calltmp = call double @cos( double 1.234000e+00 )
+  %calltmp = call double @cos(double 1.234000e+00)
   ret double %calltmp
 }
@@ -579,8 +579,8 @@ entry:
 define double @bar(double %a) {
 entry:
-  %calltmp = call double @foo( double %a, double 4.000000e+00 )
-  %calltmp1 = call double @bar( double 3.133700e+04 )
+  %calltmp = call double @foo(double %a, double 4.000000e+00)
+  %calltmp1 = call double @bar(double 3.133700e+04)
   %addtmp = fadd double %calltmp, %calltmp1
   ret double %addtmp
 }
@@ -589,7 +589,7 @@ declare double @cos(double)
 define double @""() {
 entry:
-  %calltmp = call double @cos( double 1.234000e+00 )
+  %calltmp = call double @cos(double 1.234000e+00)
   ret double %calltmp
 }
@@ -1087,7 +1087,7 @@ main ()
 Chris Lattner
    Erick Tryzelaar
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $

diff --git a/docs/tutorial/OCamlLangImpl4.html b/docs/tutorial/OCamlLangImpl4.html
index 3277e10..a86184c 100644
--- a/docs/tutorial/OCamlLangImpl4.html
+++ b/docs/tutorial/OCamlLangImpl4.html
@@ -387,7 +387,7 @@ entry:
 ready> testfunc(4, 10);
 define double @""() {
 entry:
-  %calltmp = call double @testfunc( double 4.000000e+00, double 1.000000e+01 )
+  %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
   ret double %calltmp
 }

@@ -426,9 +426,9 @@ ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
 Read function definition:
 define double @foo(double %x) {
 entry:
-  %calltmp = call double @sin( double %x )
+  %calltmp = call double @sin(double %x)
   %multmp = fmul double %calltmp, %calltmp
-  %calltmp2 = call double @cos( double %x )
+  %calltmp2 = call double @cos(double %x)
   %multmp4 = fmul double %calltmp2, %calltmp2
   %addtmp = fadd double %multmp, %multmp4
   ret double %addtmp
@@ -1023,7 +1023,7 @@ extern double putchard(double X) {
 Chris Lattner
    Erick Tryzelaar
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $

diff --git a/docs/tutorial/OCamlLangImpl5.html b/docs/tutorial/OCamlLangImpl5.html
index 7194a02..3173803 100644
--- a/docs/tutorial/OCamlLangImpl5.html
+++ b/docs/tutorial/OCamlLangImpl5.html
@@ -651,7 +651,7 @@ entry:
 loop:       ; preds = %loop, %entry
   %i = phi double [ 1.000000e+00, %entry ], [ %nextvar, %loop ]
   ; body
-  %calltmp = call double @putchard( double 4.200000e+01 )
+  %calltmp = call double @putchard(double 4.200000e+01)
   ; increment
   %nextvar = fadd double %i, 1.000000e+00

@@ -1563,7 +1563,7 @@ operators
 Chris Lattner
    Erick Tryzelaar
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $

diff --git a/docs/tutorial/OCamlLangImpl6.html b/docs/tutorial/OCamlLangImpl6.html
index f365e2d..1d4f8c7 100644
--- a/docs/tutorial/OCamlLangImpl6.html
+++ b/docs/tutorial/OCamlLangImpl6.html
@@ -512,7 +512,7 @@ def unary!(v)
 def unary-(v)
   0-v;

-# Define > with the same precedence as >.
+# Define > with the same precedence as <.
 def binary> 10 (LHS RHS)
   RHS < LHS;

@@ -1568,7 +1568,7 @@ SSA construction
 Chris Lattner
    Erick Tryzelaar
    The LLVM Compiler Infrastructure
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-06-21 22:31:30 +0200 (Mon, 21 Jun 2010) $

diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html
index 8f95389..ac31fbf 100644
--- a/docs/tutorial/OCamlLangImpl7.html
+++ b/docs/tutorial/OCamlLangImpl7.html
@@ -582,10 +582,10 @@ then:       ; preds = %entry
 else:       ; preds = %entry
   %x3 = load double* %x1
   %subtmp = fsub double %x3, 1.000000e+00
-  %calltmp = call double @fib( double %subtmp )
+  %calltmp = call double @fib(double %subtmp)
   %x4 = load double* %x1
   %subtmp5 = fsub double %x4, 2.000000e+00
-  %calltmp6 = call double @fib( double %subtmp5 )
+  %calltmp6 = call double @fib(double %subtmp5)
   %addtmp = fadd double %calltmp, %calltmp6
   br label %ifcont

@@ -620,9 +620,9 @@ then:
 else:
   %subtmp = fsub double %x, 1.000000e+00
-  %calltmp = call double @fib( double %subtmp )
+  %calltmp = call double @fib(double %subtmp)
   %subtmp5 = fsub double %x, 2.000000e+00
-  %calltmp6 = call double @fib( double %subtmp5 )
+  %calltmp6 = call double @fib(double %subtmp5)
   %addtmp = fadd double %calltmp, %calltmp6
   br label %ifcont

@@ -650,9 +650,9 @@ entry:
 else:
   %subtmp = fsub double %x, 1.000000e+00
-  %calltmp = call double @fib( double %subtmp )
+  %calltmp = call double @fib(double %subtmp)
   %subtmp5 = fsub double %x, 2.000000e+00
-  %calltmp6 = call double @fib( double %subtmp5 )
+  %calltmp6 = call double @fib(double %subtmp5)
   %addtmp = fadd double %calltmp, %calltmp6
   ret double %addtmp

@@ -1901,7 +1901,7 @@ extern double printd(double X) {
 Chris Lattner
    The LLVM Compiler Infrastructure
    Erick Tryzelaar
-Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $
+Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $

diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index bbca963..e09c990 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -1574,7 +1574,7 @@ public:
                    ));
   }

-  ~OurCppRunException (void) throw () {};
+  ~OurCppRunException (void) throw () {}
 };

diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp
index 73520d8..80d691d 100644
--- a/examples/Kaleidoscope/Chapter3/toy.cpp
+++ b/examples/Kaleidoscope/Chapter3/toy.cpp
@@ -367,9 +367,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -400,7 +400,7 @@ Value *CallExprAST::Codegen() {
 Function *PrototypeAST::Codegen() {
   // Make the function type:  double(double,double) etc.
-  std::vector Doubles(Args.size(),
+  std::vector Doubles(Args.size(),
                       Type::getDoubleTy(getGlobalContext()));
   FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
                                        Doubles, false);

diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp
index a2ddda2..327c5c0 100644
--- a/examples/Kaleidoscope/Chapter4/toy.cpp
+++ b/examples/Kaleidoscope/Chapter4/toy.cpp
@@ -374,9 +374,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -407,7 +407,7 @@ Value *CallExprAST::Codegen() {
 Function *PrototypeAST::Codegen() {
   // Make the function type:  double(double,double) etc.
-  std::vector Doubles(Args.size(),
+  std::vector Doubles(Args.size(),
                       Type::getDoubleTy(getGlobalContext()));
   FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
                                        Doubles, false);

diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
index da64b7e..c98ee88 100644
--- a/examples/Kaleidoscope/Chapter5/toy.cpp
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -475,9 +475,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -615,7 +615,7 @@ Value *ForExprAST::Codegen() {
     StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
   }

-  Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
+  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");

   // Compute the end condition.
   Value *EndCond = End->Codegen();
@@ -652,7 +652,7 @@ Value *ForExprAST::Codegen() {
 Function *PrototypeAST::Codegen() {
   // Make the function type:  double(double,double) etc.
-  std::vector Doubles(Args.size(),
+  std::vector Doubles(Args.size(),
                       Type::getDoubleTy(getGlobalContext()));
   FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
                                        Doubles, false);

diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
index 4e719e3..b7b8738 100644
--- a/examples/Kaleidoscope/Chapter6/toy.cpp
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -571,9 +571,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -719,7 +719,7 @@ Value *ForExprAST::Codegen() {
     StepVal = ConstantFP::get(getGlobalContext(), APFloat(1.0));
   }

-  Value *NextVar = Builder.CreateAdd(Variable, StepVal, "nextvar");
+  Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");

   // Compute the end condition.
   Value *EndCond = End->Codegen();
@@ -756,7 +756,7 @@ Value *ForExprAST::Codegen() {
 Function *PrototypeAST::Codegen() {
   // Make the function type:  double(double,double) etc.
-  std::vector Doubles(Args.size(),
+  std::vector Doubles(Args.size(),
                       Type::getDoubleTy(getGlobalContext()));
   FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
                                        Doubles, false);

diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
index 7dd9eae..0cf7869 100644
--- a/examples/Kaleidoscope/Chapter7/toy.cpp
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -667,9 +667,9 @@ Value *BinaryExprAST::Codegen() {
   if (L == 0 || R == 0) return 0;

   switch (Op) {
-  case '+': return Builder.CreateAdd(L, R, "addtmp");
-  case '-': return Builder.CreateSub(L, R, "subtmp");
-  case '*': return Builder.CreateMul(L, R, "multmp");
+  case '+': return Builder.CreateFAdd(L, R, "addtmp");
+  case '-': return Builder.CreateFSub(L, R, "subtmp");
+  case '*': return Builder.CreateFMul(L, R, "multmp");
   case '<':
     L = Builder.CreateFCmpULT(L, R, "cmptmp");
     // Convert bool 0/1 to double 0.0 or 1.0
@@ -828,7 +828,7 @@ Value *ForExprAST::Codegen() {
   // Reload, increment, and restore the alloca.  This handles the case where
   // the body of the loop mutates the variable.
   Value *CurVar = Builder.CreateLoad(Alloca, VarName.c_str());
-  Value *NextVar = Builder.CreateAdd(CurVar, StepVal, "nextvar");
+  Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
   Builder.CreateStore(NextVar, Alloca);

   // Convert condition to a bool by comparing equal to 0.0.

diff --git a/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml b/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml
index 69d0928..8957f4c 100644
--- a/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml
+++ b/examples/OCaml-Kaleidoscope/Chapter4/codegen.ml
@@ -22,9 +22,9 @@ let rec codegen_expr = function
       let rhs_val = codegen_expr rhs in
       begin
         match op with
-        | '+' -> build_add lhs_val rhs_val "addtmp" builder
-        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -> build_mul lhs_val rhs_val "multmp" builder
+        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
         | '<' ->
             (* Convert bool 0/1 to double 0.0 or 1.0 *)
             let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in

diff --git a/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml b/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml
index e0db2d2..e4570a6 100644
--- a/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml
+++ b/examples/OCaml-Kaleidoscope/Chapter5/codegen.ml
@@ -22,9 +22,9 @@ let rec codegen_expr = function
       let rhs_val = codegen_expr rhs in
       begin
         match op with
-        | '+' -> build_add lhs_val rhs_val "addtmp" builder
-        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -> build_mul lhs_val rhs_val "multmp" builder
+        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
         | '<' ->
             (* Convert bool 0/1 to double 0.0 or 1.0 *)
             let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in

diff --git a/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml b/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml
index e00edf2..9667435 100644
--- a/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml
+++ b/examples/OCaml-Kaleidoscope/Chapter6/codegen.ml
@@ -31,9 +31,9 @@ let rec codegen_expr = function
       let rhs_val = codegen_expr rhs in
       begin
         match op with
-        | '+' -> build_add lhs_val rhs_val "addtmp" builder
-        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -> build_mul lhs_val rhs_val "multmp" builder
+        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
         | '<' ->
             (* Convert bool 0/1 to double 0.0 or 1.0 *)
             let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in

diff --git a/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml b/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml
index e8fac32..e66396e 100644
--- a/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml
+++ b/examples/OCaml-Kaleidoscope/Chapter7/codegen.ml
@@ -60,9 +60,9 @@ let rec codegen_expr = function
       let rhs_val = codegen_expr rhs in
       begin
         match op with
-        | '+' -> build_add lhs_val rhs_val "addtmp" builder
-        | '-' -> build_sub lhs_val rhs_val "subtmp" builder
-        | '*' -> build_mul lhs_val rhs_val "multmp" builder
+        | '+' -> build_fadd lhs_val rhs_val "addtmp" builder
+        | '-' -> build_fsub lhs_val rhs_val "subtmp" builder
+        | '*' -> build_fmul lhs_val rhs_val "multmp" builder
         | '<' ->
             (* Convert bool 0/1 to double 0.0 or 1.0 *)
             let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in

diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index d665c89..117f2d6 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -226,7 +226,8 @@ typedef enum {
   LLVMExternalWeakLinkage,/**< ExternalWeak linkage description */
   LLVMGhostLinkage,       /**< Obsolete */
   LLVMCommonLinkage,      /**< Tentative definitions */
-  LLVMLinkerPrivateLinkage /**< Like Private, but linker removes. */
+  LLVMLinkerPrivateLinkage, /**< Like Private, but linker removes. */
+  LLVMLinkerPrivateWeakLinkage /**< Like LinkerPrivate, but is weak. */
 } LLVMLinkage;

 typedef enum {

diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 2948fc7..b1b9f36 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -32,7 +32,8 @@ typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef;
 typedef struct LLVMStructLayout *LLVMStructLayoutRef;

 /* Declare all of the target-initialization functions that are available. */
-#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##TargetInfo(void);
+#define LLVM_TARGET(TargetName) \
+  void LLVMInitialize##TargetName##TargetInfo(void);
 #include "llvm/Config/Targets.def"
 #undef LLVM_TARGET  /* Explicit undef to make SWIG happier */

diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h
index 7cafcb2..93f3760 100644
--- a/include/llvm-c/lto.h
+++ b/include/llvm-c/lto.h
@@ -102,7 +102,7 @@ lto_module_is_object_file_in_memory(const void* mem, size_t length);
  */
 extern bool
 lto_module_is_object_file_in_memory_for_target(const void* mem, size_t length,
-                                              const char* target_triple_prefix);
+                                               const char* target_triple_prefix);

diff --git a/include/llvm/ADT/DAGDeltaAlgorithm.h b/include/llvm/ADT/DAGDeltaAlgorithm.h
new file mode 100644
index 0000000..99ed15c
--- /dev/null
+++ b/include/llvm/ADT/DAGDeltaAlgorithm.h
@@ -0,0 +1,75 @@
+//===--- DAGDeltaAlgorithm.h - A DAG Minimization Algorithm ----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_DAGDELTAALGORITHM_H
+#define LLVM_ADT_DAGDELTAALGORITHM_H
+
+#include <set>
+#include <vector>
+
+namespace llvm {
+
+/// DAGDeltaAlgorithm - Implements a "delta debugging" algorithm for
+/// minimizing directed acyclic graphs using a predicate function.
+///
+/// The result of the algorithm is a subset of the input change set which is
+/// guaranteed to satisfy the predicate, assuming that the input set did. For
+/// well formed predicates, the result set is guaranteed to be such that
+/// removing any single element not required by the dependencies on the other
+/// elements would falsify the predicate.
+///
+/// The DAG should be used to represent dependencies in the changes which are
+/// likely to hold across the predicate function. That is, for a particular
+/// changeset S and predicate P:
+///
+///   P(S) => P(S union pred(S))
+///
+/// The minimization algorithm uses this dependency information to attempt to
+/// eagerly prune large subsets of changes. As with \see DeltaAlgorithm, the
+/// DAG is not required to satisfy this property, but the algorithm will run
+/// substantially fewer tests with appropriate dependencies. \see
+/// DeltaAlgorithm for more information on the properties which the predicate
+/// function itself should satisfy.
+class DAGDeltaAlgorithm {
+public:
+  typedef unsigned change_ty;
+  typedef std::pair<change_ty, change_ty> edge_ty;
+
+  // FIXME: Use a decent data structure.
+  typedef std::set<change_ty> changeset_ty;
+  typedef std::vector<changeset_ty> changesetlist_ty;
+
+public:
+  virtual ~DAGDeltaAlgorithm() {}
+
+  /// Run - Minimize the DAG formed by the \arg Changes vertices and the
+  /// \arg Dependencies edges by executing \see ExecuteOneTest() on subsets of
+  /// changes and returning the smallest set which still satisfies the test
+  /// predicate and the input \arg Dependencies.
+  ///
+  /// \param Changes The list of changes.
+  ///
+  /// \param Dependencies The list of dependencies amongst changes. For each
+  /// (x,y) in \arg Dependencies, both x and y must be in \arg Changes. The
+  /// minimization algorithm guarantees that for each tested changed set S,
+  /// x \in S implies y \in S. It is an error to have cyclic dependencies.
+  changeset_ty Run(const changeset_ty &Changes,
+                   const std::vector<edge_ty> &Dependencies);
+
+  /// UpdatedSearchState - Callback used when the search state changes.
+  virtual void UpdatedSearchState(const changeset_ty &Changes,
+                                  const changesetlist_ty &Sets,
+                                  const changeset_ty &Required) {}
+
+  /// ExecuteOneTest - Execute a single test predicate on the change set \arg S.
+  virtual bool ExecuteOneTest(const changeset_ty &S) = 0;
+};
+
+} // end namespace llvm
+
+#endif
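A hypothetical driver (not part of this patch) showing how the interface above is meant to be used: subclass DAGDeltaAlgorithm, implement the test predicate in ExecuteOneTest, and pass Run the change set plus its dependency edges:

// deltatoy.cpp: four changes {0,1,2,3}; the toy predicate only needs
// changes 1 and 2, and change 2 depends on change 1.
#include "llvm/ADT/DAGDeltaAlgorithm.h"
#include <iostream>
#include <utility>
#include <vector>
using namespace llvm;

namespace {
class ToyDelta : public DAGDeltaAlgorithm {
public:
  // The test predicate: does this subset still satisfy the property we
  // are minimizing against?
  virtual bool ExecuteOneTest(const changeset_ty &S) {
    return S.count(1) && S.count(2);
  }
};
}

int main() {
  ToyDelta D;
  DAGDeltaAlgorithm::changeset_ty Changes;
  for (unsigned i = 0; i != 4; ++i)
    Changes.insert(i);
  // Edge (2,1): any tested set containing change 2 must also contain 1.
  std::vector<DAGDeltaAlgorithm::edge_ty> Deps;
  Deps.push_back(std::make_pair(2u, 1u));
  DAGDeltaAlgorithm::changeset_ty Min = D.Run(Changes, Deps);
  // Expect the minimal set {1, 2}.
  for (DAGDeltaAlgorithm::changeset_ty::iterator I = Min.begin(),
         E = Min.end(); I != E; ++I)
    std::cout << *I << " ";
  std::cout << "\n";
  return 0;
}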
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 5c99473..c53e255 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 #include

diff --git a/include/llvm/ADT/EquivalenceClasses.h b/include/llvm/ADT/EquivalenceClasses.h
index 91a1429..07a5edf 100644
--- a/include/llvm/ADT/EquivalenceClasses.h
+++ b/include/llvm/ADT/EquivalenceClasses.h
@@ -169,7 +169,7 @@ public:
   /// getOrInsertLeaderValue - Return the leader for the specified value that is
   /// in the set.  If the member is not in the set, it is inserted, then
   /// returned.
-  const ElemTy &getOrInsertLeaderValue(const ElemTy &V) const {
+  const ElemTy &getOrInsertLeaderValue(const ElemTy &V) {
     member_iterator MI = findLeader(insert(V));
     assert(MI != member_end() && "Value is not in the set!");
     return *MI;

diff --git a/include/llvm/ADT/FoldingSet.h b/include/llvm/ADT/FoldingSet.h
index e8979bb..fc8490a 100644
--- a/include/llvm/ADT/FoldingSet.h
+++ b/include/llvm/ADT/FoldingSet.h
@@ -166,6 +166,14 @@ public:
   /// FindNodeOrInsertPos.
   void InsertNode(Node *N, void *InsertPos);

+  /// InsertNode - Insert the specified node into the folding set, knowing that
+  /// it is not already in the folding set.
+  void InsertNode(Node *N) {
+    Node *Inserted = GetOrInsertNode(N);
+    (void)Inserted;
+    assert(Inserted == N && "Node already inserted!");
+  }
+
   /// size - Returns the number of nodes in the folding set.
   unsigned size() const { return NumNodes; }

@@ -196,6 +204,10 @@ protected:
 template<typename T> struct FoldingSetTrait {
   static inline void Profile(const T& X, FoldingSetNodeID& ID) { X.Profile(ID);}
   static inline void Profile(T& X, FoldingSetNodeID& ID) { X.Profile(ID); }
+  template <typename Ctx>
+  static inline void Profile(T &X, FoldingSetNodeID &ID, Ctx Context) {
+    X.Profile(ID, Context);
+  }
 };

 //===--------------------------------------------------------------------===//
@@ -322,6 +334,77 @@ public:
 };

 //===----------------------------------------------------------------------===//
+/// ContextualFoldingSet - This template class is a further refinement
+/// of FoldingSet which provides a context argument when calling
+/// Profile on its nodes.  Currently, that argument is fixed at
+/// initialization time.
+///
+/// T must be a subclass of FoldingSetNode and implement a Profile
+/// function with signature
+///   void Profile(llvm::FoldingSetNodeID &, Ctx);
+template <class T, class Ctx>
+class ContextualFoldingSet : public FoldingSetImpl {
+  // Unfortunately, this can't derive from FoldingSet<T> because the
+  // construction vtable for FoldingSet<T> requires
+  // FoldingSet<T>::GetNodeProfile to be instantiated, which in turn
+  // requires a single-argument T::Profile().
+
+private:
+  Ctx Context;
+
+  /// GetNodeProfile - Each instantiation of the FoldingSet needs to provide a
+  /// way to convert nodes into a unique specifier.
+  virtual void GetNodeProfile(FoldingSetNodeID &ID,
+                              FoldingSetImpl::Node *N) const {
+    T *TN = static_cast<T *>(N);
+
+    // We must use explicit template arguments in case Ctx is a
+    // reference type.
+    FoldingSetTrait<T>::template Profile<Ctx>(*TN, ID, Context);
+  }
+
+public:
+  explicit ContextualFoldingSet(Ctx Context, unsigned Log2InitSize = 6)
+  : FoldingSetImpl(Log2InitSize), Context(Context)
+  {}
+
+  Ctx getContext() const { return Context; }
+
+
+  typedef FoldingSetIterator<T> iterator;
+  iterator begin() { return iterator(Buckets); }
+  iterator end() { return iterator(Buckets+NumBuckets); }
+
+  typedef FoldingSetIterator<const T> const_iterator;
+  const_iterator begin() const { return const_iterator(Buckets); }
+  const_iterator end() const { return const_iterator(Buckets+NumBuckets); }
+
+  typedef FoldingSetBucketIterator<T> bucket_iterator;
+
+  bucket_iterator bucket_begin(unsigned hash) {
+    return bucket_iterator(Buckets + (hash & (NumBuckets-1)));
+  }
+
+  bucket_iterator bucket_end(unsigned hash) {
+    return bucket_iterator(Buckets + (hash & (NumBuckets-1)), true);
+  }
+
+  /// GetOrInsertNode - If there is an existing simple Node exactly
+  /// equal to the specified node, return it.  Otherwise, insert 'N'
+  /// and return it instead.
+  T *GetOrInsertNode(Node *N) {
+    return static_cast<T *>(FoldingSetImpl::GetOrInsertNode(N));
+  }
+
+  /// FindNodeOrInsertPos - Look up the node specified by ID.  If it
+  /// exists, return it.  If not, return the insertion token that will
+  /// make insertion faster.
+  T *FindNodeOrInsertPos(const FoldingSetNodeID &ID, void *&InsertPos) {
+    return static_cast<T *>(FoldingSetImpl::FindNodeOrInsertPos(ID, InsertPos));
+  }
+};
+
+//===----------------------------------------------------------------------===//
 /// FoldingSetIteratorImpl - This is the common iterator support shared by all
 /// folding sets, which knows how to walk the folding set hash table.
 class FoldingSetIteratorImpl {
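A hedged usage sketch of the class above: the node type implements the two-argument Profile, and the set folds nodes relative to a context fixed at construction time. The Universe type and the demo function are inventions for this example:

// ctxfold.cpp: minimal ContextualFoldingSet usage sketch.
#include "llvm/ADT/FoldingSet.h"
using namespace llvm;

struct Universe;  // hypothetical context type; only its address is folded

struct CtxNode : public FoldingSetNode {
  unsigned Key;
  explicit CtxNode(unsigned K) : Key(K) {}
  // Two-argument Profile, as ContextualFoldingSet<T, Ctx> requires.
  void Profile(FoldingSetNodeID &ID, Universe *U) const {
    ID.AddPointer(U);    // identity depends on the context...
    ID.AddInteger(Key);  // ...and on the node's own data
  }
};

void demo(Universe *U) {
  // The context is supplied once, when the set is constructed.
  ContextualFoldingSet<CtxNode, Universe*> Set(U);
  CtxNode N(42);
  CtxNode *Canonical = Set.GetOrInsertNode(&N);  // N becomes the canonical node
  (void)Canonical;
}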
diff --git a/include/llvm/ADT/ImmutableIntervalMap.h b/include/llvm/ADT/ImmutableIntervalMap.h
index f33fb1e..7aa3155 100644
--- a/include/llvm/ADT/ImmutableIntervalMap.h
+++ b/include/llvm/ADT/ImmutableIntervalMap.h
@@ -125,9 +125,11 @@ private:
     key_type_ref KCurrent = ImutInfo::KeyOfValue(this->Value(T));

     if (ImutInfo::isLess(K, KCurrent))
-      return this->Balance(Add_internal(V, this->Left(T)), this->Value(T), this->Right(T));
+      return this->Balance(Add_internal(V, this->Left(T)), this->Value(T),
+                           this->Right(T));
     else
-      return this->Balance(this->Left(T), this->Value(T), Add_internal(V, this->Right(T)));
+      return this->Balance(this->Left(T), this->Value(T),
+                           Add_internal(V, this->Right(T)));
   }

   // Remove all overlaps from T.
@@ -150,9 +152,11 @@ private:
     // If current key does not overlap the inserted key.
     if (CurrentK.getStart() > K.getEnd())
-      return this->Balance(RemoveOverlap(this->Left(T), K, Changed), this->Value(T), this->Right(T));
+      return this->Balance(RemoveOverlap(this->Left(T), K, Changed),
+                           this->Value(T), this->Right(T));
     else if (CurrentK.getEnd() < K.getStart())
-      return this->Balance(this->Left(T), this->Value(T), RemoveOverlap(this->Right(T), K, Changed));
+      return this->Balance(this->Left(T), this->Value(T),
+                           RemoveOverlap(this->Right(T), K, Changed));

     // Current key overlaps with the inserted key.
     // Remove the current key.

diff --git a/include/llvm/ADT/PostOrderIterator.h b/include/llvm/ADT/PostOrderIterator.h
index 8315bc9..47e5b2b 100644
--- a/include/llvm/ADT/PostOrderIterator.h
+++ b/include/llvm/ADT/PostOrderIterator.h
@@ -19,7 +19,6 @@
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include <set>
-#include <stack>
 #include <vector>

 namespace llvm {
@@ -52,21 +51,21 @@ class po_iterator : public std::iterator<std::forward_iterator_tag, NodeType>
-  std::stack<std::pair<NodeType*, ChildItTy> > VisitStack;
+  std::vector<std::pair<NodeType*, ChildItTy> > VisitStack;

   void traverseChild() {
-    while (VisitStack.top().second != GT::child_end(VisitStack.top().first)) {
-      NodeType *BB = *VisitStack.top().second++;
+    while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) {
+      NodeType *BB = *VisitStack.back().second++;
       if (!this->Visited.count(BB)) {  // If the block is not visited...
         this->Visited.insert(BB);
-        VisitStack.push(std::make_pair(BB, GT::child_begin(BB)));
+        VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
       }
     }
   }

   inline po_iterator(NodeType *BB) {
     this->Visited.insert(BB);
-    VisitStack.push(std::make_pair(BB, GT::child_begin(BB)));
+    VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
     traverseChild();
   }
   inline po_iterator() {} // End is when stack is empty.
@@ -75,7 +74,7 @@ class po_iterator : public std::iterator<std::forward_iterator_tag, NodeType>
     po_iterator_storage<SetType, ExtStorage>(S) {
     if(!S.count(BB)) {
       this->Visited.insert(BB);
-      VisitStack.push(std::make_pair(BB, GT::child_begin(BB)));
+      VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
       traverseChild();
     }
   }
@@ -102,7 +101,7 @@ public:
   inline bool operator!=(const _Self& x) const { return !operator==(x); }

   inline pointer operator*() const {
-    return VisitStack.top().first;
+    return VisitStack.back().first;
   }

   // This is a nonstandard operator-> that dereferences the pointer an extra
@@ -112,7 +111,7 @@ public:
   inline NodeType *operator->() const { return operator*(); }

   inline _Self& operator++() {   // Preincrement
-    VisitStack.pop();
+    VisitStack.pop_back();
     if (!VisitStack.empty())
       traverseChild();
     return *this;

diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h
index fab133a..bf8286c 100644
--- a/include/llvm/ADT/SetVector.h
+++ b/include/llvm/ADT/SetVector.h
@@ -143,6 +143,14 @@ public:
     vector_.pop_back();
   }

+  bool operator==(const SetVector &that) const {
+    return vector_ == that.vector_;
+  }
+
+  bool operator!=(const SetVector &that) const {
+    return vector_ != that.vector_;
+  }
+
 private:
   set_type set_;         ///< The set.
   vector_type vector_;   ///< The vector.

diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h
index ef08125..424bdba 100644
--- a/include/llvm/ADT/SmallPtrSet.h
+++ b/include/llvm/ADT/SmallPtrSet.h
@@ -46,8 +46,10 @@ class SmallPtrSetIteratorImpl;
 class SmallPtrSetImpl {
   friend class SmallPtrSetIteratorImpl;
 protected:
-  /// CurArray - This is the current set of buckets.  If it points to
-  /// SmallArray, then the set is in 'small mode'.
+  /// SmallArray - Points to a fixed size set of buckets, used in 'small mode'.
+  const void **SmallArray;
+  /// CurArray - This is the current set of buckets.  If equal to SmallArray,
+  /// then the set is in 'small mode'.
   const void **CurArray;
   /// CurArraySize - The allocated size of CurArray, always a power of two.
   /// Note that CurArray points to an array that has CurArraySize+1 elements in
@@ -57,15 +59,13 @@ protected:
   // If small, this is # elts allocated consequtively
   unsigned NumElements;
   unsigned NumTombstones;
-  const void *SmallArray[1];  // Must be last ivar.

   // Helper to copy construct a SmallPtrSet.
-  SmallPtrSetImpl(const SmallPtrSetImpl& that);
-  explicit SmallPtrSetImpl(unsigned SmallSize) {
+  SmallPtrSetImpl(const void **SmallStorage, const SmallPtrSetImpl& that);
+  explicit SmallPtrSetImpl(const void **SmallStorage, unsigned SmallSize) :
+    SmallArray(SmallStorage), CurArray(SmallStorage), CurArraySize(SmallSize) {
     assert(SmallSize && (SmallSize & (SmallSize-1)) == 0 &&
            "Initial size must be a power of two!");
-    CurArray = &SmallArray[0];
-    CurArraySize = SmallSize;
     // The end pointer, always valid, is set to a valid element to help the
     // iterator.
     CurArray[SmallSize] = 0;
@@ -123,7 +123,7 @@ protected:
   }

 private:
-  bool isSmall() const { return CurArray == &SmallArray[0]; }
+  bool isSmall() const { return CurArray == SmallArray; }

   unsigned Hash(const void *Ptr) const {
     return static_cast<unsigned>(((uintptr_t)Ptr >> 4) & (CurArraySize-1));
@@ -199,29 +199,29 @@ public:
   }
 };

-/// NextPowerOfTwo - This is a helper template that rounds N up to the next
-/// power of two.
+/// RoundUpToPowerOfTwo - This is a helper template that rounds N up to the
+/// next power of two (which means N itself if N is already a power of two).
 template<unsigned N>
-struct NextPowerOfTwo;
+struct RoundUpToPowerOfTwo;

-/// NextPowerOfTwoH - If N is not a power of two, increase it.  This is a helper
-/// template used to implement NextPowerOfTwo.
+/// RoundUpToPowerOfTwoH - If N is not a power of two, increase it.  This is a
+/// helper template used to implement RoundUpToPowerOfTwo.
 template<unsigned N, bool isPowerTwo>
-struct NextPowerOfTwoH {
+struct RoundUpToPowerOfTwoH {
   enum { Val = N };
 };
 template<unsigned N>
-struct NextPowerOfTwoH<N, false> {
+struct RoundUpToPowerOfTwoH<N, false> {
   enum {
     // We could just use NextVal = N+1, but this converges faster.  N|(N-1) sets
    // the right-most zero bits to one all at once, e.g. 0b0011000 -> 0b0011111.
-    Val = NextPowerOfTwo<(N|(N-1)) + 1>::Val
+    Val = RoundUpToPowerOfTwo<(N|(N-1)) + 1>::Val
   };
 };
 template<unsigned N>
-struct NextPowerOfTwo {
-  enum { Val = NextPowerOfTwoH<N, (N&(N-1)) == 0>::Val };
+struct RoundUpToPowerOfTwo {
+  enum { Val = RoundUpToPowerOfTwoH<N, (N&(N-1)) == 0>::Val };
 };
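Since the renamed template is self-contained metaprogramming, its behavior can be checked with a standalone restatement (same recurrence as above, written out here with a small usage check; this is an illustration, not the header itself):

// roundup.cpp: compile-time round-up-to-power-of-two. N|(N-1) sets all bits
// below the highest set bit, so adding 1 jumps to the next power of two.
#include <cassert>

template <unsigned N> struct RoundUpToPowerOfTwo;

template <unsigned N, bool IsPowerTwo> struct RoundUpToPowerOfTwoH {
  enum { Val = N };  // N is already a power of two
};
template <unsigned N> struct RoundUpToPowerOfTwoH<N, false> {
  enum { Val = RoundUpToPowerOfTwo<(N | (N - 1)) + 1>::Val };
};
template <unsigned N> struct RoundUpToPowerOfTwo {
  enum { Val = RoundUpToPowerOfTwoH<N, (N & (N - 1)) == 0>::Val };
};

int main() {
  // 5 -> 8, 8 -> 8 (unchanged), 17 -> 32.
  assert(int(RoundUpToPowerOfTwo<5>::Val) == 8);
  assert(int(RoundUpToPowerOfTwo<8>::Val) == 8);
  assert(int(RoundUpToPowerOfTwo<17>::Val) == 32);
  return 0;
}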
void grow_pod(size_t MinSizeInBytes, size_t TSize); - + public: bool empty() const { return BeginX == EndX; } }; - + template class SmallVectorTemplateCommon : public SmallVectorBase { @@ -106,21 +108,21 @@ protected: void setEnd(T *P) { this->EndX = P; } public: SmallVectorTemplateCommon(size_t Size) : SmallVectorBase(Size) {} - + typedef size_t size_type; typedef ptrdiff_t difference_type; typedef T value_type; typedef T *iterator; typedef const T *const_iterator; - + typedef std::reverse_iterator const_reverse_iterator; typedef std::reverse_iterator reverse_iterator; - + typedef T &reference; typedef const T &const_reference; typedef T *pointer; typedef const T *const_pointer; - + // forward iterator creation methods. iterator begin() { return (iterator)this->BeginX; } const_iterator begin() const { return (const_iterator)this->BeginX; } @@ -130,7 +132,7 @@ protected: iterator capacity_ptr() { return (iterator)this->CapacityX; } const_iterator capacity_ptr() const { return (const_iterator)this->CapacityX;} public: - + // reverse iterator creation methods. reverse_iterator rbegin() { return reverse_iterator(end()); } const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } @@ -139,16 +141,16 @@ public: size_type size() const { return end()-begin(); } size_type max_size() const { return size_type(-1) / sizeof(T); } - + /// capacity - Return the total number of elements in the currently allocated /// buffer. size_t capacity() const { return capacity_ptr() - begin(); } - + /// data - Return a pointer to the vector's buffer, even if empty(). pointer data() { return pointer(begin()); } /// data - Return a pointer to the vector's buffer, even if empty(). const_pointer data() const { return const_pointer(begin()); } - + reference operator[](unsigned idx) { assert(begin() + idx < end()); return begin()[idx]; @@ -172,7 +174,7 @@ public: return end()[-1]; } }; - + /// SmallVectorTemplateBase - This is where we put method /// implementations that are designed to work with non-POD-like T's. template @@ -186,14 +188,14 @@ public: E->~T(); } } - + /// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory /// starting with "Dest", constructing elements into it as needed. template static void uninitialized_copy(It1 I, It1 E, It2 Dest) { std::uninitialized_copy(I, E, Dest); } - + /// grow - double the size of the allocated memory, guaranteeing space for at /// least one more element or MinSize if specified. void grow(size_t MinSize = 0); @@ -207,34 +209,34 @@ void SmallVectorTemplateBase::grow(size_t MinSize) { size_t NewCapacity = 2*CurCapacity; if (NewCapacity < MinSize) NewCapacity = MinSize; - T *NewElts = static_cast(operator new(NewCapacity*sizeof(T))); - + T *NewElts = static_cast(malloc(NewCapacity*sizeof(T))); + // Copy the elements over. this->uninitialized_copy(this->begin(), this->end(), NewElts); - + // Destroy the original elements. destroy_range(this->begin(), this->end()); - + // If this wasn't grown from the inline copy, deallocate the old space. if (!this->isSmall()) - operator delete(this->begin()); - + free(this->begin()); + this->setEnd(NewElts+CurSize); this->BeginX = NewElts; this->CapacityX = this->begin()+NewCapacity; } - - + + /// SmallVectorTemplateBase - This is where we put method /// implementations that are designed to work with POD-like T's. 
template class SmallVectorTemplateBase : public SmallVectorTemplateCommon { public: SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon(Size) {} - + // No need to do a destroy loop for POD's. static void destroy_range(T *, T *) {} - + /// uninitialized_copy - Copy the range [I, E) onto the uninitialized memory /// starting with "Dest", constructing elements into it as needed. template @@ -259,33 +261,35 @@ public: this->grow_pod(MinSize*sizeof(T), sizeof(T)); } }; - - + + /// SmallVectorImpl - This class consists of common code factored out of the /// SmallVector class to reduce code duplication based on the SmallVector 'N' /// template parameter. template class SmallVectorImpl : public SmallVectorTemplateBase::value> { typedef SmallVectorTemplateBase::value > SuperClass; + + SmallVectorImpl(const SmallVectorImpl&); // DISABLED. public: typedef typename SuperClass::iterator iterator; typedef typename SuperClass::size_type size_type; - + // Default ctor - Initialize to empty. explicit SmallVectorImpl(unsigned N) : SmallVectorTemplateBase::value>(N*sizeof(T)) { } - + ~SmallVectorImpl() { // Destroy the constructed elements in the vector. this->destroy_range(this->begin(), this->end()); - + // If this wasn't grown from the inline copy, deallocate the old space. if (!this->isSmall()) - operator delete(this->begin()); + free(this->begin()); } - - + + void clear() { this->destroy_range(this->begin(), this->end()); this->EndX = this->BeginX; @@ -319,7 +323,7 @@ public: if (this->capacity() < N) this->grow(N); } - + void push_back(const T &Elt) { if (this->EndX < this->CapacityX) { Retry: @@ -330,21 +334,21 @@ public: this->grow(); goto Retry; } - + void pop_back() { this->setEnd(this->end()-1); this->end()->~T(); } - + T pop_back_val() { T Result = this->back(); pop_back(); return Result; } - - + + void swap(SmallVectorImpl &RHS); - + /// append - Add the specified range to the end of the SmallVector. /// template @@ -353,26 +357,26 @@ public: // Grow allocated space if needed. if (NumInputs > size_type(this->capacity_ptr()-this->end())) this->grow(this->size()+NumInputs); - + // Copy the new elements over. // TODO: NEED To compile time dispatch on whether in_iter is a random access // iterator to use the fast uninitialized_copy. std::uninitialized_copy(in_start, in_end, this->end()); this->setEnd(this->end() + NumInputs); } - + /// append - Add the specified range to the end of the SmallVector. /// void append(size_type NumInputs, const T &Elt) { // Grow allocated space if needed. if (NumInputs > size_type(this->capacity_ptr()-this->end())) this->grow(this->size()+NumInputs); - + // Copy the new elements over. std::uninitialized_fill_n(this->end(), NumInputs, Elt); this->setEnd(this->end() + NumInputs); } - + void assign(unsigned NumElts, const T &Elt) { clear(); if (this->capacity() < NumElts) @@ -380,7 +384,7 @@ public: this->setEnd(this->begin()+NumElts); construct_range(this->begin(), this->end(), Elt); } - + iterator erase(iterator I) { iterator N = I; // Shift all elts down one. @@ -389,7 +393,7 @@ public: pop_back(); return(N); } - + iterator erase(iterator S, iterator E) { iterator N = S; // Shift all elts down. @@ -399,13 +403,13 @@ public: this->setEnd(I); return(N); } - + iterator insert(iterator I, const T &Elt) { if (I == this->end()) { // Important special case for empty vector. 
push_back(Elt); return this->end()-1; } - + if (this->EndX < this->CapacityX) { Retry: new (this->end()) T(this->back()); @@ -420,22 +424,22 @@ public: I = this->begin()+EltNo; goto Retry; } - + iterator insert(iterator I, size_type NumToInsert, const T &Elt) { if (I == this->end()) { // Important special case for empty vector. append(NumToInsert, Elt); return this->end()-1; } - + // Convert iterator to elt# to avoid invalidating iterator when we reserve() size_t InsertElt = I - this->begin(); - + // Ensure there is enough space. reserve(static_cast(this->size() + NumToInsert)); - + // Uninvalidate the iterator. I = this->begin()+InsertElt; - + // If there are more elements between the insertion point and the end of the // range than there are being inserted, we can use a simple approach to // insertion. Since we already reserved space, we know that this won't @@ -443,48 +447,48 @@ public: if (size_t(this->end()-I) >= NumToInsert) { T *OldEnd = this->end(); append(this->end()-NumToInsert, this->end()); - + // Copy the existing elements that get replaced. std::copy_backward(I, OldEnd-NumToInsert, OldEnd); - + std::fill_n(I, NumToInsert, Elt); return I; } - + // Otherwise, we're inserting more elements than exist already, and we're // not inserting at the end. - + // Copy over the elements that we're about to overwrite. T *OldEnd = this->end(); this->setEnd(this->end() + NumToInsert); size_t NumOverwritten = OldEnd-I; this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); - + // Replace the overwritten part. std::fill_n(I, NumOverwritten, Elt); - + // Insert the non-overwritten middle part. std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt); return I; } - + template iterator insert(iterator I, ItTy From, ItTy To) { if (I == this->end()) { // Important special case for empty vector. append(From, To); return this->end()-1; } - + size_t NumToInsert = std::distance(From, To); // Convert iterator to elt# to avoid invalidating iterator when we reserve() size_t InsertElt = I - this->begin(); - + // Ensure there is enough space. reserve(static_cast(this->size() + NumToInsert)); - + // Uninvalidate the iterator. I = this->begin()+InsertElt; - + // If there are more elements between the insertion point and the end of the // range than there are being inserted, we can use a simple approach to // insertion. Since we already reserved space, we know that this won't @@ -492,37 +496,37 @@ public: if (size_t(this->end()-I) >= NumToInsert) { T *OldEnd = this->end(); append(this->end()-NumToInsert, this->end()); - + // Copy the existing elements that get replaced. std::copy_backward(I, OldEnd-NumToInsert, OldEnd); - + std::copy(From, To, I); return I; } - + // Otherwise, we're inserting more elements than exist already, and we're // not inserting at the end. - + // Copy over the elements that we're about to overwrite. T *OldEnd = this->end(); this->setEnd(this->end() + NumToInsert); size_t NumOverwritten = OldEnd-I; this->uninitialized_copy(I, OldEnd, this->end()-NumOverwritten); - + // Replace the overwritten part. for (; NumOverwritten > 0; --NumOverwritten) { *I = *From; ++I; ++From; } - + // Insert the non-overwritten middle part. 
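(Aside: a recurring move in the insert() overloads above is that reserve() can reallocate and invalidate the insertion iterator, so the position is stashed as an index and the iterator rebuilt afterwards. The same pattern on std::vector; insert_one is a hypothetical helper, not LLVM API:

#include <cstddef>
#include <vector>

template <typename T>
typename std::vector<T>::iterator
insert_one(std::vector<T> &V, typename std::vector<T>::iterator I,
           const T &Elt) {
  std::size_t Idx = I - V.begin();  // convert the iterator to an element #
  V.reserve(V.size() + 1);          // may reallocate, invalidating I
  I = V.begin() + Idx;              // "uninvalidate" the iterator
  return V.insert(I, Elt);
}
)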
this->uninitialized_copy(From, To, OldEnd); return I; } - + const SmallVectorImpl &operator=(const SmallVectorImpl &RHS); - + bool operator==(const SmallVectorImpl &RHS) const { if (this->size() != RHS.size()) return false; return std::equal(this->begin(), this->end(), RHS.begin()); @@ -530,12 +534,12 @@ public: bool operator!=(const SmallVectorImpl &RHS) const { return !(*this == RHS); } - + bool operator<(const SmallVectorImpl &RHS) const { return std::lexicographical_compare(this->begin(), this->end(), RHS.begin(), RHS.end()); } - + /// set_size - Set the array size to \arg N, which the current array must have /// enough capacity for. /// @@ -549,14 +553,14 @@ public: assert(N <= this->capacity()); this->setEnd(this->begin() + N); } - + private: static void construct_range(T *S, T *E, const T &Elt) { for (; S != E; ++S) new (S) T(Elt); } }; - + template void SmallVectorImpl::swap(SmallVectorImpl &RHS) { diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h index c593c58..3a1319f 100644 --- a/include/llvm/ADT/Statistic.h +++ b/include/llvm/ADT/Statistic.h @@ -56,6 +56,10 @@ public: } const Statistic &operator++() { + // FIXME: This function and all those that follow carefully use an + // atomic operation to update the value safely in the presence of + // concurrent accesses, but not to read the return value, so the + // return value is not thread safe. sys::AtomicIncrement(&Value); return init(); } diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index be31ea0..feade6a 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -100,7 +100,8 @@ public: Psp, Solaris, Win32, - Haiku + Haiku, + Minix }; private: @@ -242,8 +243,8 @@ public: /// environment components with a single string. void setOSAndEnvironmentName(StringRef Str); - /// getArchNameForAssembler - Get an architecture name that is understood by the - /// target assembler. + /// getArchNameForAssembler - Get an architecture name that is understood by + /// the target assembler. const char *getArchNameForAssembler(); /// @} diff --git a/include/llvm/ADT/ValueMap.h b/include/llvm/ADT/ValueMap.h index 6f57fe8..9e30bd4 100644 --- a/include/llvm/ADT/ValueMap.h +++ b/include/llvm/ADT/ValueMap.h @@ -59,16 +59,16 @@ struct ValueMapConfig { struct ExtraData {}; template - static void onRAUW(const ExtraDataT &Data, KeyT Old, KeyT New) {} + static void onRAUW(const ExtraDataT & /*Data*/, KeyT /*Old*/, KeyT /*New*/) {} template - static void onDelete(const ExtraDataT &Data, KeyT Old) {} + static void onDelete(const ExtraDataT &/*Data*/, KeyT /*Old*/) {} /// Returns a mutex that should be acquired around any changes to the map. /// This is only acquired from the CallbackVH (and held around calls to onRAUW /// and onDelete) and not inside other ValueMap methods. NULL means that no /// mutex is necessary. template - static sys::Mutex *getMutex(const ExtraDataT &Data) { return NULL; } + static sys::Mutex *getMutex(const ExtraDataT &/*Data*/) { return NULL; } }; /// See the file comment. 
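(Aside: the FIXME added to Statistic.h above is worth seeing in miniature. The update itself is atomic, but a separate read of the counter afterwards can fold in other threads' increments; only a value derived from the atomic operation's own return is race-free. Illustrated with std::atomic rather than the sys::AtomicIncrement used above:

#include <atomic>

static std::atomic<unsigned> Counter(0);

unsigned bumpThenRead() {           // racy result: the load() is separate and
  Counter.fetch_add(1);             // may observe other threads' increments
  return Counter.load();
}

unsigned bumpAndGet() {             // race-free: derived from the old value
  return Counter.fetch_add(1) + 1;  // that fetch_add returned atomically
}
)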
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h index e4d26dd..9479d00 100644 --- a/include/llvm/ADT/ilist.h +++ b/include/llvm/ADT/ilist.h @@ -39,6 +39,7 @@ #define LLVM_ADT_ILIST_H #include +#include #include namespace llvm { diff --git a/include/llvm/AbstractTypeUser.h b/include/llvm/AbstractTypeUser.h index b6cceb4..81f5c5c 100644 --- a/include/llvm/AbstractTypeUser.h +++ b/include/llvm/AbstractTypeUser.h @@ -146,6 +146,7 @@ class PATypeHolder { mutable const Type *Ty; void destroy(); public: + PATypeHolder() : Ty(0) {} PATypeHolder(const Type *ty) : Ty(ty) { addRef(); } @@ -153,7 +154,7 @@ public: addRef(); } - ~PATypeHolder() { if (Ty) dropRef(); } + ~PATypeHolder() { dropRef(); } operator Type *() const { return get(); } Type *get() const; diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 9f41135..e611a35 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -165,27 +165,6 @@ public: /// ModRefInfo - Whether the pointer is loaded or stored to/from. /// ModRefResult ModRefInfo; - - /// AccessType - Specific fine-grained access information for the argument. - /// If none of these classifications is general enough, the - /// getModRefBehavior method should not return AccessesArguments*. If a - /// record is not returned for a particular argument, the argument is never - /// dead and never dereferenced. - enum AccessType { - /// ScalarAccess - The pointer is dereferenced. - /// - ScalarAccess, - - /// ArrayAccess - The pointer is indexed through as an array of elements. - /// - ArrayAccess, - - /// ElementAccess ?? P->F only? - - /// CallsThrough - Indirect calls are made through the specified function - /// pointer. - CallsThrough - }; }; /// getModRefBehavior - Return the behavior when calling the given call site. diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h index 6ad2e5a..ac8f596 100644 --- a/include/llvm/Analysis/CFGPrinter.h +++ b/include/llvm/Analysis/CFGPrinter.h @@ -1,4 +1,4 @@ -//===-- CFGPrinter.h - CFG printer external interface ------------*- C++ -*-===// +//===-- CFGPrinter.h - CFG printer external interface -----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -43,8 +43,8 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { return OS.str(); } - static std::string getCompleteNodeLabel(const BasicBlock *Node, - const Function *Graph) { + static std::string getCompleteNodeLabel(const BasicBlock *Node, + const Function *Graph) { std::string Str; raw_string_ostream OS(Str); diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h index 493ecf5..b3390f4 100644 --- a/include/llvm/Analysis/CaptureTracking.h +++ b/include/llvm/Analysis/CaptureTracking.h @@ -21,9 +21,9 @@ namespace llvm { /// by the enclosing function (which is required to exist). This routine can /// be expensive, so consider caching the results. The boolean ReturnCaptures /// specifies whether returning the value (or part of it) from the function - /// counts as capturing it or not. The boolean StoreCaptures specified whether - /// storing the value (or part of it) into memory anywhere automatically - /// counts as capturing it or not. + /// counts as capturing it or not. The boolean StoreCaptures specified + /// whether storing the value (or part of it) into memory anywhere + /// automatically counts as capturing it or not. 
   bool PointerMayBeCaptured(const Value *V,
                             bool ReturnCaptures, bool StoreCaptures);
 
diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h
new file mode 100644
index 0000000..58096f1
--- /dev/null
+++ b/include/llvm/Analysis/CodeMetrics.h
@@ -0,0 +1,72 @@
+//===- CodeMetrics.h - Measures the weight of a function---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements various weight measurements for a function, helping
+// the Inliner and PartialSpecialization decide whether to duplicate its
+// contents.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_CODEMETRICS_H
+#define LLVM_ANALYSIS_CODEMETRICS_H
+
+namespace llvm {
+  // CodeMetrics - Calculate size and a few similar metrics for a set of
+  // basic blocks.
+  struct CodeMetrics {
+    /// NeverInline - True if this callee should never be inlined into a
+    /// caller.
+    // bool NeverInline;
+
+    // True if this function contains a call to setjmp or _setjmp
+    bool callsSetJmp;
+
+    // True if this function calls itself
+    bool isRecursive;
+
+    // True if this function contains one or more indirect branches
+    bool containsIndirectBr;
+
+    /// usesDynamicAlloca - True if this function calls alloca (in the C sense).
+    bool usesDynamicAlloca;
+
+    /// NumInsts, NumBlocks - Keep track of how large each function is, which
+    /// is used to estimate the code size cost of inlining it.
+    unsigned NumInsts, NumBlocks;
+
+    /// NumBBInsts - Keeps track of basic block code size estimates.
+    DenseMap<const BasicBlock*, unsigned> NumBBInsts;
+
+    /// NumCalls - Keep track of the number of calls to 'big' functions.
+    unsigned NumCalls;
+
+    /// NumVectorInsts - Keep track of how many instructions produce vector
+    /// values.  The inliner is being more aggressive with inlining vector
+    /// kernels.
+    unsigned NumVectorInsts;
+
+    /// NumRets - Keep track of how many Ret instructions the block contains.
+    unsigned NumRets;
+
+    CodeMetrics() : callsSetJmp(false), isRecursive(false),
+                    containsIndirectBr(false), usesDynamicAlloca(false),
+                    NumInsts(0), NumBlocks(0), NumCalls(0), NumVectorInsts(0),
+                    NumRets(0) {}
+
+    /// analyzeBasicBlock - Add information about the specified basic block
+    /// to the current structure.
+    void analyzeBasicBlock(const BasicBlock *BB);
+
+    /// analyzeFunction - Add information about the specified function
+    /// to the current structure.
+    void analyzeFunction(Function *F);
+  };
+}
+
+#endif
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index 473b127..a85b6bc 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -56,6 +56,7 @@ namespace llvm {
     }
 
     GlobalVariable *getGlobalVariableField(unsigned Elt) const;
+    Function *getFunctionField(unsigned Elt) const;
 
   public:
     explicit DIDescriptor() : DbgNode(0) {}
@@ -409,6 +410,8 @@ namespace llvm {
     /// describes - Return true if this subprogram provides debugging
     /// information for the function F.
     bool describes(const Function *F);
+
+    Function *getFunction() const { return getFunctionField(16); }
   };
 
   /// DIGlobalVariable - This is a wrapper for a global variable.
@@ -577,7 +580,8 @@ namespace llvm {
                                            unsigned RunTimeVer = 0);
 
     /// CreateFile - Create a new descriptor for the specified file.
- DIFile CreateFile(StringRef Filename, StringRef Directory, DICompileUnit CU); + DIFile CreateFile(StringRef Filename, StringRef Directory, + DICompileUnit CU); /// CreateEnumerator - Create a single enumerator value. DIEnumerator CreateEnumerator(StringRef Name, uint64_t Val); @@ -658,7 +662,8 @@ namespace llvm { unsigned VIndex = 0, DIType = DIType(), bool isArtificial = 0, - bool isOptimized = false); + bool isOptimized = false, + Function *Fn = 0); /// CreateSubprogramDefinition - Create new subprogram descriptor for the /// given declaration. diff --git a/include/llvm/Analysis/DominatorInternals.h b/include/llvm/Analysis/DominatorInternals.h index 8cea96d..0419688 100644 --- a/include/llvm/Analysis/DominatorInternals.h +++ b/include/llvm/Analysis/DominatorInternals.h @@ -152,8 +152,9 @@ void Compress(DominatorTreeBase& DT, } template -typename GraphT::NodeType* Eval(DominatorTreeBase& DT, - typename GraphT::NodeType *V) { +typename GraphT::NodeType* +Eval(DominatorTreeBase& DT, + typename GraphT::NodeType *V) { typename DominatorTreeBase::InfoRec &VInfo = DT.Info[V]; #if !BALANCE_IDOM_TREE @@ -265,14 +266,17 @@ void Calculate(DominatorTreeBase::NodeType>& DT, // initialize the semi dominator to point to the parent node WInfo.Semi = WInfo.Parent; - for (typename GraphTraits >::ChildIteratorType CI = - GraphTraits >::child_begin(W), - E = GraphTraits >::child_end(W); CI != E; ++CI) - if (DT.Info.count(*CI)) { // Only if this predecessor is reachable! - unsigned SemiU = DT.Info[Eval(DT, *CI)].Semi; + typedef GraphTraits > InvTraits; + for (typename InvTraits::ChildIteratorType CI = + InvTraits::child_begin(W), + E = InvTraits::child_end(W); CI != E; ++CI) { + typename InvTraits::NodeType *N = *CI; + if (DT.Info.count(N)) { // Only if this predecessor is reachable! + unsigned SemiU = DT.Info[Eval(DT, N)].Semi; if (SemiU < WInfo.Semi) WInfo.Semi = SemiU; } + } DT.Info[DT.Vertex[WInfo.Semi]].Bucket.push_back(W); diff --git a/include/llvm/Analysis/Dominators.h b/include/llvm/Analysis/Dominators.h index f810310..1979d3f 100644 --- a/include/llvm/Analysis/Dominators.h +++ b/include/llvm/Analysis/Dominators.h @@ -246,22 +246,25 @@ protected: typename GraphT::NodeType* NewBBSucc = *GraphT::child_begin(NewBB); std::vector PredBlocks; - for (typename GraphTraits >::ChildIteratorType PI = - GraphTraits >::child_begin(NewBB), - PE = GraphTraits >::child_end(NewBB); PI != PE; ++PI) + typedef GraphTraits > InvTraits; + for (typename InvTraits::ChildIteratorType PI = + InvTraits::child_begin(NewBB), + PE = InvTraits::child_end(NewBB); PI != PE; ++PI) PredBlocks.push_back(*PI); - assert(!PredBlocks.empty() && "No predblocks??"); + assert(!PredBlocks.empty() && "No predblocks?"); bool NewBBDominatesNewBBSucc = true; - for (typename GraphTraits >::ChildIteratorType PI = - GraphTraits >::child_begin(NewBBSucc), - E = GraphTraits >::child_end(NewBBSucc); PI != E; ++PI) - if (*PI != NewBB && !DT.dominates(NewBBSucc, *PI) && - DT.isReachableFromEntry(*PI)) { + for (typename InvTraits::ChildIteratorType PI = + InvTraits::child_begin(NewBBSucc), + E = InvTraits::child_end(NewBBSucc); PI != E; ++PI) { + typename InvTraits::NodeType *ND = *PI; + if (ND != NewBB && !DT.dominates(NewBBSucc, ND) && + DT.isReachableFromEntry(ND)) { NewBBDominatesNewBBSucc = false; break; } + } // Find NewBB's immediate dominator and create new dominator tree node for // NewBB. 
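(Aside: both dominator hunks above apply the same refactoring: name GraphTraits<Inverse<...> > once as a local typedef and bind *PI to a named node before using it, instead of respelling the traits at every use. The skeleton of that idiom; forEachPredecessor is illustrative, while the traits interface follows LLVM's GraphTraits convention:

#include "llvm/ADT/GraphTraits.h"

template <class NodeT>
void forEachPredecessor(NodeT *BB) {
  typedef llvm::GraphTraits<llvm::Inverse<NodeT*> > InvTraits;
  for (typename InvTraits::ChildIteratorType PI = InvTraits::child_begin(BB),
       PE = InvTraits::child_end(BB); PI != PE; ++PI) {
    typename InvTraits::NodeType *Pred = *PI;  // a predecessor of BB
    (void)Pred;  // ... process Pred here ...
  }
}
)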
@@ -704,7 +707,6 @@ public: } ~DominatorTree() { - DT->releaseMemory(); delete DT; } diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index cac7cfe..462bddd 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -19,6 +19,7 @@ #include #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ValueMap.h" +#include "llvm/Analysis/CodeMetrics.h" namespace llvm { @@ -29,46 +30,6 @@ namespace llvm { template class SmallPtrSet; - // CodeMetrics - Calculate size and a few similar metrics for a set of - // basic blocks. - struct CodeMetrics { - /// NeverInline - True if this callee should never be inlined into a - /// caller. - bool NeverInline; - - /// usesDynamicAlloca - True if this function calls alloca (in the C sense). - bool usesDynamicAlloca; - - /// NumInsts, NumBlocks - Keep track of how large each function is, which - /// is used to estimate the code size cost of inlining it. - unsigned NumInsts, NumBlocks; - - /// NumBBInsts - Keeps track of basic block code size estimates. - DenseMap NumBBInsts; - - /// NumCalls - Keep track of the number of calls to 'big' functions. - unsigned NumCalls; - - /// NumVectorInsts - Keep track of how many instructions produce vector - /// values. The inliner is being more aggressive with inlining vector - /// kernels. - unsigned NumVectorInsts; - - /// NumRets - Keep track of how many Ret instructions the block contains. - unsigned NumRets; - - CodeMetrics() : NeverInline(false), usesDynamicAlloca(false), NumInsts(0), - NumBlocks(0), NumCalls(0), NumVectorInsts(0), NumRets(0) {} - - /// analyzeBasicBlock - Add information about the specified basic block - /// to the current structure. - void analyzeBasicBlock(const BasicBlock *BB); - - /// analyzeFunction - Add information about the specified function - /// to the current structure. - void analyzeFunction(Function *F); - }; - namespace InlineConstants { // Various magic constants used to adjust heuristics. const int InstrCost = 5; @@ -163,6 +124,10 @@ namespace llvm { /// analyzeFunction - Add information about the specified function /// to the current structure. void analyzeFunction(Function *F); + + /// NeverInline - Returns true if the function should never be + /// inlined into any caller. 
+ bool NeverInline(); }; // The Function* for a function can be changed (by ArgumentPromotion); diff --git a/include/llvm/Analysis/IntervalIterator.h b/include/llvm/Analysis/IntervalIterator.h index d842840..82b3294 100644 --- a/include/llvm/Analysis/IntervalIterator.h +++ b/include/llvm/Analysis/IntervalIterator.h @@ -36,9 +36,9 @@ #include "llvm/Analysis/IntervalPartition.h" #include "llvm/Function.h" #include "llvm/Support/CFG.h" -#include -#include #include +#include +#include namespace llvm { @@ -88,7 +88,7 @@ inline void addNodeToInterval(Interval *Int, Interval *I) { template, class IGT = GraphTraits > > class IntervalIterator { - std::stack > IntStack; + std::vector > IntStack; std::set Visited; OrigContainer_t *OrigContainer; bool IOwnMem; // If True, delete intervals when done with them @@ -116,15 +116,15 @@ public: if (IOwnMem) while (!IntStack.empty()) { delete operator*(); - IntStack.pop(); + IntStack.pop_back(); } } inline bool operator==(const _Self& x) const { return IntStack == x.IntStack;} inline bool operator!=(const _Self& x) const { return !operator==(x); } - inline const Interval *operator*() const { return IntStack.top().first; } - inline Interval *operator*() { return IntStack.top().first; } + inline const Interval *operator*() const { return IntStack.back().first; } + inline Interval *operator*() { return IntStack.back().first; } inline const Interval *operator->() const { return operator*(); } inline Interval *operator->() { return operator*(); } @@ -133,8 +133,8 @@ public: do { // All of the intervals on the stack have been visited. Try visiting // their successors now. - Interval::succ_iterator &SuccIt = IntStack.top().second, - EndIt = succ_end(IntStack.top().first); + Interval::succ_iterator &SuccIt = IntStack.back().second, + EndIt = succ_end(IntStack.back().first); while (SuccIt != EndIt) { // Loop over all interval succs bool Done = ProcessInterval(getSourceGraphNode(OrigContainer, *SuccIt)); ++SuccIt; // Increment iterator @@ -142,10 +142,10 @@ public: } // Free interval memory... if necessary - if (IOwnMem) delete IntStack.top().first; + if (IOwnMem) delete IntStack.back().first; // We ran out of successors for this interval... pop off the stack - IntStack.pop(); + IntStack.pop_back(); } while (!IntStack.empty()); return *this; @@ -175,7 +175,7 @@ private: E = GT::child_end(Node); I != E; ++I) ProcessNode(Int, getSourceGraphNode(OrigContainer, *I)); - IntStack.push(std::make_pair(Int, succ_begin(Int))); + IntStack.push_back(std::make_pair(Int, succ_begin(Int))); return true; } diff --git a/include/llvm/Analysis/Loads.h b/include/llvm/Analysis/Loads.h new file mode 100644 index 0000000..1574262 --- /dev/null +++ b/include/llvm/Analysis/Loads.h @@ -0,0 +1,51 @@ +//===- Loads.h - Local load analysis --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares simple local analyses for load instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_LOADS_H +#define LLVM_ANALYSIS_LOADS_H + +#include "llvm/BasicBlock.h" + +namespace llvm { + +class AliasAnalysis; +class TargetData; + +/// isSafeToLoadUnconditionally - Return true if we know that executing a load +/// from this value cannot trap. 
If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,
+                                 unsigned Align, const TargetData *TD = 0);
+
+/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at
+/// the instruction before ScanFrom) checking to see if we have the value at
+/// the memory address *Ptr locally available within a small number of
+/// instructions.  If the value is available, return it.
+///
+/// If not, return the iterator for the last validated instruction that the
+/// value would be live through.  If we scanned the entire block and didn't
+/// find something that invalidates *Ptr or provides it, ScanFrom would be
+/// left at begin() and this returns null.  ScanFrom could also be left
+///
+/// MaxInstsToScan specifies the maximum instructions to scan in the block.
+/// If it is set to 0, it will scan the whole block.  You can also optionally
+/// specify an alias analysis implementation, which makes this more precise.
+Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,
+                                BasicBlock::iterator &ScanFrom,
+                                unsigned MaxInstsToScan = 6,
+                                AliasAnalysis *AA = 0);
+
+}
+
+#endif
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 2babc25..9455fd8 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -256,6 +256,27 @@ public:
   ///
   BlockT *getLoopPreheader() const {
     // Keep track of nodes outside the loop branching to the header...
+    BlockT *Out = getLoopPredecessor();
+    if (!Out) return 0;
+
+    // Make sure there is only one exit out of the preheader.
+    typedef GraphTraits<BlockT*> BlockTraits;
+    typename BlockTraits::ChildIteratorType SI = BlockTraits::child_begin(Out);
+    ++SI;
+    if (SI != BlockTraits::child_end(Out))
+      return 0;  // Multiple exits from the block, must not be a preheader.
+
+    // The predecessor has exactly one successor, so it is a preheader.
+    return Out;
+  }
+
+  /// getLoopPredecessor - If the given loop's header has exactly one unique
+  /// predecessor outside the loop, return it.  Otherwise return null.
+  /// This is less strict than the loop "preheader" concept, which requires
+  /// the predecessor to have exactly one successor.
+  ///
+  BlockT *getLoopPredecessor() const {
+    // Keep track of nodes outside the loop branching to the header...
     BlockT *Out = 0;
 
     // Loop over the predecessors of the header node...
@@ -264,22 +285,17 @@ public:
     typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
     for (typename InvBlockTraits::ChildIteratorType PI =
          InvBlockTraits::child_begin(Header),
-         PE = InvBlockTraits::child_end(Header); PI != PE; ++PI)
-      if (!contains(*PI)) {     // If the block is not in the loop...
-        if (Out && Out != *PI)
+         PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) {
+      typename InvBlockTraits::NodeType *N = *PI;
+      if (!contains(N)) {     // If the block is not in the loop...
+        if (Out && Out != N)
           return 0;             // Multiple predecessors outside the loop
-        Out = *PI;
+        Out = N;
       }
+    }
 
     // Make sure there is only one exit out of the preheader.
     assert(Out && "Header of loop has no predecessors from outside loop?");
-    typename BlockTraits::ChildIteratorType SI = BlockTraits::child_begin(Out);
-    ++SI;
-    if (SI != BlockTraits::child_end(Out))
-      return 0;  // Multiple exits from the block, must not be a preheader.
-
-    // If there is exactly one preheader, return it.  If there was zero, then
-    // Out is still null.
return Out; } @@ -293,11 +309,13 @@ public: typename InvBlockTraits::ChildIteratorType PE = InvBlockTraits::child_end(Header); BlockT *Latch = 0; - for (; PI != PE; ++PI) - if (contains(*PI)) { + for (; PI != PE; ++PI) { + typename InvBlockTraits::NodeType *N = *PI; + if (contains(N)) { if (Latch) return 0; - Latch = *PI; + Latch = N; } + } return Latch; } @@ -409,10 +427,11 @@ public: for (typename InvBlockTraits::ChildIteratorType PI = InvBlockTraits::child_begin(BB), PE = InvBlockTraits::child_end(BB); PI != PE; ++PI) { - if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *PI)) + typename InvBlockTraits::NodeType *N = *PI; + if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), N)) HasInsideLoopPreds = true; else - OutsideLoopPreds.push_back(*PI); + OutsideLoopPreds.push_back(N); } if (BB == getHeader()) { @@ -743,9 +762,11 @@ public: typedef GraphTraits > InvBlockTraits; for (typename InvBlockTraits::ChildIteratorType I = InvBlockTraits::child_begin(BB), E = InvBlockTraits::child_end(BB); - I != E; ++I) - if (DT.dominates(BB, *I)) // If BB dominates its predecessor... - TodoStack.push_back(*I); + I != E; ++I) { + typename InvBlockTraits::NodeType *N = *I; + if (DT.dominates(BB, N)) // If BB dominates its predecessor... + TodoStack.push_back(N); + } if (TodoStack.empty()) return 0; // No backedges to this block... diff --git a/include/llvm/Analysis/MemoryBuiltins.h b/include/llvm/Analysis/MemoryBuiltins.h index a7f42c9..a4f9162 100644 --- a/include/llvm/Analysis/MemoryBuiltins.h +++ b/include/llvm/Analysis/MemoryBuiltins.h @@ -72,8 +72,8 @@ Value *getMallocArraySize(CallInst *CI, const TargetData *TD, // free Call Utility Functions. // -/// isFreeCall - Returns true if the value is a call to the builtin free() -bool isFreeCall(const Value *I); +/// isFreeCall - Returns non-null if the value is a call to the builtin free() +const CallInst *isFreeCall(const Value *I); } // End llvm namespace diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index d3a8d8f..8da3af0 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -343,10 +343,6 @@ namespace llvm { BackedgeTakenInfo HowManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L, bool isSigned); - /// getLoopPredecessor - If the given loop's header has exactly one unique - /// predecessor outside the loop, return it. Otherwise return null. - BasicBlock *getLoopPredecessor(const Loop *L); - /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB /// (which may not be an immediate predecessor) which has exactly one /// successor from which BB is reachable, or null if no such block is @@ -530,10 +526,6 @@ namespace llvm { /// widening. const SCEV *getTruncateOrNoop(const SCEV *V, const Type *Ty); - /// getIntegerSCEV - Given a SCEVable type, create a constant for the - /// specified signed integer value and return a SCEV for the constant. - const SCEV *getIntegerSCEV(int64_t Val, const Type *Ty); - /// getUMaxFromMismatchedTypes - Promote the operands to the wider of /// the types using zero-extension, and then perform a umax operation /// with them. 
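(Aside: the getLoopPreheader/getLoopPredecessor split in LoopInfo.h above, mirrored by the removal from ScalarEvolution.h, is easy to state as a toy: a loop predecessor is the unique out-of-loop block branching to the header, and it is a preheader only if it additionally has exactly one successor. Block and the containers below are hypothetical stand-ins for the real CFG types:

#include <cstddef>
#include <set>
#include <vector>

struct Block { std::vector<Block*> Preds, Succs; };

Block *loopPredecessor(Block *Header, const std::set<Block*> &LoopBlocks) {
  Block *Out = 0;
  for (std::size_t i = 0, e = Header->Preds.size(); i != e; ++i) {
    Block *P = Header->Preds[i];
    if (LoopBlocks.count(P)) continue;  // ignore backedges from inside
    if (Out && Out != P) return 0;      // multiple outside predecessors
    Out = P;
  }
  return Out;                           // may be null: header had none
}

Block *loopPreheader(Block *Header, const std::set<Block*> &LoopBlocks) {
  Block *Out = loopPredecessor(Header, LoopBlocks);
  return (Out && Out->Succs.size() == 1) ? Out : 0;  // single exit required
}
)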
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h index baf6946..9501555 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpander.h +++ b/include/llvm/Analysis/ScalarEvolutionExpander.h @@ -32,6 +32,7 @@ namespace llvm { std::map, AssertingVH > InsertedExpressions; std::set InsertedValues; + std::set InsertedPostIncValues; /// PostIncLoops - Addrecs referring to any of the given loops are expanded /// in post-inc mode. For example, expanding {1,+,1} in post-inc mode @@ -102,6 +103,10 @@ namespace llvm { /// clearPostInc - Disable all post-inc expansion. void clearPostInc() { PostIncLoops.clear(); + + // When we change the post-inc loop set, cached expansions may no + // longer be valid. + InsertedPostIncValues.clear(); } /// disableCanonicalMode - Disable the behavior of expanding expressions in @@ -123,6 +128,14 @@ namespace llvm { /// of work to avoid inserting an obviously redundant operation. Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS); + /// ReuseOrCreateCast - Arange for there to be a cast of V to Ty at IP, + /// reusing an existing cast if a suitable one exists, moving an existing + /// cast if a suitable one exists but isn't in the right place, or + /// or creating a new one. + Value *ReuseOrCreateCast(Value *V, const Type *Ty, + Instruction::CastOps Op, + BasicBlock::iterator IP); + /// InsertNoopCastOfTo - Insert a cast of V to the specified type, /// which must be possible with a noop cast, doing what we can to /// share the casts. @@ -146,7 +159,7 @@ namespace llvm { /// inserted by the code rewriter. If so, the client should not modify the /// instruction. bool isInsertedInstruction(Instruction *I) const { - return InsertedValues.count(I); + return InsertedValues.count(I) || InsertedPostIncValues.count(I); } Value *visitConstant(const SCEVConstant *S) { diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index d580897..b9634f0 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -97,7 +97,7 @@ namespace llvm { - /// FindScalarValue - Given an aggregrate and an sequence of indices, see if + /// FindInsertedValue - Given an aggregrate and an sequence of indices, see if /// the scalar value indexed is already around as a register, for example if /// it were inserted directly into the aggregrate. /// diff --git a/include/llvm/Bitcode/ReaderWriter.h b/include/llvm/Bitcode/ReaderWriter.h index 45eb801..a186964 100644 --- a/include/llvm/Bitcode/ReaderWriter.h +++ b/include/llvm/Bitcode/ReaderWriter.h @@ -40,7 +40,8 @@ namespace llvm { std::string *ErrMsg = 0); /// WriteBitcodeToFile - Write the specified module to the specified - /// raw output stream. + /// raw output stream. For streams where it matters, the given stream + /// should be in "binary" mode. void WriteBitcodeToFile(const Module *M, raw_ostream &Out); /// WriteBitcodeToStream - Write the specified module to the specified diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index 243ddbb..7ca6c62 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -64,7 +64,7 @@ namespace llvm { /// Target machine description. /// TargetMachine &TM; - + /// Target Asm Printer information. /// const MCAsmInfo *MAI; @@ -73,13 +73,13 @@ namespace llvm { /// streaming. This owns all of the global MC-related objects for the /// generated translation unit. 
MCContext &OutContext; - + /// OutStreamer - This is the MCStreamer object for the file we are /// generating. This contains the transient state for the current /// translation unit that we are generating (such as the current section /// etc). MCStreamer &OutStreamer; - + /// The current machine function. const MachineFunction *MF; @@ -94,30 +94,30 @@ namespace llvm { /// beginning of each call to runOnMachineFunction(). /// MCSymbol *CurrentFnSym; - + private: // GCMetadataPrinters - The garbage collection metadata printer table. void *GCMetadataPrinters; // Really a DenseMap. - + /// VerboseAsm - Emit comments in assembly output if this is true. /// bool VerboseAsm; static char ID; - + /// If VerboseAsm is set, a pointer to the loop info for this /// function. MachineLoopInfo *LI; /// DD - If the target supports dwarf debug info, this pointer is non-null. DwarfDebug *DD; - + /// DE - If the target supports dwarf exception info, this pointer is /// non-null. DwarfException *DE; - + protected: explicit AsmPrinter(TargetMachine &TM, MCStreamer &Streamer); - + public: virtual ~AsmPrinter(); @@ -128,7 +128,7 @@ namespace llvm { /// getFunctionNumber - Return a unique ID for the current function. /// unsigned getFunctionNumber() const; - + /// getObjFileLowering - Return information about object file lowering. const TargetLoweringObjectFile &getObjFileLowering() const; @@ -137,16 +137,16 @@ namespace llvm { /// getCurrentSection() - Return the current section we are emitting to. const MCSection *getCurrentSection() const; - - + + //===------------------------------------------------------------------===// // MachineFunctionPass Implementation. //===------------------------------------------------------------------===// - + /// getAnalysisUsage - Record analysis usage. - /// + /// void getAnalysisUsage(AnalysisUsage &AU) const; - + /// doInitialization - Set up the AsmPrinter when we are working on a new /// module. If your pass overrides this, it must make sure to explicitly /// call this implementation. @@ -155,7 +155,7 @@ namespace llvm { /// doFinalization - Shut down the asmprinter. If you override this in your /// pass, you must make sure to call it explicitly. bool doFinalization(Module &M); - + /// runOnMachineFunction - Emit the specified function out to the /// OutStreamer. virtual bool runOnMachineFunction(MachineFunction &MF) { @@ -163,20 +163,20 @@ namespace llvm { EmitFunctionHeader(); EmitFunctionBody(); return false; - } - + } + //===------------------------------------------------------------------===// // Coarse grained IR lowering routines. //===------------------------------------------------------------------===// - + /// SetupMachineFunction - This should be called when a new MachineFunction /// is being processed from runOnMachineFunction. void SetupMachineFunction(MachineFunction &MF); - + /// EmitFunctionHeader - This method emits the header for the current /// function. void EmitFunctionHeader(); - + /// EmitFunctionBody - This method emits the body and trailer for a /// function. void EmitFunctionBody(); @@ -187,15 +187,15 @@ namespace llvm { /// the code generator. /// virtual void EmitConstantPool(); - - /// EmitJumpTableInfo - Print assembly representations of the jump tables - /// used by the current function to the current output stream. + + /// EmitJumpTableInfo - Print assembly representations of the jump tables + /// used by the current function to the current output stream. 
/// void EmitJumpTableInfo(); - + /// EmitGlobalVariable - Emit the specified global variable to the .s file. virtual void EmitGlobalVariable(const GlobalVariable *GV); - + /// EmitSpecialLLVMGlobal - Check to see if the specified global is a /// special global used by LLVM. If so, emit it and return true, otherwise /// do nothing and return false. @@ -208,54 +208,54 @@ namespace llvm { /// if required for correctness. /// void EmitAlignment(unsigned NumBits, const GlobalValue *GV = 0) const; - + /// EmitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing /// it if appropriate. void EmitBasicBlockStart(const MachineBasicBlock *MBB) const; - + /// EmitGlobalConstant - Print a general LLVM constant to the .s file. void EmitGlobalConstant(const Constant *CV, unsigned AddrSpace = 0); - - + + //===------------------------------------------------------------------===// // Overridable Hooks //===------------------------------------------------------------------===// - + // Targets can, or in the case of EmitInstruction, must implement these to // customize output. - + /// EmitStartOfAsmFile - This virtual method can be overridden by targets /// that want to emit something at the start of their file. virtual void EmitStartOfAsmFile(Module &) {} - + /// EmitEndOfAsmFile - This virtual method can be overridden by targets that /// want to emit something at the end of their file. virtual void EmitEndOfAsmFile(Module &) {} - + /// EmitFunctionBodyStart - Targets can override this to emit stuff before /// the first basic block in the function. virtual void EmitFunctionBodyStart() {} - + /// EmitFunctionBodyEnd - Targets can override this to emit stuff after /// the last basic block in the function. virtual void EmitFunctionBodyEnd() {} - + /// EmitInstruction - Targets should implement this to emit instructions. virtual void EmitInstruction(const MachineInstr *) { assert(0 && "EmitInstruction not implemented"); } - + virtual void EmitFunctionEntryLabel(); - + virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV); - + /// isBlockOnlyReachableByFallthough - Return true if the basic block has /// exactly one predecessor and the control transfer mechanism between /// the predecessor and this block is a fall-through. virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; - + //===------------------------------------------------------------------===// // Symbol Lowering Routines. //===------------------------------------------------------------------===// @@ -264,23 +264,23 @@ namespace llvm { /// GetTempSymbol - Return the MCSymbol corresponding to the assembler /// temporary label with the specified stem and unique ID. MCSymbol *GetTempSymbol(StringRef Name, unsigned ID) const; - + /// GetTempSymbol - Return an assembler temporary label with the specified /// stem. MCSymbol *GetTempSymbol(StringRef Name) const; - - + + /// GetSymbolWithGlobalValueBase - Return the MCSymbol for a symbol with /// global value name as its base, with the specified suffix, and where the /// symbol is forced to have private linkage if ForcePrivate is true. MCSymbol *GetSymbolWithGlobalValueBase(const GlobalValue *GV, StringRef Suffix, bool ForcePrivate = true) const; - + /// GetExternalSymbolSymbol - Return the MCSymbol for the specified /// ExternalSymbol. MCSymbol *GetExternalSymbolSymbol(StringRef Sym) const; - + /// GetCPISymbol - Return the symbol for the specified constant pool entry. 
MCSymbol *GetCPISymbol(unsigned CPID) const; @@ -302,42 +302,42 @@ namespace llvm { public: /// printOffset - This is just convenient handler for printing offsets. void printOffset(int64_t Offset, raw_ostream &OS) const; - + /// EmitInt8 - Emit a byte directive and value. /// void EmitInt8(int Value) const; - + /// EmitInt16 - Emit a short directive and value. /// void EmitInt16(int Value) const; - + /// EmitInt32 - Emit a long directive and value. /// void EmitInt32(int Value) const; - + /// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size /// in bytes of the directive is specified by Size and Hi/Lo specify the /// labels. This implicitly uses .set if it is available. void EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const; - - /// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" + + /// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" /// where the size in bytes of the directive is specified by Size and Hi/Lo /// specify the labels. This implicitly uses .set if it is available. void EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, const MCSymbol *Lo, unsigned Size) const; - + //===------------------------------------------------------------------===// // Dwarf Emission Helper Routines //===------------------------------------------------------------------===// - + /// EmitSLEB128 - emit the specified signed leb128 value. void EmitSLEB128(int Value, const char *Desc = 0) const; - + /// EmitULEB128 - emit the specified unsigned leb128 value. void EmitULEB128(unsigned Value, const char *Desc = 0, unsigned PadTo = 0) const; - + /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. void EmitCFAByte(unsigned Val) const; @@ -346,15 +346,15 @@ namespace llvm { /// describing the encoding. Desc is a string saying what the encoding is /// specifying (e.g. "LSDA"). void EmitEncodingByte(unsigned Val, const char *Desc = 0) const; - + /// GetSizeOfEncodedValue - Return the size of the encoding in bytes. unsigned GetSizeOfEncodedValue(unsigned Encoding) const; - + /// EmitReference - Emit a reference to a label with a specified encoding. /// void EmitReference(const MCSymbol *Sym, unsigned Encoding) const; void EmitReference(const GlobalValue *GV, unsigned Encoding) const; - + /// EmitSectionOffset - Emit the 4-byte offset of Label from the start of /// its section. This can be done with a special directive if the target /// supports it (e.g. cygwin) or by emitting it as an offset from a label at @@ -372,20 +372,20 @@ namespace llvm { //===------------------------------------------------------------------===// // Dwarf Lowering Routines //===------------------------------------------------------------------===// - + /// EmitFrameMoves - Emit frame instructions to describe the layout of the /// frame. - void EmitFrameMoves(const std::vector &Moves, + void EmitFrameMoves(const std::vector &Moves, MCSymbol *BaseLabel, bool isEH) const; - - + + //===------------------------------------------------------------------===// // Inline Asm Support //===------------------------------------------------------------------===// public: // These are hooks that targets can override to implement inline asm // support. These should probably be moved out of AsmPrinter someday. - + /// PrintSpecial - Print information related to the specified machine instr /// that is independent of the operand, and may be independent of the instr /// itself. 
This can be useful for portably encoding the comment character @@ -394,7 +394,7 @@ namespace llvm { /// for their own strange codes. virtual void PrintSpecial(const MachineInstr *MI, raw_ostream &OS, const char *Code) const; - + /// PrintAsmOperand - Print the specified operand of MI, an INLINEASM /// instruction, using the specified assembler variant. Targets should /// override this to format as appropriate. This method can return true if @@ -402,16 +402,16 @@ namespace llvm { virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS); - + /// PrintAsmMemoryOperand - Print the specified operand of MI, an INLINEASM /// instruction, using the specified assembler variant as an address. /// Targets should override this to format as appropriate. This method can /// return true if the operand is erroneous. virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, + unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS); - + private: /// Private state for PrintSpecial() // Assign a unique ID to this machine instruction. @@ -422,7 +422,7 @@ namespace llvm { /// EmitInlineAsm - Emit a blob of inline asm to the output streamer. void EmitInlineAsm(StringRef Str, unsigned LocCookie) const; - + /// EmitInlineAsm - This method formats and emits the specified machine /// instruction that is an inline asm. void EmitInlineAsm(const MachineInstr *MI) const; @@ -430,13 +430,13 @@ namespace llvm { //===------------------------------------------------------------------===// // Internal Implementation Details //===------------------------------------------------------------------===// - + /// EmitVisibility - This emits visibility information about symbol, if /// this is suported by the target. void EmitVisibility(MCSymbol *Sym, unsigned Visibility) const; - + void EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const; - + void EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid) const; diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 45a2757..7911907 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -17,14 +17,13 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Target/TargetCallingConv.h" #include "llvm/CallingConv.h" namespace llvm { class TargetRegisterInfo; class TargetMachine; class CCState; - class SDNode; /// CCValAssign - Represent assignment of one arg/retval to a location. class CCValAssign { @@ -35,6 +34,9 @@ public: ZExt, // The value is zero extended in the location. AExt, // The value is extended with undefined upper bits. BCvt, // The value is bit-converted in the location. + VExt, // The value is vector-widened in the location. + // FIXME: Not implemented yet. Code that uses AExt to mean + // vector-widen should be fixed to use VExt instead. Indirect // The location contains pointer to the value. // TODO: a subset of the value is in the location. }; @@ -186,8 +188,7 @@ public: /// CheckReturn - Analyze the return values of a function, returning /// true if the return can be performed without sret-demotion, and /// false otherwise. 
- bool CheckReturn(const SmallVectorImpl &OutTys, - const SmallVectorImpl &ArgsFlags, + bool CheckReturn(const SmallVectorImpl &ArgsFlags, CCAssignFn Fn); /// AnalyzeCallOperands - Analyze the outgoing arguments to a call, diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index 005c7bc..7f3a7c7 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -19,11 +19,13 @@ #include "llvm/ADT/SmallSet.h" #endif #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" namespace llvm { class AllocaInst; class ConstantFP; +class FunctionLoweringInfo; class Instruction; class MachineBasicBlock; class MachineConstantPool; @@ -36,22 +38,15 @@ class TargetInstrInfo; class TargetLowering; class TargetMachine; class TargetRegisterClass; +class TargetRegisterInfo; /// FastISel - This is a fast-path instruction selection class that /// generates poor code and doesn't support illegal types or non-trivial /// lowering, but runs quickly. class FastISel { protected: - MachineBasicBlock *MBB; DenseMap LocalValueMap; - DenseMap &ValueMap; - DenseMap &MBBMap; - DenseMap &StaticAllocaMap; - std::vector > &PHINodesToUpdate; -#ifndef NDEBUG - SmallSet &CatchInfoLost; -#endif - MachineFunction &MF; + FunctionLoweringInfo &FuncInfo; MachineRegisterInfo &MRI; MachineFrameInfo &MFI; MachineConstantPool &MCP; @@ -60,23 +55,22 @@ protected: const TargetData &TD; const TargetInstrInfo &TII; const TargetLowering &TLI; - bool IsBottomUp; + const TargetRegisterInfo &TRI; + MachineInstr *LastLocalValue; public: + /// getLastLocalValue - Return the position of the last instruction + /// emitted for materializing constants for use in the current block. + MachineInstr *getLastLocalValue() { return LastLocalValue; } + + /// setLastLocalValue - Update the position of the last instruction + /// emitted for materializing constants for use in the current block. + void setLastLocalValue(MachineInstr *I) { LastLocalValue = I; } + /// startNewBlock - Set the current block to which generated machine /// instructions will be appended, and clear the local CSE map. /// - void startNewBlock(MachineBasicBlock *mbb) { - setCurrentBlock(mbb); - LocalValueMap.clear(); - } - - /// setCurrentBlock - Set the current block to which generated machine - /// instructions will be appended. - /// - void setCurrentBlock(MachineBasicBlock *mbb) { - MBB = mbb; - } + void startNewBlock(); /// getCurDebugLoc() - Return current debug location information. DebugLoc getCurDebugLoc() const { return DL; } @@ -108,18 +102,21 @@ public: /// index value. std::pair getRegForGEPIndex(const Value *V); + /// recomputeInsertPt - Reset InsertPt to prepare for insterting instructions + /// into the current block. + void recomputeInsertPt(); + + /// enterLocalValueArea - Prepare InsertPt to begin inserting instructions + /// into the local value area and return the old insert position. + MachineBasicBlock::iterator enterLocalValueArea(); + + /// leaveLocalValueArea - Reset InsertPt to the given old insert position + void leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt); + virtual ~FastISel(); protected: - FastISel(MachineFunction &mf, - DenseMap &vm, - DenseMap &bm, - DenseMap &am, - std::vector > &PHINodesToUpdate -#ifndef NDEBUG - , SmallSet &cil -#endif - ); + explicit FastISel(FunctionLoweringInfo &funcInfo); /// TargetSelectInstruction - This method is called by target-independent /// code when the normal FastISel process fails to select an instruction. 
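(Aside: the enterLocalValueArea/leaveLocalValueArea pair declared above implements a save/move/restore of the insertion point: constants get materialized once in a "local value area" at the top of the block, then normal emission resumes. A shape-only sketch; InsertCursor and its fields are hypothetical:

struct InsertCursor {
  unsigned InsertPt;       // stand-in for MachineBasicBlock::iterator
  unsigned LocalValueEnd;  // one past the last materialized local value

  unsigned enterLocalValueArea() {
    unsigned Old = InsertPt;
    InsertPt = LocalValueEnd;  // emit among the local values
    return Old;
  }
  void leaveLocalValueArea(unsigned Old) {
    LocalValueEnd = InsertPt;  // the area grew by whatever was emitted
    InsertPt = Old;            // resume normal emission
  }
};
)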
@@ -286,7 +283,7 @@ protected: /// FastEmitBranch - Emit an unconditional branch to the given block, /// unless it is the immediate (fall-through) successor, and update /// the CFG. - void FastEmitBranch(MachineBasicBlock *MBB); + void FastEmitBranch(MachineBasicBlock *MBB, DebugLoc DL); unsigned UpdateValueMap(const Value* I, unsigned Reg); @@ -305,6 +302,8 @@ protected: } private: + bool SelectLoad(const User *I); + bool SelectBinaryOp(const User *I, unsigned ISDOpcode); bool SelectFNeg(const User *I); diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h new file mode 100644 index 0000000..c49d1ed --- /dev/null +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -0,0 +1,154 @@ +//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating functions from LLVM IR into +// Machine IR. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H +#define LLVM_CODEGEN_FUNCTIONLOWERINGINFO_H + +#include "llvm/InlineAsm.h" +#include "llvm/Instructions.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#ifndef NDEBUG +#include "llvm/ADT/SmallSet.h" +#endif +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/CallSite.h" +#include + +namespace llvm { + +class AllocaInst; +class BasicBlock; +class CallInst; +class Function; +class GlobalVariable; +class Instruction; +class MachineInstr; +class MachineBasicBlock; +class MachineFunction; +class MachineModuleInfo; +class MachineRegisterInfo; +class TargetLowering; +class Value; + +//===--------------------------------------------------------------------===// +/// FunctionLoweringInfo - This contains information that is global to a +/// function that is used when lowering a region of the function. +/// +class FunctionLoweringInfo { +public: + const TargetLowering &TLI; + const Function *Fn; + MachineFunction *MF; + MachineRegisterInfo *RegInfo; + + /// CanLowerReturn - true iff the function's return value can be lowered to + /// registers. + bool CanLowerReturn; + + /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg + /// allocated to hold a pointer to the hidden sret parameter. + unsigned DemoteRegister; + + /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. + DenseMap MBBMap; + + /// ValueMap - Since we emit code for the function a basic block at a time, + /// we must remember which virtual registers hold the values for + /// cross-basic-block values. + DenseMap ValueMap; + + /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in + /// the entry block. This allows the allocas to be efficiently referenced + /// anywhere in the function. + DenseMap StaticAllocaMap; + + /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for + /// function arguments that are inserted after scheduling is completed. + SmallVector ArgDbgValues; + + /// RegFixups - Registers which need to be replaced after isel is done. + DenseMap RegFixups; + + /// MBB - The current block. 
+ MachineBasicBlock *MBB; + + /// MBB - The current insert position inside the current block. + MachineBasicBlock::iterator InsertPt; + +#ifndef NDEBUG + SmallSet CatchInfoLost; + SmallSet CatchInfoFound; +#endif + + struct LiveOutInfo { + unsigned NumSignBits; + APInt KnownOne, KnownZero; + LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {} + }; + + /// LiveOutRegInfo - Information about live out vregs, indexed by their + /// register number offset by 'FirstVirtualRegister'. + std::vector LiveOutRegInfo; + + /// PHINodesToUpdate - A list of phi instructions whose operand list will + /// be updated after processing the current basic block. + /// TODO: This isn't per-function state, it's per-basic-block state. But + /// there's no other convenient place for it to live right now. + std::vector > PHINodesToUpdate; + + explicit FunctionLoweringInfo(const TargetLowering &TLI); + + /// set - Initialize this FunctionLoweringInfo with the given Function + /// and its associated MachineFunction. + /// + void set(const Function &Fn, MachineFunction &MF); + + /// clear - Clear out all the function-specific state. This returns this + /// FunctionLoweringInfo to an empty state, ready to be used for a + /// different function. + void clear(); + + /// isExportedInst - Return true if the specified value is an instruction + /// exported from its block. + bool isExportedInst(const Value *V) { + return ValueMap.count(V); + } + + unsigned CreateReg(EVT VT); + + unsigned CreateRegs(const Type *Ty); + + unsigned InitializeRegForValue(const Value *V) { + unsigned &R = ValueMap[V]; + assert(R == 0 && "Already initialized this value register!"); + return R = CreateRegs(V->getType()); + } +}; + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(const CallInst &I, + MachineModuleInfo *MMI, MachineBasicBlock *MBB); + +/// CopyCatchInfo - Copy catch information from DestBB to SrcBB. +void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, + MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); + +} // end namespace llvm + +#endif diff --git a/include/llvm/CodeGen/GCMetadata.h b/include/llvm/CodeGen/GCMetadata.h index 6de69cd..b401068 100644 --- a/include/llvm/CodeGen/GCMetadata.h +++ b/include/llvm/CodeGen/GCMetadata.h @@ -1,4 +1,4 @@ -//===-- GCMetadata.h - Garbage collector metadata -------------------------===// +//===-- GCMetadata.h - Garbage collector metadata ---------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,7 +14,7 @@ // // The GCFunctionInfo class logs the data necessary to build a type accurate // stack map. The code generator outputs: -// +// // - Safe points as specified by the GCStrategy's NeededSafePoints. // - Stack offsets for GC roots, as specified by calls to llvm.gcroot // @@ -42,10 +42,10 @@ namespace llvm { class GCStrategy; class Constant; class MCSymbol; - + namespace GC { /// PointKind - The type of a collector-safe point. - /// + /// enum PointKind { Loop, //< Instr is a loop (backwards branch). Return, //< Instr is a return instruction. @@ -53,138 +53,138 @@ namespace llvm { PostCall //< Instr is the return address of a call. }; } - + /// GCPoint - Metadata for a collector-safe point in machine code. - /// + /// struct GCPoint { GC::PointKind Kind; //< The kind of the safe point. MCSymbol *Label; //< A label. 
- + GCPoint(GC::PointKind K, MCSymbol *L) : Kind(K), Label(L) {} }; - + /// GCRoot - Metadata for a pointer to an object managed by the garbage /// collector. struct GCRoot { int Num; //< Usually a frame index. int StackOffset; //< Offset from the stack pointer. const Constant *Metadata;//< Metadata straight from the call to llvm.gcroot. - + GCRoot(int N, const Constant *MD) : Num(N), StackOffset(-1), Metadata(MD) {} }; - - + + /// GCFunctionInfo - Garbage collection metadata for a single function. - /// + /// class GCFunctionInfo { public: typedef std::vector::iterator iterator; typedef std::vector::iterator roots_iterator; typedef std::vector::const_iterator live_iterator; - + private: const Function &F; GCStrategy &S; uint64_t FrameSize; std::vector Roots; std::vector SafePoints; - + // FIXME: Liveness. A 2D BitVector, perhaps? - // + // // BitVector Liveness; - // + // // bool islive(int point, int root) = // Liveness[point * SafePoints.size() + root] - // + // // The bit vector is the more compact representation where >3.2% of roots // are live per safe point (1.5% on 64-bit hosts). - + public: GCFunctionInfo(const Function &F, GCStrategy &S); ~GCFunctionInfo(); - + /// getFunction - Return the function to which this metadata applies. - /// + /// const Function &getFunction() const { return F; } - + /// getStrategy - Return the GC strategy for the function. - /// + /// GCStrategy &getStrategy() { return S; } - + /// addStackRoot - Registers a root that lives on the stack. Num is the /// stack object ID for the alloca (if the code generator is // using MachineFrameInfo). void addStackRoot(int Num, const Constant *Metadata) { Roots.push_back(GCRoot(Num, Metadata)); } - + /// addSafePoint - Notes the existence of a safe point. Num is the ID of the - /// label just prior to the safe point (if the code generator is using + /// label just prior to the safe point (if the code generator is using /// MachineModuleInfo). void addSafePoint(GC::PointKind Kind, MCSymbol *Label) { SafePoints.push_back(GCPoint(Kind, Label)); } - + /// getFrameSize/setFrameSize - Records the function's frame size. - /// + /// uint64_t getFrameSize() const { return FrameSize; } void setFrameSize(uint64_t S) { FrameSize = S; } - + /// begin/end - Iterators for safe points. - /// + /// iterator begin() { return SafePoints.begin(); } iterator end() { return SafePoints.end(); } size_t size() const { return SafePoints.size(); } - + /// roots_begin/roots_end - Iterators for all roots in the function. - /// + /// roots_iterator roots_begin() { return Roots.begin(); } roots_iterator roots_end () { return Roots.end(); } size_t roots_size() const { return Roots.size(); } - + /// live_begin/live_end - Iterators for live roots at a given safe point. - /// + /// live_iterator live_begin(const iterator &p) { return roots_begin(); } live_iterator live_end (const iterator &p) { return roots_end(); } size_t live_size(const iterator &p) const { return roots_size(); } }; - - + + /// GCModuleInfo - Garbage collection metadata for a whole module. - /// + /// class GCModuleInfo : public ImmutablePass { typedef StringMap strategy_map_type; typedef std::vector list_type; typedef DenseMap finfo_map_type; - + strategy_map_type StrategyMap; list_type StrategyList; finfo_map_type FInfoMap; - + GCStrategy *getOrCreateStrategy(const Module *M, const std::string &Name); - + public: typedef list_type::const_iterator iterator; - + static char ID; - + GCModuleInfo(); ~GCModuleInfo(); - + /// clear - Resets the pass. 
The metadata deleter pass calls this. - /// + /// void clear(); - + /// begin/end - Iterators for used strategies. - /// + /// iterator begin() const { return StrategyList.begin(); } iterator end() const { return StrategyList.end(); } - + /// get - Look up function metadata. - /// + /// GCFunctionInfo &getFunctionInfo(const Function &F); }; - + } #endif diff --git a/include/llvm/CodeGen/GCMetadataPrinter.h b/include/llvm/CodeGen/GCMetadataPrinter.h index 3703545..17a2653 100644 --- a/include/llvm/CodeGen/GCMetadataPrinter.h +++ b/include/llvm/CodeGen/GCMetadataPrinter.h @@ -25,49 +25,49 @@ #include "llvm/Support/Registry.h" namespace llvm { - + class GCMetadataPrinter; - + /// GCMetadataPrinterRegistry - The GC assembly printer registry uses all the /// defaults from Registry. typedef Registry GCMetadataPrinterRegistry; - + /// GCMetadataPrinter - Emits GC metadata as assembly code. - /// + /// class GCMetadataPrinter { public: typedef GCStrategy::list_type list_type; typedef GCStrategy::iterator iterator; - + private: GCStrategy *S; - + friend class AsmPrinter; - + protected: // May only be subclassed. GCMetadataPrinter(); - + // Do not implement. GCMetadataPrinter(const GCMetadataPrinter &); GCMetadataPrinter &operator=(const GCMetadataPrinter &); - + public: GCStrategy &getStrategy() { return *S; } const Module &getModule() const { return S->getModule(); } - + /// begin/end - Iterate over the collected function metadata. iterator begin() { return S->begin(); } iterator end() { return S->end(); } - + /// beginAssembly/finishAssembly - Emit module metadata as assembly code. virtual void beginAssembly(AsmPrinter &AP); - + virtual void finishAssembly(AsmPrinter &AP); - + virtual ~GCMetadataPrinter(); }; - + } #endif diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index a5e9dd5..69de598 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -130,7 +130,7 @@ namespace ISD { /// This node represents a target intrinsic function with no side effects. /// The first operand is the ID number of the intrinsic from the /// llvm::Intrinsic namespace. The operands to the intrinsic follow. The - /// node has returns the result of the intrinsic. + /// node returns the result of the intrinsic. INTRINSIC_WO_CHAIN, /// RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) @@ -508,8 +508,9 @@ namespace ISD { CALLSEQ_START, // Beginning of a call sequence CALLSEQ_END, // End of a call sequence - // VAARG - VAARG has three operands: an input chain, a pointer, and a - // SRCVALUE. It returns a pair of values: the vaarg value and a new chain. + // VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, + // and the alignment. It returns a pair of values: the vaarg value and a + // new chain. 
    VAARG,

    // VACOPY - VACOPY has five operands: an input chain, a destination pointer,
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index b4c2f2f..cd8293d 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -33,7 +33,6 @@ namespace {
       (void) llvm::createDeadMachineInstructionElimPass();

-      (void) llvm::createLocalRegisterAllocator();
       (void) llvm::createFastRegisterAllocator();
       (void) llvm::createLinearScanRegisterAllocator();
       (void) llvm::createPBQPRegisterAllocator();
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 637f52b..8d80efb 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -53,7 +53,7 @@ namespace llvm {
   class VNInfo {
   private:
     enum {
-      HAS_PHI_KILL = 1,
+      HAS_PHI_KILL = 1,
       REDEF_BY_EC  = 1 << 1,
       IS_PHI_DEF   = 1 << 2,
       IS_UNUSED    = 1 << 3,
@@ -67,22 +67,14 @@ namespace llvm {
     } cr;

   public:
-    typedef SpecificBumpPtrAllocator<VNInfo> Allocator;
-    typedef SmallVector<SlotIndex, 4> KillSet;
+    typedef BumpPtrAllocator Allocator;

     /// The ID number of this value.
     unsigned id;
-
+
     /// The index of the defining instruction (if isDefAccurate() returns true).
     SlotIndex def;

-    KillSet kills;
-
-    /*
-    VNInfo(LiveIntervals &li_)
-      : defflags(IS_UNUSED), id(~1U) { cr.copy = 0; }
-    */
-
     /// VNInfo constructor.
     /// d is presumed to point to the actual defining instr. If it doesn't,
     /// setIsDefAccurate(false) should be called after construction.
@@ -91,7 +83,7 @@ namespace llvm {

     /// VNInfo constructor, copies values from orig, except for the value number.
     VNInfo(unsigned i, const VNInfo &orig)
-      : flags(orig.flags), cr(orig.cr), id(i), def(orig.def), kills(orig.kills)
+      : flags(orig.flags), cr(orig.cr), id(i), def(orig.def)
     { }

     /// Copy from the parameter into this VNInfo.
@@ -99,7 +91,6 @@ namespace llvm {
       flags = src.flags;
       cr = src.cr;
       def = src.def;
-      kills = src.kills;
     }

     /// Used for copying value number info.
@@ -114,7 +105,7 @@ namespace llvm {
     /// This method should not be called on stack intervals as it may lead to
     /// undefined behavior.
     void setCopy(MachineInstr *c) { cr.copy = c; }
-
+
     /// For a stack interval, returns the reg which this stack interval was
     /// defined from.
     /// For a register interval the behaviour of this method is undefined.
@@ -144,7 +135,7 @@ namespace llvm {
       else
         flags &= ~REDEF_BY_EC;
     }
-
+
     /// Returns true if this value is defined by a PHI instruction (or was,
     /// PHI instructions may have been eliminated).
     bool isPHIDef() const { return flags & IS_PHI_DEF; }
@@ -172,49 +163,9 @@ namespace llvm {
     void setIsDefAccurate(bool defAccurate) {
       if (defAccurate)
         flags |= IS_DEF_ACCURATE;
-      else
+      else
         flags &= ~IS_DEF_ACCURATE;
     }
-
-    /// Returns true if the given index is a kill of this value.
-    bool isKill(SlotIndex k) const {
-      KillSet::const_iterator
-        i = std::lower_bound(kills.begin(), kills.end(), k);
-      return (i != kills.end() && *i == k);
-    }
-
-    /// addKill - Add a kill instruction index to the specified value
-    /// number.
-    void addKill(SlotIndex k) {
-      if (kills.empty()) {
-        kills.push_back(k);
-      } else {
-        KillSet::iterator
-          i = std::lower_bound(kills.begin(), kills.end(), k);
-        kills.insert(i, k);
-      }
-    }
-
-    /// Remove the specified kill index from this value's kills list.
-    /// Returns true if the value was present, otherwise returns false.
-    bool removeKill(SlotIndex k) {
-      KillSet::iterator i = std::lower_bound(kills.begin(), kills.end(), k);
-      if (i != kills.end() && *i == k) {
-        kills.erase(i);
-        return true;
-      }
-      return false;
-    }
-
-    /// Remove all kills in the range [s, e).
-    void removeKills(SlotIndex s, SlotIndex e) {
-      KillSet::iterator
-        si = std::lower_bound(kills.begin(), kills.end(), s),
-        se = std::upper_bound(kills.begin(), kills.end(), e);
-
-      kills.erase(si, se);
-    }
-
   };

   /// LiveRange structure - This represents a simple register range in the
@@ -258,6 +209,8 @@ namespace llvm {
     LiveRange(); // DO NOT IMPLEMENT
   };

+  template <> struct isPodLike<LiveRange> { static const bool value = true; };
+
   raw_ostream& operator<<(raw_ostream& os, const LiveRange &LR);

@@ -366,8 +319,8 @@ namespace llvm {
     /// the instruction that defines the value number.
     VNInfo *getNextValue(SlotIndex def, MachineInstr *CopyMI,
                          bool isDefAccurate, VNInfo::Allocator &VNInfoAllocator) {
-      VNInfo *VNI = VNInfoAllocator.Allocate();
-      new (VNI) VNInfo((unsigned)valnos.size(), def, CopyMI);
+      VNInfo *VNI =
+        new (VNInfoAllocator) VNInfo((unsigned)valnos.size(), def, CopyMI);
       VNI->setIsDefAccurate(isDefAccurate);
       valnos.push_back(VNI);
       return VNI;
@@ -377,23 +330,12 @@ namespace llvm {
     /// for the Value number.
     VNInfo *createValueCopy(const VNInfo *orig,
                             VNInfo::Allocator &VNInfoAllocator) {
-      VNInfo *VNI = VNInfoAllocator.Allocate();
-      new (VNI) VNInfo((unsigned)valnos.size(), *orig);
+      VNInfo *VNI =
+        new (VNInfoAllocator) VNInfo((unsigned)valnos.size(), *orig);
       valnos.push_back(VNI);
       return VNI;
     }

-    /// addKills - Add a number of kills into the VNInfo kill vector. If this
-    /// interval is live at a kill point, then the kill is not added.
-    void addKills(VNInfo *VNI, const VNInfo::KillSet &kills) {
-      for (unsigned i = 0, e = static_cast<unsigned>(kills.size());
-           i != e; ++i) {
-        if (!liveBeforeAndAt(kills[i])) {
-          VNI->addKill(kills[i]);
-        }
-      }
-    }
-
     /// isOnlyLROfValNo - Return true if the specified live range is the only
     /// one defined by its val#.
     bool isOnlyLROfValNo(const LiveRange *LR) {
@@ -472,6 +414,17 @@ namespace llvm {
     // range. If it does, then check if the previous live range ends at index-1.
     bool liveBeforeAndAt(SlotIndex index) const;

+    /// killedAt - Return true if a live range ends at index. Note that the kill
+    /// point is not contained in the half-open live range. It is usually the
+    /// getDefIndex() slot following its last use.
+    bool killedAt(SlotIndex index) const;
+
+    /// killedInRange - Return true if the interval has kills in [Start,End).
+    /// Note that the kill point is considered the end of a live range, so it is
+    /// not contained in the live range. If a live range ends at End, it won't
+    /// be counted as a kill by this method.
+    bool killedInRange(SlotIndex Start, SlotIndex End) const;
+
     /// getLiveRangeContaining - Return the live range that contains the
     /// specified index, or null if there is none.
     const LiveRange *getLiveRangeContaining(SlotIndex Idx) const {
@@ -486,6 +439,12 @@ namespace llvm {
       return I == end() ? 0 : &*I;
     }

+    /// getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
+    VNInfo *getVNInfoAt(SlotIndex Idx) const {
+      const_iterator I = FindLiveRangeContaining(Idx);
+      return I == end() ? 0 : I->valno;
+    }
+
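Because live ranges are half-open, "live at Idx" and "killed at Idx" are mutually exclusive queries, which is worth spelling out. A small sketch of how a client might combine the two new accessors; the helper name valueDiesAt is invented here and is not part of the patch:

#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/SlotIndexes.h"
using namespace llvm;

// Hypothetical helper: true when the interval's value dies exactly at Idx.
static bool valueDiesAt(const LiveInterval &LI, SlotIndex Idx) {
  // If some value is still live at Idx, it does not die there.
  if (LI.getVNInfoAt(Idx))
    return false;
  // Otherwise a live range may end exactly at Idx: the kill point is the
  // end of a half-open range and is not itself contained in the range.
  return LI.killedAt(Idx);
}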
    /// FindLiveRangeContaining - Return an iterator to the live range that
    /// contains the specified index, or end() if there is none.
    const_iterator FindLiveRangeContaining(SlotIndex Idx) const;
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 32fa709..5a0d81b 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -133,10 +133,9 @@ namespace llvm {
     bool conflictsWithPhysReg(const LiveInterval &li, VirtRegMap &vrm,
                               unsigned reg);

-    /// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except
-    /// it checks for sub-register reference and it can check use as well.
-    bool conflictsWithSubPhysRegRef(LiveInterval &li, unsigned Reg,
-                                    bool CheckUse,
+    /// conflictsWithAliasRef - Similar to conflictsWithPhysRegRef except
+    /// it checks for alias uses and defs.
+    bool conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
                                SmallPtrSet &JoinedCopies);

     // Interval creation
@@ -229,10 +228,6 @@ namespace llvm {
     VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; }

-    /// getVNInfoSourceReg - Helper function that parses the specified VNInfo
-    /// copy field and returns the source register that defines it.
-    unsigned getVNInfoSourceReg(const VNInfo *VNI) const;
-
     virtual void getAnalysisUsage(AnalysisUsage &AU) const;
     virtual void releaseMemory();

@@ -249,12 +244,6 @@ namespace llvm {
     addIntervalsForSpills(const LiveInterval& i,
                           SmallVectorImpl<LiveInterval*> &SpillIs,
                           const MachineLoopInfo *loopInfo, VirtRegMap& vrm);
-
-    /// addIntervalsForSpillsFast - Quickly create new intervals for spilled
-    /// defs / uses without remat or splitting.
-    std::vector<LiveInterval*>
-    addIntervalsForSpillsFast(const LiveInterval &li,
-                              const MachineLoopInfo *loopInfo, VirtRegMap &vrm);

     /// spillPhysRegAroundRegDefsUses - Spill the specified physical register
     /// around all defs and uses of the specified interval. Return true if it
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index cc651ca..3cfc47a 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -19,6 +19,7 @@
 namespace llvm {

+class Pass;
 class BasicBlock;
 class MachineFunction;
 class MCSymbol;
@@ -258,6 +259,11 @@ public:
   /// machine basic block (i.e., copies all the successors from fromMBB and
   /// removes all the successors from fromMBB).
   void transferSuccessors(MachineBasicBlock *fromMBB);
+
+  /// transferSuccessorsAndUpdatePHIs - Transfers all the successors, as
+  /// in transferSuccessors, and updates PHI operands in the successor blocks
+  /// which refer to fromMBB to refer to this.
+  void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB);

   /// isSuccessor - Return true if the specified MBB is a successor of this
   /// block.
@@ -276,11 +282,26 @@ public:
   /// branch to do so (e.g., a table jump). True is a conservative answer.
   bool canFallThrough();

+  /// Returns a pointer to the first instruction in this block that is not a
+  /// PHINode instruction. When adding instructions to the beginning of the
+  /// basic block, they should be added before the returned value, not before
+  /// the first instruction, which might be a PHI.
+  /// Returns end() if there's no non-PHI instruction.
+  iterator getFirstNonPHI();
+
   /// getFirstTerminator - returns an iterator to the first terminator
   /// instruction of this basic block. If a terminator does not exist,
   /// it returns end().
   iterator getFirstTerminator();

+  /// SplitCriticalEdge - Split the critical edge from this block to the
+  /// given successor block, and return the newly created block, or null
+  /// if splitting is not possible.
+ /// + /// This function updates LiveVariables, MachineDominatorTree, and + /// MachineLoopInfo, as applicable. + MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P); + void pop_front() { Insts.pop_front(); } void pop_back() { Insts.pop_back(); } void push_back(MachineInstr *MI) { Insts.push_back(MI); } diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index fe2c298..9471316 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -33,16 +33,14 @@ class BitVector; /// callee saved register in the current frame. class CalleeSavedInfo { unsigned Reg; - const TargetRegisterClass *RegClass; int FrameIdx; public: - CalleeSavedInfo(unsigned R, const TargetRegisterClass *RC, int FI = 0) - : Reg(R), RegClass(RC), FrameIdx(FI) {} + explicit CalleeSavedInfo(unsigned R, int FI = 0) + : Reg(R), FrameIdx(FI) {} // Accessors. unsigned getReg() const { return Reg; } - const TargetRegisterClass *getRegClass() const { return RegClass; } int getFrameIdx() const { return FrameIdx; } void setFrameIdx(int FI) { FrameIdx = FI; } }; @@ -100,8 +98,7 @@ class MachineFrameInfo { // cannot alias any other memory objects. bool isSpillSlot; - StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, - bool isSS) + StackObject(uint64_t Sz, unsigned Al, int64_t SP, bool IM, bool isSS) : SPOffset(SP), Size(Sz), Alignment(Al), isImmutable(IM), isSpillSlot(isSS) {} }; @@ -352,8 +349,7 @@ public: /// efficiency. By default, fixed objects are immutable. This returns an /// index with a negative value. /// - int CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable, bool isSS); + int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool Immutable); /// isFixedObjectIndex - Returns true if the specified index corresponds to a diff --git a/include/llvm/CodeGen/MachineFunctionPass.h b/include/llvm/CodeGen/MachineFunctionPass.h index 1a2b129..685e868 100644 --- a/include/llvm/CodeGen/MachineFunctionPass.h +++ b/include/llvm/CodeGen/MachineFunctionPass.h @@ -34,9 +34,6 @@ protected: explicit MachineFunctionPass(intptr_t ID) : FunctionPass(ID) {} explicit MachineFunctionPass(void *ID) : FunctionPass(ID) {} - /// createPrinterPass - Get a machine function printer pass. - Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; - /// runOnMachineFunction - This method must be overloaded to perform the /// desired machine code transformation or analysis. /// @@ -51,7 +48,11 @@ protected: virtual void getAnalysisUsage(AnalysisUsage &AU) const; private: - bool runOnFunction(Function &F); + /// createPrinterPass - Get a machine function printer pass. 
+  virtual Pass *createPrinterPass(raw_ostream &O,
+                                  const std::string &Banner) const;
+
+  virtual bool runOnFunction(Function &F);
 };

 } // End llvm namespace
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index cf691bb..e67b2dd 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -215,9 +215,6 @@ public:
   bool isKill() const { return getOpcode() == TargetOpcode::KILL; }
   bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; }
   bool isInlineAsm() const { return getOpcode() == TargetOpcode::INLINEASM; }
-  bool isExtractSubreg() const {
-    return getOpcode() == TargetOpcode::EXTRACT_SUBREG;
-  }
   bool isInsertSubreg() const {
     return getOpcode() == TargetOpcode::INSERT_SUBREG;
   }
@@ -227,7 +224,22 @@ public:
   bool isRegSequence() const {
     return getOpcode() == TargetOpcode::REG_SEQUENCE;
   }
-
+  bool isCopy() const {
+    return getOpcode() == TargetOpcode::COPY;
+  }
+
+  /// isCopyLike - Return true if the instruction behaves like a copy;
+  /// this includes COPY as well as SUBREG_TO_REG instructions.
+  bool isCopyLike() const {
+    return isCopy() || isSubregToReg();
+  }
+
+  /// isIdentityCopy - Return true if the instruction is an identity copy.
+  bool isIdentityCopy() const {
+    return isCopy() && getOperand(0).getReg() == getOperand(1).getReg() &&
+      getOperand(0).getSubReg() == getOperand(1).getSubReg();
+  }
+
   /// readsRegister - Return true if the MachineInstr reads the specified
   /// register. If TargetRegisterInfo is passed, then it also checks if there
   /// is a read of a super-register.
@@ -339,6 +351,11 @@ public:
   /// copyPredicates - Copies predicate operand(s) from MI.
   void copyPredicates(const MachineInstr *MI);

+  /// substituteRegister - Replace all occurrences of FromReg with ToReg:SubIdx,
+  /// properly composing subreg indices where necessary.
+  void substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx,
+                          const TargetRegisterInfo &RegInfo);
+
   /// addRegisterKilled - We have determined MI kills a register. Look for the
   /// operand that uses it and mark it as IsKill. If AddIfNotFound is true,
   /// add an implicit operand if it's not found. Returns true if the operand
@@ -359,6 +376,11 @@ public:
   void addRegisterDefined(unsigned IncomingReg,
                           const TargetRegisterInfo *RegInfo = 0);

+  /// setPhysRegsDeadExcept - Mark every physreg used by this instruction as
+  /// dead except those in the UsedRegs list.
+  void setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+                             const TargetRegisterInfo &TRI);
+
   /// isSafeToMove - Return true if it is safe to move this instruction. If
   /// SawStore is set to true, it means that there is a store (or call) between
   /// the instruction's location and its intended destination.
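The predicates added above compose naturally: isIdentityCopy() identifies COPYs that can simply be deleted. A minimal sketch of a dead-copy sweep over one block, assuming it runs after coalescing; the function name is invented for illustration:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;

// Hypothetical cleanup: erase COPYs whose source and destination (including
// subregister indices) coincide; such instructions have no effect.
static unsigned eraseIdentityCopies(MachineBasicBlock &MBB) {
  unsigned NumErased = 0;
  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
    MachineInstr *MI = &*I;
    ++I; // Advance first; MI may be erased below.
    if (MI->isIdentityCopy()) {
      MI->eraseFromParent();
      ++NumErased;
    }
  }
  return NumErased;
}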
diff --git a/include/llvm/CodeGen/MachineJumpTableInfo.h b/include/llvm/CodeGen/MachineJumpTableInfo.h index 1b6ab2c..6264349 100644 --- a/include/llvm/CodeGen/MachineJumpTableInfo.h +++ b/include/llvm/CodeGen/MachineJumpTableInfo.h @@ -74,7 +74,7 @@ private: JTEntryKind EntryKind; std::vector JumpTables; public: - MachineJumpTableInfo(JTEntryKind Kind): EntryKind(Kind) {} + explicit MachineJumpTableInfo(JTEntryKind Kind): EntryKind(Kind) {} JTEntryKind getEntryKind() const { return EntryKind; } diff --git a/include/llvm/CodeGen/MachineLoopInfo.h b/include/llvm/CodeGen/MachineLoopInfo.h index 8459a8d..3b3e31e 100644 --- a/include/llvm/CodeGen/MachineLoopInfo.h +++ b/include/llvm/CodeGen/MachineLoopInfo.h @@ -64,13 +64,13 @@ class MachineLoopInfo : public MachineFunctionPass { void operator=(const MachineLoopInfo &); // do not implement MachineLoopInfo(const MachineLoopInfo &); // do not implement - LoopInfoBase& getBase() { return LI; } - public: static char ID; // Pass identification, replacement for typeid MachineLoopInfo() : MachineFunctionPass(&ID) {} + LoopInfoBase& getBase() { return LI; } + /// iterator/begin/end - The interface to the top-level loops in the current /// function. /// diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index 31858ce..afa2c29 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -27,6 +27,7 @@ class MachineInstr; class MachineRegisterInfo; class MDNode; class TargetMachine; +class TargetRegisterInfo; class raw_ostream; class MCSymbol; @@ -246,7 +247,20 @@ public: assert(isReg() && "Wrong MachineOperand accessor"); SubReg = (unsigned char)subReg; } - + + /// substVirtReg - Substitute the current register with the virtual + /// subregister Reg:SubReg. Take any existing SubReg index into account, + /// using TargetRegisterInfo to compose the subreg indices if necessary. + /// Reg must be a virtual register, SubIdx can be 0. + /// + void substVirtReg(unsigned Reg, unsigned SubIdx, const TargetRegisterInfo&); + + /// substPhysReg - Substitute the current register with the physical register + /// Reg, taking any existing SubReg into account. For instance, + /// substPhysReg(%EAX) will change %reg1024:sub_8bit to %AL. + /// + void substPhysReg(unsigned Reg, const TargetRegisterInfo&); + void setIsUse(bool Val = true) { assert(isReg() && "Wrong MachineOperand accessor"); assert((Val || !isDebug()) && "Marking a debug operation as def"); diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index fa14fdc..066c91b 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -35,7 +35,7 @@ class MachineRegisterInfo { /// RegClassVRegMap - This vector acts as a map from TargetRegisterClass to /// virtual registers. For each target register class, it keeps a list of /// virtual registers belonging to the class. - std::vector > RegClass2VRegMap; + std::vector *RegClass2VRegMap; /// RegAllocHints - This vector records register allocation hints for virtual /// registers. For each virtual register, it keeps a register and hint type @@ -363,7 +363,18 @@ public: defusechain_iterator operator++(int) { // Postincrement defusechain_iterator tmp = *this; ++*this; return tmp; } - + + /// skipInstruction - move forward until reaching a different instruction. + /// Return the skipped instruction that is no longer pointed to, or NULL if + /// already pointing to end(). 
+    MachineInstr *skipInstruction() {
+      if (!Op) return 0;
+      MachineInstr *MI = Op->getParent();
+      do ++*this;
+      while (Op && Op->getParent() == MI);
+      return MI;
+    }
+
     MachineOperand &getOperand() const {
       assert(Op && "Cannot dereference end iterator!");
       return *Op;
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 2f5d576..7445ec7 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -85,15 +85,10 @@ namespace llvm {
   ///
   FunctionPass *createDeadMachineInstructionElimPass();

-  /// Creates a register allocator as the user specified on the command line.
+  /// Creates a register allocator as the user specified on the command line, or
+  /// picks one that matches OptLevel.
   ///
-  FunctionPass *createRegisterAllocator();
-
-  /// LocalRegisterAllocation Pass - This pass register allocates the input code
-  /// a basic block at a time, yielding code better than the simple register
-  /// allocator, but not as good as a global allocator.
-  ///
-  FunctionPass *createLocalRegisterAllocator();
+  FunctionPass *createRegisterAllocator(CodeGenOpt::Level OptLevel);

   /// FastRegisterAllocation Pass - This pass register allocates as fast as
   /// possible. It is best suited for debug code where live ranges are short.
   ///
@@ -147,10 +142,6 @@ namespace llvm {
   /// headers to target specific alignment boundary.
   FunctionPass *createCodePlacementOptPass();

-  /// getRegisterAllocator - This creates an instance of the register allocator
-  /// for the Sparc.
-  FunctionPass *getRegisterAllocator(TargetMachine &T);
-
   /// IntrinsicLowering Pass - Performs target-independent LLVM IR
   /// transformations for highly portable strategies.
   FunctionPass *createGCLoweringPass();
diff --git a/include/llvm/CodeGen/PostRAHazardRecognizer.h b/include/llvm/CodeGen/PostRAHazardRecognizer.h
new file mode 100644
index 0000000..24d73cb
--- /dev/null
+++ b/include/llvm/CodeGen/PostRAHazardRecognizer.h
@@ -0,0 +1,94 @@
+//=- llvm/CodeGen/PostRAHazardRecognizer.h - Scheduling Support -*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PostRAHazardRecognizer class, which
+// implements hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/System/DataTypes.h"
+
+#include
+#include
+#include
+
+namespace llvm {
+
+class InstrItineraryData;
+class SUnit;
+
+class PostRAHazardRecognizer : public ScheduleHazardRecognizer {
+  // ScoreBoard to track function unit usage. ScoreBoard[0] is a
+  // mask of the FUs in use in the cycle currently being
+  // scheduled. ScoreBoard[1] is a mask for the next cycle. The
+  // ScoreBoard is used as a circular buffer with the current cycle
+  // indicated by Head.
+  class ScoreBoard {
+    unsigned *Data;
+
+    // The maximum number of cycles monitored by the Scoreboard. This
+    // value is determined based on the target itineraries to ensure
+    // that all hazards can be tracked.
+    size_t Depth;
+    // Index into the Scoreboard that represents the current cycle.
+    size_t Head;
+  public:
+    ScoreBoard():Data(NULL), Depth(0), Head(0) { }
+    ~ScoreBoard() {
+      delete[] Data;
+    }
+
+    size_t getDepth() const { return Depth; }
+    unsigned& operator[](size_t idx) const {
+      assert(Depth && "ScoreBoard was not initialized properly!");
+
+      return Data[(Head + idx) % Depth];
+    }
+
+    void reset(size_t d = 1) {
+      if (Data == NULL) {
+        Depth = d;
+        Data = new unsigned[Depth];
+      }
+
+      memset(Data, 0, Depth * sizeof(Data[0]));
+      Head = 0;
+    }
+
+    void advance() {
+      Head = (Head + 1) % Depth;
+    }
+
+    // Print the scoreboard.
+    void dump() const;
+  };
+
+  // Itinerary data for the target.
+  const InstrItineraryData &ItinData;
+
+  ScoreBoard ReservedScoreboard;
+  ScoreBoard RequiredScoreboard;
+
+public:
+  PostRAHazardRecognizer(const InstrItineraryData &ItinData);
+
+  virtual HazardType getHazardType(SUnit *SU);
+  virtual void Reset();
+  virtual void EmitInstruction(SUnit *SU);
+  virtual void AdvanceCycle();
+};
+
+}
+
+#endif
diff --git a/include/llvm/CodeGen/RegisterCoalescer.h b/include/llvm/CodeGen/RegisterCoalescer.h
index 1490aa0..7644433 100644
--- a/include/llvm/CodeGen/RegisterCoalescer.h
+++ b/include/llvm/CodeGen/RegisterCoalescer.h
@@ -25,6 +25,9 @@ namespace llvm {
   class RegallocQuery;
   class AnalysisUsage;
   class MachineInstr;
+  class TargetRegisterInfo;
+  class TargetRegisterClass;
+  class TargetInstrInfo;

   /// An abstract interface for register coalescers. Coalescers must
   /// implement this interface to be part of the coalescer analysis
@@ -141,6 +144,93 @@ namespace llvm {
       return true;
     }
   };
+
+
+  /// CoalescerPair - A helper class for register coalescers. When deciding if
+  /// two registers can be coalesced, CoalescerPair can determine if a copy
+  /// instruction would become an identity copy after coalescing.
+  class CoalescerPair {
+    const TargetInstrInfo &tii_;
+    const TargetRegisterInfo &tri_;
+
+    /// dstReg_ - The register that will be left after coalescing. It can be a
+    /// virtual or physical register.
+    unsigned dstReg_;
+
+    /// srcReg_ - The virtual register that will be coalesced into dstReg_.
+    unsigned srcReg_;
+
+    /// subIdx_ - The subregister index of srcReg_ in dstReg_. It is possible to
+    /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a
+    /// virtual register.
+    unsigned subIdx_;
+
+    /// partial_ - True when the original copy was a partial subregister copy.
+    bool partial_;
+
+    /// crossClass_ - True when both regs are virtual, and newRC is constrained.
+    bool crossClass_;
+
+    /// flipped_ - True when DstReg and SrcReg are reversed from the original
+    /// copy instruction.
+    bool flipped_;
+
+    /// newRC_ - The register class of the coalesced register, or NULL if dstReg_
+    /// is a physreg.
+    const TargetRegisterClass *newRC_;
+
+    /// compose - Compose subreg indices a and b, either may be 0.
+    unsigned compose(unsigned, unsigned) const;
+
+    /// isMoveInstr - Return true if MI is a move or subreg instruction.
+    bool isMoveInstr(const MachineInstr *MI, unsigned &Src, unsigned &Dst,
+                     unsigned &SrcSub, unsigned &DstSub) const;
+
+  public:
+    CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
+      : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
+        partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
+
+    /// setRegisters - Set registers to match the copy instruction MI. Return
+    /// false if MI is not a coalescable copy instruction.
+    bool setRegisters(const MachineInstr*);
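The intended call sequence is: construct a CoalescerPair from the target hooks, feed it a candidate instruction with setRegisters(), and only then query the pair. A short sketch (the surrounding function and variable names are placeholders, not part of this patch):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Hypothetical driver: decide what a candidate copy would coalesce into.
static bool inspectCopy(const TargetInstrInfo &TII,
                        const TargetRegisterInfo &TRI,
                        const MachineInstr *MI) {
  CoalescerPair CP(TII, TRI);
  if (!CP.setRegisters(MI))
    return false;            // Not a coalescable copy at all.
  if (CP.isCoalescable(MI))
    return true;             // Already an identity copy under coalescing.
  // Otherwise the pair describes a real join: CP.getSrcReg() would be merged
  // into CP.getDstReg(), possibly as subregister index CP.getSubIdx().
  return true;
}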
+    /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible
+    /// because dstReg_ is a physical register, or subIdx_ is set.
+    bool flip();
+
+    /// isCoalescable - Return true if MI is a copy instruction that will become
+    /// an identity copy after coalescing.
+    bool isCoalescable(const MachineInstr*) const;
+
+    /// isPhys - Return true if DstReg is a physical register.
+    bool isPhys() const { return !newRC_; }
+
+    /// isPartial - Return true if the original copy instruction did not copy the
+    /// full register, but was a subreg operation.
+    bool isPartial() const { return partial_; }
+
+    /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+    /// register class than DstReg's.
+    bool isCrossClass() const { return crossClass_; }
+
+    /// isFlipped - Return true when getSrcReg is the register being defined by
+    /// the original copy instruction.
+    bool isFlipped() const { return flipped_; }
+
+    /// getDstReg - Return the register (virtual or physical) that will remain
+    /// after coalescing.
+    unsigned getDstReg() const { return dstReg_; }
+
+    /// getSrcReg - Return the virtual register that will be coalesced away.
+    unsigned getSrcReg() const { return srcReg_; }
+
+    /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
+    /// coalesced into, or 0.
+    unsigned getSubIdx() const { return subIdx_; }
+
+    /// getNewRC - Return the register class of the coalesced register.
+    const TargetRegisterClass *getNewRC() const { return newRC_; }
+  };
 }

 // Because of the way .a files work, we must force the SimpleRC
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 84b726d..246831c 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -98,6 +98,10 @@ public:
   /// getRegsUsed - Return all registers currently in use in used.
   void getRegsUsed(BitVector &used, bool includeReserved);

+  /// getRegsAvailable - Return all available registers in the register class
+  /// in Mask.
+  void getRegsAvailable(const TargetRegisterClass *RC, BitVector &Mask);
+
   /// FindUnusedReg - Find an unused register of the specified register class.
   /// Return 0 if none is found.
   unsigned FindUnusedReg(const TargetRegisterClass *RegClass) const;
@@ -147,7 +151,12 @@ private:
   /// Add Reg and its aliases to BV.
   void addRegWithAliases(BitVector &BV, unsigned Reg);

-  unsigned findSurvivorReg(MachineBasicBlock::iterator MI,
+  /// findSurvivorReg - Return the candidate register that is unused for the
+  /// longest after StartMI. UseMI is set to the instruction where the search
+  /// stopped.
+  ///
+  /// No more than InstrLimit instructions are inspected.
+ unsigned findSurvivorReg(MachineBasicBlock::iterator StartMI, BitVector &Candidates, unsigned InstrLimit, MachineBasicBlock::iterator &UseMI); diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index 42ae563..a51e82a 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -247,6 +247,40 @@ namespace RTLIB { // EXCEPTION HANDLING UNWIND_RESUME, + // Family ATOMICs + SYNC_VAL_COMPARE_AND_SWAP_1, + SYNC_VAL_COMPARE_AND_SWAP_2, + SYNC_VAL_COMPARE_AND_SWAP_4, + SYNC_VAL_COMPARE_AND_SWAP_8, + SYNC_LOCK_TEST_AND_SET_1, + SYNC_LOCK_TEST_AND_SET_2, + SYNC_LOCK_TEST_AND_SET_4, + SYNC_LOCK_TEST_AND_SET_8, + SYNC_FETCH_AND_ADD_1, + SYNC_FETCH_AND_ADD_2, + SYNC_FETCH_AND_ADD_4, + SYNC_FETCH_AND_ADD_8, + SYNC_FETCH_AND_SUB_1, + SYNC_FETCH_AND_SUB_2, + SYNC_FETCH_AND_SUB_4, + SYNC_FETCH_AND_SUB_8, + SYNC_FETCH_AND_AND_1, + SYNC_FETCH_AND_AND_2, + SYNC_FETCH_AND_AND_4, + SYNC_FETCH_AND_AND_8, + SYNC_FETCH_AND_OR_1, + SYNC_FETCH_AND_OR_2, + SYNC_FETCH_AND_OR_4, + SYNC_FETCH_AND_OR_8, + SYNC_FETCH_AND_XOR_1, + SYNC_FETCH_AND_XOR_2, + SYNC_FETCH_AND_XOR_4, + SYNC_FETCH_AND_XOR_8, + SYNC_FETCH_AND_NAND_1, + SYNC_FETCH_AND_NAND_2, + SYNC_FETCH_AND_NAND_4, + SYNC_FETCH_AND_NAND_8, + UNKNOWN_LIBCALL }; diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 97202bd..de49d18 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -29,7 +29,6 @@ namespace llvm { class AliasAnalysis; -class FunctionLoweringInfo; class MachineConstantPoolValue; class MachineFunction; class MDNode; @@ -134,7 +133,6 @@ class SelectionDAG { const TargetLowering &TLI; const TargetSelectionDAGInfo &TSI; MachineFunction *MF; - FunctionLoweringInfo &FLI; LLVMContext *Context; /// EntryNode - The starting token. @@ -187,7 +185,7 @@ class SelectionDAG { SelectionDAG(const SelectionDAG&); // Do not implement. public: - SelectionDAG(const TargetMachine &TM, FunctionLoweringInfo &fli); + explicit SelectionDAG(const TargetMachine &TM); ~SelectionDAG(); /// init - Prepare this SelectionDAG to process code in the given @@ -204,7 +202,6 @@ public: const TargetMachine &getTarget() const { return TM; } const TargetLowering &getTargetLoweringInfo() const { return TLI; } const TargetSelectionDAGInfo &getSelectionDAGInfo() const { return TSI; } - FunctionLoweringInfo &getFunctionLoweringInfo() const { return FLI; } LLVMContext *getContext() const {return Context; } /// viewGraph - Pop up a GraphViz/gv window with the DAG rendered using 'dot'. @@ -351,13 +348,13 @@ public: SDValue getTargetConstantFP(const ConstantFP &Val, EVT VT) { return getConstantFP(Val, VT, true); } - SDValue getGlobalAddress(const GlobalValue *GV, EVT VT, + SDValue getGlobalAddress(const GlobalValue *GV, DebugLoc DL, EVT VT, int64_t offset = 0, bool isTargetGA = false, unsigned char TargetFlags = 0); - SDValue getTargetGlobalAddress(const GlobalValue *GV, EVT VT, + SDValue getTargetGlobalAddress(const GlobalValue *GV, DebugLoc DL, EVT VT, int64_t offset = 0, unsigned char TargetFlags = 0) { - return getGlobalAddress(GV, VT, offset, true, TargetFlags); + return getGlobalAddress(GV, DL, VT, offset, true, TargetFlags); } SDValue getFrameIndex(int FI, EVT VT, bool isTarget = false); SDValue getTargetFrameIndex(int FI, EVT VT) { @@ -585,7 +582,7 @@ public: /// getVAArg - VAArg produces a result and token chain, and takes a pointer /// and a source value as input. 
SDValue getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, - SDValue SV); + SDValue SV, unsigned Align); /// getAtomic - Gets a node for an atomic op, produces result and chain and /// takes 3 operands @@ -635,18 +632,20 @@ public: SDValue getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, bool isVolatile, bool isNonTemporal, unsigned Alignment); - SDValue getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, + SDValue getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment); SDValue getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, - SDValue Offset, ISD::MemIndexedMode AM); - SDValue getLoad(ISD::MemIndexedMode AM, DebugLoc dl, ISD::LoadExtType ExtType, - EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, + SDValue Offset, ISD::MemIndexedMode AM); + SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, + SDValue Chain, SDValue Ptr, SDValue Offset, const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment); - SDValue getLoad(ISD::MemIndexedMode AM, DebugLoc dl, ISD::LoadExtType ExtType, - EVT VT, SDValue Chain, SDValue Ptr, SDValue Offset, + SDValue getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, + SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO); /// getStore - Helper function to build ISD::STORE nodes. @@ -681,15 +680,15 @@ public: /// already exists. If the resultant node does not exist in the DAG, the /// input node is returned. As a degenerate case, if you specify the same /// input operands as the node already has, the input node is returned. 
- SDValue UpdateNodeOperands(SDValue N, SDValue Op); - SDValue UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2); - SDValue UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDNode *UpdateNodeOperands(SDNode *N, SDValue Op); + SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2); + SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3); - SDValue UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4); - SDValue UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, + SDNode *UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4, SDValue Op5); - SDValue UpdateNodeOperands(SDValue N, + SDNode *UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps); /// SelectNodeTo - These are used for target selectors to *mutate* the diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 3817580..01d05dd 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -280,19 +280,16 @@ private: SDNode *MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTs, const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo); - void PrepareEHLandingPad(MachineBasicBlock *BB); + void PrepareEHLandingPad(); void SelectAllBasicBlocks(const Function &Fn); - void FinishBasicBlock(MachineBasicBlock *BB); + void FinishBasicBlock(); - MachineBasicBlock *SelectBasicBlock(MachineBasicBlock *BB, - const BasicBlock *LLVMBB, - BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End, - bool &HadTailCall); - MachineBasicBlock *CodeGenAndEmitDAG(MachineBasicBlock *BB); + void SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall); + void CodeGenAndEmitDAG(); void LowerArguments(const BasicBlock *BB); - void ShrinkDemandedOps(); void ComputeLiveOutVRegInfo(); /// Create the scheduler. If a specific scheduler was specified diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index fd529b6..4cf6f36 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -549,6 +549,15 @@ public: return FoundNode; } + /// getFlaggedUser - If this node has a flag value with a user, return + /// the user (there is at most one). Otherwise return NULL. + SDNode *getFlaggedUser() const { + for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI) + if (UI.getUse().get().getValueType() == MVT::Flag) + return *UI; + return 0; + } + /// getNumValues - Return the number of values defined/returned by this /// operator. 
/// @@ -1082,6 +1091,7 @@ public: uint64_t getZExtValue() const { return Value->getZExtValue(); } int64_t getSExtValue() const { return Value->getSExtValue(); } + bool isOne() const { return Value->isOne(); } bool isNullValue() const { return Value->isNullValue(); } bool isAllOnesValue() const { return Value->isAllOnesValue(); } @@ -1130,7 +1140,7 @@ public: } bool isExactlyValue(const APFloat& V) const; - bool isValueValidForType(EVT VT, const APFloat& Val); + static bool isValueValidForType(EVT VT, const APFloat& Val); static bool classof(const ConstantFPSDNode *) { return true; } static bool classof(const SDNode *N) { @@ -1144,7 +1154,7 @@ class GlobalAddressSDNode : public SDNode { int64_t Offset; unsigned char TargetFlags; friend class SelectionDAG; - GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, EVT VT, + GlobalAddressSDNode(unsigned Opc, DebugLoc DL, const GlobalValue *GA, EVT VT, int64_t o, unsigned char TargetFlags); public: @@ -1454,125 +1464,6 @@ public: } }; -namespace ISD { - struct ArgFlagsTy { - private: - static const uint64_t NoFlagSet = 0ULL; - static const uint64_t ZExt = 1ULL<<0; ///< Zero extended - static const uint64_t ZExtOffs = 0; - static const uint64_t SExt = 1ULL<<1; ///< Sign extended - static const uint64_t SExtOffs = 1; - static const uint64_t InReg = 1ULL<<2; ///< Passed in register - static const uint64_t InRegOffs = 2; - static const uint64_t SRet = 1ULL<<3; ///< Hidden struct-ret ptr - static const uint64_t SRetOffs = 3; - static const uint64_t ByVal = 1ULL<<4; ///< Struct passed by value - static const uint64_t ByValOffs = 4; - static const uint64_t Nest = 1ULL<<5; ///< Nested fn static chain - static const uint64_t NestOffs = 5; - static const uint64_t ByValAlign = 0xFULL << 6; //< Struct alignment - static const uint64_t ByValAlignOffs = 6; - static const uint64_t Split = 1ULL << 10; - static const uint64_t SplitOffs = 10; - static const uint64_t OrigAlign = 0x1FULL<<27; - static const uint64_t OrigAlignOffs = 27; - static const uint64_t ByValSize = 0xffffffffULL << 32; //< Struct size - static const uint64_t ByValSizeOffs = 32; - - static const uint64_t One = 1ULL; //< 1 of this type, for shifts - - uint64_t Flags; - public: - ArgFlagsTy() : Flags(0) { } - - bool isZExt() const { return Flags & ZExt; } - void setZExt() { Flags |= One << ZExtOffs; } - - bool isSExt() const { return Flags & SExt; } - void setSExt() { Flags |= One << SExtOffs; } - - bool isInReg() const { return Flags & InReg; } - void setInReg() { Flags |= One << InRegOffs; } - - bool isSRet() const { return Flags & SRet; } - void setSRet() { Flags |= One << SRetOffs; } - - bool isByVal() const { return Flags & ByVal; } - void setByVal() { Flags |= One << ByValOffs; } - - bool isNest() const { return Flags & Nest; } - void setNest() { Flags |= One << NestOffs; } - - unsigned getByValAlign() const { - return (unsigned) - ((One << ((Flags & ByValAlign) >> ByValAlignOffs)) / 2); - } - void setByValAlign(unsigned A) { - Flags = (Flags & ~ByValAlign) | - (uint64_t(Log2_32(A) + 1) << ByValAlignOffs); - } - - bool isSplit() const { return Flags & Split; } - void setSplit() { Flags |= One << SplitOffs; } - - unsigned getOrigAlign() const { - return (unsigned) - ((One << ((Flags & OrigAlign) >> OrigAlignOffs)) / 2); - } - void setOrigAlign(unsigned A) { - Flags = (Flags & ~OrigAlign) | - (uint64_t(Log2_32(A) + 1) << OrigAlignOffs); - } - - unsigned getByValSize() const { - return (unsigned)((Flags & ByValSize) >> ByValSizeOffs); - } - void setByValSize(unsigned S) { - Flags = (Flags & 
~ByValSize) | (uint64_t(S) << ByValSizeOffs); - } - - /// getArgFlagsString - Returns the flags as a string, eg: "zext align:4". - std::string getArgFlagsString(); - - /// getRawBits - Represent the flags as a bunch of bits. - uint64_t getRawBits() const { return Flags; } - }; - - /// InputArg - This struct carries flags and type information about a - /// single incoming (formal) argument or incoming (from the perspective - /// of the caller) return value virtual register. - /// - struct InputArg { - ArgFlagsTy Flags; - EVT VT; - bool Used; - - InputArg() : VT(MVT::Other), Used(false) {} - InputArg(ISD::ArgFlagsTy flags, EVT vt, bool used) - : Flags(flags), VT(vt), Used(used) { - assert(VT.isSimple() && - "InputArg value type must be Simple!"); - } - }; - - /// OutputArg - This struct carries flags and a value for a - /// single outgoing (actual) argument or outgoing (from the perspective - /// of the caller) return value virtual register. - /// - struct OutputArg { - ArgFlagsTy Flags; - SDValue Val; - bool IsFixed; - - OutputArg() : IsFixed(false) {} - OutputArg(ISD::ArgFlagsTy flags, SDValue val, bool isfixed) - : Flags(flags), Val(val), IsFixed(isfixed) { - assert(Val.getValueType().isSimple() && - "OutputArg value type must be Simple!"); - } - }; -} - /// VTSDNode - This class is used to represent EVT's, which are used /// to parameterize some operations. class VTSDNode : public SDNode { diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 3c56d0d..f1f047b 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -23,6 +23,7 @@ #define LLVM_CODEGEN_SLOTINDEXES_H #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/SmallVector.h" @@ -663,15 +664,20 @@ namespace llvm { MachineBasicBlock::iterator miItr(mi); bool needRenumber = false; IndexListEntry *newEntry; - + // Get previous index, considering that not all instructions are indexed. IndexListEntry *prevEntry; - if (miItr == mbb->begin()) { + for (;;) { // If mi is at the mbb beginning, get the prev index from the mbb. - prevEntry = &mbbRangeItr->second.first.entry(); - } else { - // Otherwise get it from the previous instr. - MachineBasicBlock::iterator pItr(prior(miItr)); - prevEntry = &getInstructionIndex(pItr).entry(); + if (miItr == mbb->begin()) { + prevEntry = &mbbRangeItr->second.first.entry(); + break; + } + // Otherwise rewind until we find a mapped instruction. + Mi2IndexMap::const_iterator itr = mi2iMap.find(--miItr); + if (itr != mi2iMap.end()) { + prevEntry = &itr->second.entry(); + break; + } } // Get next entry from previous entry. @@ -757,6 +763,47 @@ namespace llvm { mi2iMap.insert(std::make_pair(newMI, replaceBaseIndex)); } + /// Add the given MachineBasicBlock into the maps. 
+ void insertMBBInMaps(MachineBasicBlock *mbb) { + MachineFunction::iterator nextMBB = + llvm::next(MachineFunction::iterator(mbb)); + IndexListEntry *startEntry = createEntry(0, 0); + IndexListEntry *terminatorEntry = createEntry(0, 0); + IndexListEntry *nextEntry = 0; + + if (nextMBB == mbb->getParent()->end()) { + nextEntry = getTail(); + } else { + nextEntry = &getMBBStartIdx(nextMBB).entry(); + } + + insert(nextEntry, startEntry); + insert(nextEntry, terminatorEntry); + + SlotIndex startIdx(startEntry, SlotIndex::LOAD); + SlotIndex terminatorIdx(terminatorEntry, SlotIndex::PHI_BIT); + SlotIndex endIdx(nextEntry, SlotIndex::LOAD); + + terminatorGaps.insert( + std::make_pair(mbb, terminatorIdx)); + + mbb2IdxMap.insert( + std::make_pair(mbb, std::make_pair(startIdx, endIdx))); + + idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb)); + + if (MachineFunction::iterator(mbb) != mbb->getParent()->begin()) { + // Have to update the end index of the previous block. + MachineBasicBlock *priorMBB = + llvm::prior(MachineFunction::iterator(mbb)); + mbb2IdxMap[priorMBB].second = startIdx; + } + + renumberIndexes(); + std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); + + } + }; diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in index 99d2ab5..d12f82a 100644 --- a/include/llvm/Config/config.h.in +++ b/include/llvm/Config/config.h.in @@ -63,6 +63,9 @@ /* Define to 1 if you have the `closedir' function. */ #undef HAVE_CLOSEDIR +/* Define to 1 if you have the header file. */ +#undef HAVE_CRASHREPORTERCLIENT_H + /* Define to 1 if you have the header file. */ #undef HAVE_CTYPE_H diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index c3f1902..3287b39 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -174,8 +174,8 @@ public: CodeGenOpt::Level OptLevel = CodeGenOpt::Default, bool GVsWithCode = true, - CodeModel::Model CMM = - CodeModel::Default); + CodeModel::Model CMM = + CodeModel::Default); /// addModule - Add a Module to the list of modules that we can JIT from. /// Note that this takes ownership of the Module: when the ExecutionEngine is diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h index 658967d..d175080 100644 --- a/include/llvm/GlobalValue.h +++ b/include/llvm/GlobalValue.h @@ -40,6 +40,7 @@ public: InternalLinkage, ///< Rename collisions when linking (static functions). PrivateLinkage, ///< Like Internal, but omit from symbol table. LinkerPrivateLinkage, ///< Like Private, but linker removes. + LinkerPrivateWeakLinkage, ///< Like LinkerPrivate, but weak. DLLImportLinkage, ///< Function to be imported from DLL DLLExportLinkage, ///< Function to be accessible from DLL. ExternalWeakLinkage,///< ExternalWeak linkage description. 
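Linkage-wise, the new enumerator combines the two properties its name suggests: it stays local like LinkerPrivateLinkage, and it is weak for the linker, as the predicate updates in the hunks below make explicit. A tiny illustration (sketch only, assuming the predicates shown in this patch):

#include "llvm/GlobalValue.h"
#include <cassert>
using namespace llvm;

static void checkLinkerPrivateWeak() {
  GlobalValue::LinkageTypes L = GlobalValue::LinkerPrivateWeakLinkage;
  // Local like LinkerPrivate: omitted from the symbol table by the linker...
  assert(GlobalValue::isLocalLinkage(L));
  // ...but weak: the definition may be replaced or dropped at link time.
  assert(GlobalValue::isWeakForLinker(L));
}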
@@ -132,11 +133,14 @@ public: return Linkage == PrivateLinkage; } static bool isLinkerPrivateLinkage(LinkageTypes Linkage) { - return Linkage==LinkerPrivateLinkage; + return Linkage == LinkerPrivateLinkage; + } + static bool isLinkerPrivateWeakLinkage(LinkageTypes Linkage) { + return Linkage == LinkerPrivateWeakLinkage; } static bool isLocalLinkage(LinkageTypes Linkage) { return isInternalLinkage(Linkage) || isPrivateLinkage(Linkage) || - isLinkerPrivateLinkage(Linkage); + isLinkerPrivateLinkage(Linkage) || isLinkerPrivateWeakLinkage(Linkage); } static bool isDLLImportLinkage(LinkageTypes Linkage) { return Linkage == DLLImportLinkage; @@ -158,7 +162,8 @@ public: return (Linkage == WeakAnyLinkage || Linkage == LinkOnceAnyLinkage || Linkage == CommonLinkage || - Linkage == ExternalWeakLinkage); + Linkage == ExternalWeakLinkage || + Linkage == LinkerPrivateWeakLinkage); } /// isWeakForLinker - Whether the definition of this global may be replaced at @@ -170,7 +175,8 @@ public: Linkage == LinkOnceAnyLinkage || Linkage == LinkOnceODRLinkage || Linkage == CommonLinkage || - Linkage == ExternalWeakLinkage); + Linkage == ExternalWeakLinkage || + Linkage == LinkerPrivateWeakLinkage); } bool hasExternalLinkage() const { return isExternalLinkage(Linkage); } @@ -187,6 +193,9 @@ public: bool hasInternalLinkage() const { return isInternalLinkage(Linkage); } bool hasPrivateLinkage() const { return isPrivateLinkage(Linkage); } bool hasLinkerPrivateLinkage() const { return isLinkerPrivateLinkage(Linkage); } + bool hasLinkerPrivateWeakLinkage() const { + return isLinkerPrivateWeakLinkage(Linkage); + } bool hasLocalLinkage() const { return isLocalLinkage(Linkage); } bool hasDLLImportLinkage() const { return isDLLImportLinkage(Linkage); } bool hasDLLExportLinkage() const { return isDLLExportLinkage(Linkage); } diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h index f4d125b..105b1bc 100644 --- a/include/llvm/InlineAsm.h +++ b/include/llvm/InlineAsm.h @@ -154,7 +154,8 @@ public: Op_InputChain = 0, Op_AsmString = 1, Op_MDNode = 2, - Op_FirstOperand = 3, + Op_IsAlignStack = 3, + Op_FirstOperand = 4, Kind_RegUse = 1, Kind_RegDef = 2, diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h index 49cdd6a..6715416 100644 --- a/include/llvm/InstrTypes.h +++ b/include/llvm/InstrTypes.h @@ -612,7 +612,7 @@ public: /// A lossless cast is one that does not alter the basic value. It implies /// a no-op cast but is more stringent, preventing things like int->float, - /// long->double, int->ptr, or vector->anything. + /// long->double, or int->ptr. /// @returns true iff the cast is lossless. /// @brief Determine if this is a lossless cast. bool isLosslessCast() const; @@ -625,6 +625,14 @@ public: /// platform. Generally, the result of TargetData::getIntPtrType() should be /// passed in. If that's not available, use Type::Int64Ty, which will make /// the isNoopCast call conservative. + /// @brief Determine if the described cast is a no-op cast. + static bool isNoopCast( + Instruction::CastOps Opcode, ///< Opcode of cast + const Type *SrcTy, ///< SrcTy of cast + const Type *DstTy, ///< DstTy of cast + const Type *IntPtrTy ///< Integer type corresponding to Ptr types, or null + ); + /// @brief Determine if this cast is a no-op cast. 
  bool isNoopCast(
    const Type *IntPtrTy ///< Integer type corresponding to pointer
  );
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 413a595..af93a29 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -235,6 +235,9 @@ public:
   void setAlignment(unsigned Align);

+  Value *getValueOperand() { return getOperand(0); }
+  const Value *getValueOperand() const { return getOperand(0); }
+
   Value *getPointerOperand() { return getOperand(1); }
   const Value *getPointerOperand() const { return getOperand(1); }
   static unsigned getPointerOperandIndex() { return 1U; }
@@ -883,14 +886,14 @@ public:
                           InputIterator ArgBegin, InputIterator ArgEnd,
                           const Twine &NameStr = "",
                           Instruction *InsertBefore = 0) {
-    return new((unsigned)(ArgEnd - ArgBegin + 1))
+    return new(unsigned(ArgEnd - ArgBegin + 1))
       CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertBefore);
   }
   template<typename InputIterator>
   static CallInst *Create(Value *Func,
                           InputIterator ArgBegin, InputIterator ArgEnd,
                           const Twine &NameStr, BasicBlock *InsertAtEnd) {
-    return new((unsigned)(ArgEnd - ArgBegin + 1))
+    return new(unsigned(ArgEnd - ArgBegin + 1))
       CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertAtEnd);
   }
   static CallInst *Create(Value *F, Value *Actual,
@@ -919,6 +922,7 @@ public:
   static Instruction *CreateMalloc(Instruction *InsertBefore,
                                    const Type *IntPtrTy, const Type *AllocTy,
                                    Value *AllocSize, Value *ArraySize = 0,
+                                   Function* MallocF = 0,
                                    const Twine &Name = "");
   static Instruction *CreateMalloc(BasicBlock *InsertAtEnd,
                                    const Type *IntPtrTy, const Type *AllocTy,
                                    Value *AllocSize, Value *ArraySize = 0,
                                    Function* MallocF = 0,
                                    const Twine &Name = "");
   /// CreateFree - Generate the IR for a call to the builtin free function.
-  static void CreateFree(Value* Source, Instruction *InsertBefore);
+  static Instruction* CreateFree(Value* Source, Instruction *InsertBefore);
   static Instruction* CreateFree(Value* Source, BasicBlock *InsertAtEnd);

   ~CallInst();

@@ -937,8 +941,33 @@ public:
                unsigned(isTC));
   }

+  /// @deprecated these "define hacks" will go away soon
+  /// @brief coerce out-of-tree code to abandon the low-level interfaces
+  /// @details see the comments below and update your code to the high-level
+  ///          interfaces
+  ///    - getOperand(0)  --->  getCalledValue(), or possibly getCalledFunction
+  ///    - setOperand(0, V)  --->  setCalledFunction(V)
+  ///
+  ///    in LLVM v2.8-only code
+  ///    - getOperand(N+1)  --->  getArgOperand(N)
+  ///    - setOperand(N+1, V)  --->  setArgOperand(N, V)
+  ///    - getNumOperands()  --->  getNumArgOperands()+1  // note the "+1"!
+  ///
+  ///    in backward-compatible code, please consult llvm/Support/CallSite.h;
+  ///    you should create a CallSite using the CallInst pointer and call its
+  ///    methods
+  ///
+# define public private
+# define protected private
   /// Provide fast operand accessors
   DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+# undef public
+# undef protected
+public:
+
+  enum { ArgOffset = 0 }; ///< temporary, do not use for new code!
+  unsigned getNumArgOperands() const { return getNumOperands() - 1; }
+  Value *getArgOperand(unsigned i) const { return getOperand(i + ArgOffset); }
+  void setArgOperand(unsigned i, Value *v) { setOperand(i + ArgOffset, v); }

   /// getCallingConv/setCallingConv - Get or set the calling convention of this
   /// function call.
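In migrated code the difference is a simple index shift; given a call ret = f(a, b), the sketch below contrasts the two styles (illustrative only; the helper name is invented):

#include "llvm/Instructions.h"
#include <cassert>
using namespace llvm;

// Hypothetical helper: fetch the second actual argument of a call.
static Value *secondArgument(CallInst *CI) {
  // Low-level style, now locked away by the temporary '#define public
  // private' hack above:
  //   Value *V = CI->getOperand(2);   // operand index counts the callee
  // High-level style: argument indices start at 0, and the callee is no
  // longer counted among the argument operands.
  assert(CI->getNumArgOperands() >= 2 && "call must have two arguments");
  return CI->getArgOperand(1);
}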
bool isNoInline() const { return paramHasAttr(~0, Attribute::NoInline); } - void setIsNoInline(bool Value) { + void setIsNoInline(bool Value = true) { if (Value) addAttribute(~0, Attribute::NoInline); else removeAttribute(~0, Attribute::NoInline); } @@ -998,18 +1027,14 @@ public: } /// @brief Determine if the call cannot return. - bool doesNotReturn() const { - return paramHasAttr(~0, Attribute::NoReturn); - } + bool doesNotReturn() const { return paramHasAttr(~0, Attribute::NoReturn); } void setDoesNotReturn(bool DoesNotReturn = true) { if (DoesNotReturn) addAttribute(~0, Attribute::NoReturn); else removeAttribute(~0, Attribute::NoReturn); } /// @brief Determine if the call cannot unwind. - bool doesNotThrow() const { - return paramHasAttr(~0, Attribute::NoUnwind); - } + bool doesNotThrow() const { return paramHasAttr(~0, Attribute::NoUnwind); } void setDoesNotThrow(bool DoesNotThrow = true) { if (DoesNotThrow) addAttribute(~0, Attribute::NoUnwind); else removeAttribute(~0, Attribute::NoUnwind); @@ -1031,17 +1056,17 @@ public: /// indirect function invocation. /// Function *getCalledFunction() const { - return dyn_cast(Op<0>()); + return dyn_cast(Op()); } /// getCalledValue - Get a pointer to the function that is invoked by this /// instruction. - const Value *getCalledValue() const { return Op<0>(); } - Value *getCalledValue() { return Op<0>(); } + const Value *getCalledValue() const { return Op(); } + Value *getCalledValue() { return Op(); } /// setCalledFunction - Set the function called. void setCalledFunction(Value* Fn) { - Op<0>() = Fn; + Op() = Fn; } // Methods for support type inquiry through isa, cast, and dyn_cast: @@ -1071,7 +1096,7 @@ CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd, ->getElementType())->getReturnType(), Instruction::Call, OperandTraits::op_end(this) - (ArgEnd - ArgBegin + 1), - (unsigned)(ArgEnd - ArgBegin + 1), InsertAtEnd) { + unsigned(ArgEnd - ArgBegin + 1), InsertAtEnd) { init(Func, ArgBegin, ArgEnd, NameStr, typename std::iterator_traits::iterator_category()); } @@ -1083,11 +1108,15 @@ CallInst::CallInst(Value *Func, InputIterator ArgBegin, InputIterator ArgEnd, ->getElementType())->getReturnType(), Instruction::Call, OperandTraits::op_end(this) - (ArgEnd - ArgBegin + 1), - (unsigned)(ArgEnd - ArgBegin + 1), InsertBefore) { + unsigned(ArgEnd - ArgBegin + 1), InsertBefore) { init(Func, ArgBegin, ArgEnd, NameStr, typename std::iterator_traits::iterator_category()); } + +// Note: if you get compile errors about private methods then +// please update your code to use the high-level operand +// interfaces. See line 943 above. DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CallInst, Value) //===----------------------------------------------------------------------===// @@ -2432,6 +2461,10 @@ public: /// Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); + unsigned getNumArgOperands() const { return getNumOperands() - 3; } + Value *getArgOperand(unsigned i) const { return getOperand(i); } + void setArgOperand(unsigned i, Value *v) { setOperand(i, v); } + /// getCallingConv/setCallingConv - Get or set the calling convention of this /// function call. CallingConv::ID getCallingConv() const { @@ -2465,11 +2498,11 @@ public: /// @brief Return true if the call should not be inlined. 
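InvokeInst grows the same facade; a hedged sketch of why its count subtracts three (the callee and the two successor blocks occupy the last three operands, while the arguments stay at the front):

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    static bool anyNullArg(const InvokeInst *II) {
      // getNumArgOperands() == getNumOperands() - 3: the trailing three
      // operands are not call arguments.
      for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i)
        if (isa<ConstantPointerNull>(II->getArgOperand(i)))
          return true;
      return false;
    }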
bool isNoInline() const { return paramHasAttr(~0, Attribute::NoInline); } - void setIsNoInline(bool Value) { + void setIsNoInline(bool Value = true) { if (Value) addAttribute(~0, Attribute::NoInline); else removeAttribute(~0, Attribute::NoInline); } - + /// @brief Determine if the call does not access memory. bool doesNotAccessMemory() const { return paramHasAttr(~0, Attribute::ReadNone); @@ -2489,18 +2522,14 @@ public: } /// @brief Determine if the call cannot return. - bool doesNotReturn() const { - return paramHasAttr(~0, Attribute::NoReturn); - } + bool doesNotReturn() const { return paramHasAttr(~0, Attribute::NoReturn); } void setDoesNotReturn(bool DoesNotReturn = true) { if (DoesNotReturn) addAttribute(~0, Attribute::NoReturn); else removeAttribute(~0, Attribute::NoReturn); } /// @brief Determine if the call cannot unwind. - bool doesNotThrow() const { - return paramHasAttr(~0, Attribute::NoUnwind); - } + bool doesNotThrow() const { return paramHasAttr(~0, Attribute::NoUnwind); } void setDoesNotThrow(bool DoesNotThrow = true) { if (DoesNotThrow) addAttribute(~0, Attribute::NoUnwind); else removeAttribute(~0, Attribute::NoUnwind); diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h index 5b0e90f..48f2da9 100644 --- a/include/llvm/IntrinsicInst.h +++ b/include/llvm/IntrinsicInst.h @@ -43,7 +43,7 @@ namespace llvm { Intrinsic::ID getIntrinsicID() const { return (Intrinsic::ID)getCalledFunction()->getIntrinsicID(); } - + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const IntrinsicInst *) { return true; } static inline bool classof(const CallInst *I) { @@ -74,7 +74,7 @@ namespace llvm { static inline bool classof(const Value *V) { return isa(V) && classof(cast(V)); } - + static Value *StripCast(Value *C); }; @@ -83,7 +83,7 @@ namespace llvm { class DbgDeclareInst : public DbgInfoIntrinsic { public: Value *getAddress() const; - MDNode *getVariable() const { return cast(getOperand(2)); } + MDNode *getVariable() const { return cast(getArgOperand(1)); } // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const DbgDeclareInst *) { return true; } @@ -103,9 +103,9 @@ namespace llvm { Value *getValue(); uint64_t getOffset() const { return cast( - const_cast(getOperand(2)))->getZExtValue(); + const_cast(getArgOperand(1)))->getZExtValue(); } - MDNode *getVariable() const { return cast(getOperand(3)); } + MDNode *getVariable() const { return cast(getArgOperand(2)); } // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const DbgValueInst *) { return true; } @@ -121,22 +121,22 @@ namespace llvm { /// class MemIntrinsic : public IntrinsicInst { public: - Value *getRawDest() const { return const_cast(getOperand(1)); } + Value *getRawDest() const { return const_cast(getArgOperand(0)); } - Value *getLength() const { return const_cast(getOperand(3)); } + Value *getLength() const { return const_cast(getArgOperand(2)); } ConstantInt *getAlignmentCst() const { - return cast(const_cast(getOperand(4))); + return cast(const_cast(getArgOperand(3))); } - + unsigned getAlignment() const { return getAlignmentCst()->getZExtValue(); } ConstantInt *getVolatileCst() const { - return cast(const_cast(getOperand(5))); + return cast(const_cast(getArgOperand(4))); } bool isVolatile() const { - return getVolatileCst()->getZExtValue() != 0; + return !getVolatileCst()->isZero(); } /// getDest - This is just like getRawDest, but it strips off any cast @@ -149,27 +149,27 
@@ namespace llvm { void setDest(Value *Ptr) { assert(getRawDest()->getType() == Ptr->getType() && "setDest called with pointer of wrong type!"); - setOperand(1, Ptr); + setArgOperand(0, Ptr); } void setLength(Value *L) { assert(getLength()->getType() == L->getType() && "setLength called with value of wrong type!"); - setOperand(3, L); + setArgOperand(2, L); } - + void setAlignment(Constant* A) { - setOperand(4, A); + setArgOperand(3, A); } void setVolatile(Constant* V) { - setOperand(5, V); + setArgOperand(4, V); } const Type *getAlignmentType() const { - return getOperand(4)->getType(); + return getArgOperand(3)->getType(); } - + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const MemIntrinsic *) { return true; } static inline bool classof(const IntrinsicInst *I) { @@ -192,14 +192,14 @@ namespace llvm { public: /// get* - Return the arguments to the instruction. /// - Value *getValue() const { return const_cast(getOperand(2)); } - + Value *getValue() const { return const_cast(getArgOperand(1)); } + void setValue(Value *Val) { assert(getValue()->getType() == Val->getType() && - "setSource called with pointer of wrong type!"); - setOperand(2, Val); + "setValue called with value of wrong type!"); + setArgOperand(1, Val); } - + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const MemSetInst *) { return true; } static inline bool classof(const IntrinsicInst *I) { @@ -209,26 +209,26 @@ namespace llvm { return isa(V) && classof(cast(V)); } }; - + /// MemTransferInst - This class wraps the llvm.memcpy/memmove intrinsics. /// class MemTransferInst : public MemIntrinsic { public: /// get* - Return the arguments to the instruction. /// - Value *getRawSource() const { return const_cast(getOperand(2)); } - + Value *getRawSource() const { return const_cast(getArgOperand(1)); } + /// getSource - This is just like getRawSource, but it strips off any cast /// instructions that feed it, giving the original input. The returned /// value is guaranteed to be a pointer. Value *getSource() const { return getRawSource()->stripPointerCasts(); } - + void setSource(Value *Ptr) { assert(getRawSource()->getType() == Ptr->getType() && "setSource called with pointer of wrong type!"); - setOperand(2, Ptr); + setArgOperand(1, Ptr); } - + // Methods for support type inquiry through isa, cast, and dyn_cast: static inline bool classof(const MemTransferInst *) { return true; } static inline bool classof(const IntrinsicInst *I) { @@ -239,8 +239,8 @@ namespace llvm { return isa(V) && classof(cast(V)); } }; - - + + /// MemCpyInst - This class wraps the llvm.memcpy intrinsic. /// class MemCpyInst : public MemTransferInst { @@ -282,7 +282,7 @@ namespace llvm { return isa(V) && classof(cast(V)); } }; - + /// MemoryUseIntrinsic - This is the common base class for the memory use /// marker intrinsics. 
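The net effect of rebasing the intrinsic wrappers onto getArgOperand, in one sketch (the helper is illustrative; the indices in the comments are argument positions, not raw operand numbers):

    #include "llvm/IntrinsicInst.h"
    using namespace llvm;

    static void retarget(MemTransferInst *MT, Value *NewSrc) {
      // Argument order is dest, source, length, align, volatile.
      // The mutators assert that the replacement type matches.
      if (!MT->isVolatile() &&
          MT->getRawSource()->getType() == NewSrc->getType())
        MT->setSource(NewSrc);  // routes to setArgOperand(1, NewSrc)
    }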
/// diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 2b4df54..444f514 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -309,10 +309,8 @@ let Properties = [IntrNoMem] in { def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty]>; def int_eh_sjlj_callsite: Intrinsic<[], [llvm_i32_ty]>; } -def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>, - GCCBuiltin<"__builtin_setjmp">; -def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>, - GCCBuiltin<"__builtin_longjmp">; +def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; +def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>; //===---------------- Generic Variable Attribute Intrinsics----------------===// // diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index c2375ea..876703b 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -113,6 +113,7 @@ namespace { (void) llvm::createSingleLoopExtractorPass(); (void) llvm::createStripSymbolsPass(); (void) llvm::createStripNonDebugSymbolsPass(); + (void) llvm::createStripDeadDebugInfoPass(); (void) llvm::createStripDeadPrototypesPass(); (void) llvm::createTailCallEliminationPass(); (void) llvm::createTailDuplicationPass(); diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index d9963ec..07ca070 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -354,7 +354,7 @@ public: typedef FragmentListType::reverse_iterator reverse_iterator; private: - iplist Fragments; + FragmentListType Fragments; const MCSection *Section; /// Ordinal - The section index in the assembler's section list. @@ -641,7 +641,7 @@ public: /// in the symbol table, or whether it can be discarded by the assembler. This /// also affects whether the assembler treats the label as potentially /// defining a separate atom. - bool isSymbolLinkerVisible(const MCSymbolData *SD) const; + bool isSymbolLinkerVisible(const MCSymbol &SD) const; /// Emit the section contents using the given object writer. // diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 03b5fb0..a57b5bf 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -14,6 +14,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { class MCAsmInfo; @@ -54,6 +55,17 @@ namespace llvm { /// for the LocalLabelVal and adds it to the map if needed. unsigned GetInstance(int64_t LocalLabelVal); + /// The file name of the log file from the environment variable + /// AS_SECURE_LOG_FILE, which must be set before the .secure_log_unique + /// directive is used or it is an error. + char *SecureLogFile; + /// The stream that gets written to for the .secure_log_unique directive. + raw_ostream *SecureLog; + /// Boolean toggled when .secure_log_unique / .secure_log_reset is seen to + /// catch errors if .secure_log_unique appears twice without + /// .secure_log_reset appearing between them. + bool SecureLogUsed; + /// Allocator - Allocator object used for creating machine code objects.
/// /// We use a bump pointer allocator to avoid the need to track all allocated @@ -127,6 +139,16 @@ namespace llvm { /// @} + char *getSecureLogFile() { return SecureLogFile; } + raw_ostream *getSecureLog() { return SecureLog; } + bool getSecureLogUsed() { return SecureLogUsed; } + void setSecureLog(raw_ostream *Value) { + SecureLog = Value; + } + void setSecureLogUsed(bool Value) { + SecureLogUsed = Value; + } + void *Allocate(unsigned Size, unsigned Align = 8) { return Allocator.Allocate(Size, Align); } diff --git a/include/llvm/MC/MCDirectives.h b/include/llvm/MC/MCDirectives.h index 1f7364d..223b09e 100644 --- a/include/llvm/MC/MCDirectives.h +++ b/include/llvm/MC/MCDirectives.h @@ -38,7 +38,8 @@ enum MCSymbolAttr { MCSA_Reference, ///< .reference (MachO) MCSA_Weak, ///< .weak MCSA_WeakDefinition, ///< .weak_definition (MachO) - MCSA_WeakReference ///< .weak_reference (MachO) + MCSA_WeakReference, ///< .weak_reference (MachO) + MCSA_WeakDefAutoPrivate ///< .weak_def_can_be_hidden (MachO) }; enum MCAssemblerFlag { diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h new file mode 100644 index 0000000..7b9ff00 --- /dev/null +++ b/include/llvm/MC/MCObjectStreamer.h @@ -0,0 +1,56 @@ +//===- MCObjectStreamer.h - MCStreamer Object File Interface ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCOBJECTSTREAMER_H +#define LLVM_MC_MCOBJECTSTREAMER_H + +#include "llvm/MC/MCStreamer.h" + +namespace llvm { +class MCAssembler; +class MCCodeEmitter; +class MCSectionData; +class TargetAsmBackend; +class raw_ostream; + +/// \brief Streaming object file generation interface. +/// +/// This class provides an implementation of the MCStreamer interface which is +/// suitable for use with the assembler backend. Specific object file formats +/// are expected to subclass this interface to implement directives specific +/// to that file format or custom semantics expected by the object writer +/// implementation. 
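Before the new MCObjectStreamer interface below, a sketch of how a .secure_log_unique implementation might drive the MCContext accessors just added. The helper, its error handling, and the use of raw_fd_ostream's append flag are assumptions about usage; the real parser would own the stream it installs:

    #include "llvm/MC/MCContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static bool appendSecureLogLine(MCContext &Ctx, StringRef Line,
                                    std::string &Err) {
      if (Ctx.getSecureLogUsed())
        return true;               // doubled directive: needs a reset first
      raw_ostream *OS = Ctx.getSecureLog();
      if (OS == 0) {
        const char *File = Ctx.getSecureLogFile();
        if (File == 0)
          return true;             // AS_SECURE_LOG_FILE was never set
        OS = new raw_fd_ostream(File, Err, raw_fd_ostream::F_Append);
        Ctx.setSecureLog(OS);
      }
      *OS << Line << '\n';
      Ctx.setSecureLogUsed(true);
      return false;
    }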
+class MCObjectStreamer : public MCStreamer { + MCAssembler *Assembler; + MCSectionData *CurSectionData; + +protected: + MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &_OS, MCCodeEmitter *_Emitter); + ~MCObjectStreamer(); + + MCSectionData *getCurrentSectionData() const { + return CurSectionData; + } + +public: + MCAssembler &getAssembler() { return *Assembler; } + + /// @name MCStreamer Interface + /// @{ + + virtual void SwitchSection(const MCSection *Section); + virtual void Finish(); + + /// @} +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/MC/MCObjectWriter.h b/include/llvm/MC/MCObjectWriter.h index e900584..22eea7e 100644 --- a/include/llvm/MC/MCObjectWriter.h +++ b/include/llvm/MC/MCObjectWriter.h @@ -162,6 +162,8 @@ public: /// @} }; +MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS); + } // End llvm namespace #endif diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h index cf6eefb..2187889 100644 --- a/include/llvm/MC/MCParser/AsmLexer.h +++ b/include/llvm/MC/MCParser/AsmLexer.h @@ -33,8 +33,6 @@ class AsmLexer : public MCAsmLexer { const char *CurPtr; const MemoryBuffer *CurBuf; - const char *TokStart; - void operator=(const AsmLexer&); // DO NOT IMPLEMENT AsmLexer(const AsmLexer&); // DO NOT IMPLEMENT @@ -48,9 +46,7 @@ public: void setBuffer(const MemoryBuffer *buf, const char *ptr = NULL); - SMLoc getLoc() const; - - StringRef LexUntilEndOfStatement(); + virtual StringRef LexUntilEndOfStatement(); bool isAtStartOfComment(char Char); diff --git a/include/llvm/MC/MCParser/AsmParser.h b/include/llvm/MC/MCParser/AsmParser.h index e929fd1..82b120b 100644 --- a/include/llvm/MC/MCParser/AsmParser.h +++ b/include/llvm/MC/MCParser/AsmParser.h @@ -26,6 +26,7 @@ namespace llvm { class AsmCond; class AsmToken; +class MCAsmParserExtension; class MCContext; class MCExpr; class MCInst; @@ -36,11 +37,15 @@ class TargetAsmParser; class Twine; class AsmParser : public MCAsmParser { + AsmParser(const AsmParser &); // DO NOT IMPLEMENT + void operator=(const AsmParser &); // DO NOT IMPLEMENT private: AsmLexer Lexer; MCContext &Ctx; MCStreamer &Out; SourceMgr &SrcMgr; + MCAsmParserExtension *GenericParser; + MCAsmParserExtension *PlatformParser; TargetAsmParser *TargetParser; /// This is the current buffer index we're lexing from as managed by the @@ -54,26 +59,28 @@ private: /// invoked after the directive identifier is read and is responsible for /// parsing and validating the rest of the directive. The handler is passed /// in the directive name and the location of the directive keyword. 
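The subclassing contract that header spells out, as a declaration-only sketch (the class name is invented, the EmitSymbolAttribute hook stands in for the format-specific directives, and the remaining pure-virtual MCStreamer methods are elided):

    #include "llvm/MC/MCObjectStreamer.h"
    using namespace llvm;

    class MyFormatStreamer : public MCObjectStreamer {
    public:
      MyFormatStreamer(MCContext &Ctx, TargetAsmBackend &TAB,
                       raw_ostream &OS, MCCodeEmitter *Emitter)
        : MCObjectStreamer(Ctx, TAB, OS, Emitter) {}

      // Format-specific directives are layered here; generic section
      // switching and final layout come from MCObjectStreamer itself.
      virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attr);
      // ...remaining MCStreamer pure virtuals omitted in this sketch.
    };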
- StringMap DirectiveMap; + StringMap > DirectiveMap; public: - AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, + AsmParser(const Target &T, SourceMgr &SM, MCContext &Ctx, MCStreamer &Out, const MCAsmInfo &MAI); ~AsmParser(); bool Run(bool NoInitialTextSection, bool NoFinalize = false); - - void AddDirectiveHandler(StringRef Directive, - bool (AsmParser::*Handler)(StringRef, SMLoc)) { - DirectiveMap[Directive] = Handler; + void AddDirectiveHandler(MCAsmParserExtension *Object, + StringRef Directive, + DirectiveHandler Handler) { + DirectiveMap[Directive] = std::make_pair(Object, Handler); } + public: TargetAsmParser &getTargetParser() const { return *TargetParser; } - void setTargetParser(TargetAsmParser &P) { TargetParser = &P; } + void setTargetParser(TargetAsmParser &P); /// @name MCAsmParser Interface /// { + virtual SourceMgr &getSourceManager() { return SrcMgr; } virtual MCAsmLexer &getLexer() { return Lexer; } virtual MCContext &getContext() { return Ctx; } virtual MCStreamer &getStreamer() { return Out; } @@ -91,12 +98,8 @@ public: /// } private: - MCSymbol *CreateSymbol(StringRef Name); - bool ParseStatement(); - bool TokError(const char *Msg); - void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const; /// EnterIncludeFile - Enter the specified file. This returns true on failure. @@ -115,10 +118,6 @@ private: bool ParseIdentifier(StringRef &Res); // Directive Parsing. - bool ParseDirectiveDarwinSection(); // Darwin specific ".section". - bool ParseDirectiveSectionSwitch(const char *Segment, const char *Section, - unsigned TAA = 0, unsigned ImplicitAlign = 0, - unsigned StubSize = 0); bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz" bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ... bool ParseDirectiveFill(); // ".fill" @@ -132,17 +131,8 @@ private: /// accepts a single symbol (which should be a label or an external). bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr); bool ParseDirectiveELFType(); // ELF specific ".type" - bool ParseDirectiveDarwinSymbolDesc(); // Darwin specific ".desc" - bool ParseDirectiveDarwinLsym(); // Darwin specific ".lsym" bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm" - bool ParseDirectiveDarwinZerofill(); // Darwin specific ".zerofill" - bool ParseDirectiveDarwinTBSS(); // Darwin specific ".tbss" - - // Darwin specific ".subsections_via_symbols" - bool ParseDirectiveDarwinSubsectionsViaSymbols(); - // Darwin specific .dump and .load - bool ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump); bool ParseDirectiveAbort(); // ".abort" bool ParseDirectiveInclude(); // ".include" @@ -152,10 +142,6 @@ private: bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else" bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif - bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file" - bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line" - bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc" - /// ParseEscapedString - Parse the current token as a string which may include /// escaped characters and return the string contents. 
bool ParseEscapedString(std::string &Data); diff --git a/include/llvm/MC/MCParser/MCAsmLexer.h b/include/llvm/MC/MCParser/MCAsmLexer.h index bd1496f..d690e81 100644 --- a/include/llvm/MC/MCParser/MCAsmLexer.h +++ b/include/llvm/MC/MCParser/MCAsmLexer.h @@ -121,6 +121,8 @@ class MCAsmLexer { MCAsmLexer(const MCAsmLexer &); // DO NOT IMPLEMENT void operator=(const MCAsmLexer &); // DO NOT IMPLEMENT protected: // Can only create subclasses. + const char *TokStart; + MCAsmLexer(); virtual AsmToken LexToken() = 0; @@ -141,6 +143,11 @@ public: return CurTok = LexToken(); } + virtual StringRef LexUntilEndOfStatement() = 0; + + /// getLoc - Get the current source location. + SMLoc getLoc() const; + /// getTok - Get the current (last) lexed token. const AsmToken &getTok() { return CurTok; diff --git a/include/llvm/MC/MCParser/MCAsmParser.h b/include/llvm/MC/MCParser/MCAsmParser.h index 7f7f1b6..d0ccd0f 100644 --- a/include/llvm/MC/MCParser/MCAsmParser.h +++ b/include/llvm/MC/MCParser/MCAsmParser.h @@ -15,35 +15,48 @@ namespace llvm { class AsmToken; class MCAsmLexer; +class MCAsmParserExtension; class MCContext; class MCExpr; class MCStreamer; class SMLoc; +class SourceMgr; +class StringRef; class Twine; /// MCAsmParser - Generic assembler parser interface, for use by target specific /// assembly parsers. class MCAsmParser { +public: + typedef bool (MCAsmParserExtension::*DirectiveHandler)(StringRef, SMLoc); + +private: MCAsmParser(const MCAsmParser &); // DO NOT IMPLEMENT void operator=(const MCAsmParser &); // DO NOT IMPLEMENT protected: // Can only create subclasses. MCAsmParser(); - + public: virtual ~MCAsmParser(); + virtual void AddDirectiveHandler(MCAsmParserExtension *Object, + StringRef Directive, + DirectiveHandler Handler) = 0; + + virtual SourceMgr &getSourceManager() = 0; + virtual MCAsmLexer &getLexer() = 0; virtual MCContext &getContext() = 0; - /// getSteamer - Return the output streamer for the assembler. + /// getStreamer - Return the output streamer for the assembler. virtual MCStreamer &getStreamer() = 0; /// Warning - Emit a warning at the location \arg L, with the message \arg /// Msg. virtual void Warning(SMLoc L, const Twine &Msg) = 0; - /// Warning - Emit an error at the location \arg L, with the message \arg + /// Error - Emit an error at the location \arg L, with the message \arg /// Msg. /// /// \return The return value is always true, as an idiomatic convenience to @@ -53,10 +66,17 @@ public: /// Lex - Get the next AsmToken in the stream, possibly handling file /// inclusion first. virtual const AsmToken &Lex() = 0; - + /// getTok - Get the current AsmToken from the stream. const AsmToken &getTok(); - + + /// \brief Report an error at the current lexer location. + bool TokError(const char *Msg); + + /// ParseIdentifier - Parse an identifier or string (as a quoted identifier) + /// and set \arg Res to the identifier contents. + virtual bool ParseIdentifier(StringRef &Res) = 0; + /// ParseExpression - Parse an arbitrary expression. /// /// @param Res - The value of the expression. The result is undefined @@ -64,7 +84,7 @@ public: /// @result - False on success. virtual bool ParseExpression(const MCExpr *&Res, SMLoc &EndLoc) = 0; bool ParseExpression(const MCExpr *&Res); - + /// ParseParenExpression - Parse an arbitrary expression, assuming that an /// initial '(' has already been consumed. 
/// diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h new file mode 100644 index 0000000..ad9ccf7 --- /dev/null +++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h @@ -0,0 +1,66 @@ +//===-- llvm/MC/MCAsmParserExtension.h - Asm Parser Hooks -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCASMPARSEREXTENSION_H +#define LLVM_MC_MCASMPARSEREXTENSION_H + +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/Support/SMLoc.h" + +namespace llvm { + +/// \brief Generic interface for extending the MCAsmParser, +/// which is implemented by target and object file assembly parser +/// implementations. +class MCAsmParserExtension { + MCAsmParserExtension(const MCAsmParserExtension &); // DO NOT IMPLEMENT + void operator=(const MCAsmParserExtension &); // DO NOT IMPLEMENT + + MCAsmParser *Parser; + +protected: + MCAsmParserExtension(); + +public: + virtual ~MCAsmParserExtension(); + + /// \brief Initialize the extension for parsing using the given \arg + /// Parser. The extension should use the AsmParser interfaces to register its + /// parsing routines. + virtual void Initialize(MCAsmParser &Parser); + + /// @name MCAsmParser Proxy Interfaces + /// @{ + + MCContext &getContext() { return getParser().getContext(); } + MCAsmLexer &getLexer() { return getParser().getLexer(); } + MCAsmParser &getParser() { return *Parser; } + SourceMgr &getSourceManager() { return getParser().getSourceManager(); } + MCStreamer &getStreamer() { return getParser().getStreamer(); } + void Warning(SMLoc L, const Twine &Msg) { + return getParser().Warning(L, Msg); + } + bool Error(SMLoc L, const Twine &Msg) { + return getParser().Error(L, Msg); + } + + const AsmToken &Lex() { return getParser().Lex(); } + + const AsmToken &getTok() { return getParser().getTok(); } + + bool TokError(const char *Msg) { + return getParser().TokError(Msg); + } + + /// @} +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h index 808767c..5c99735 100644 --- a/include/llvm/MC/MCSection.h +++ b/include/llvm/MC/MCSection.h @@ -23,7 +23,7 @@ namespace llvm { class MCContext; class MCAsmInfo; class raw_ostream; - + /// MCSection - Instances of this class represent a uniqued identifier for a /// section in the current translation unit. The MCContext class uniques and /// creates these. 
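Tying the parser pieces together, a hedged sketch of a minimal extension: Initialize() registers one handler through the reworked AddDirectiveHandler, and the proxy methods keep the body short. The extension and directive names are invented, and the member-pointer conversion mirrors the pattern used elsewhere in the tree:

    #include "llvm/MC/MCParser/MCAsmLexer.h"
    #include "llvm/MC/MCParser/MCAsmParserExtension.h"
    using namespace llvm;

    class MyAsmParser : public MCAsmParserExtension {
    public:
      virtual void Initialize(MCAsmParser &Parser) {
        MCAsmParserExtension::Initialize(Parser);  // record the parser
        getParser().AddDirectiveHandler(this, ".mydirective",
            MCAsmParser::DirectiveHandler(&MyAsmParser::ParseDirectiveMy));
      }

      bool ParseDirectiveMy(StringRef, SMLoc) {
        if (getLexer().isNot(AsmToken::EndOfStatement))
          return TokError("unexpected token in '.mydirective'");
        Lex();         // eat the end of statement
        return false;  // false means success, as for ParseDirective* above
      }
    };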
@@ -49,7 +49,7 @@ namespace llvm { SectionKind getKind() const { return Kind; } SectionVariant getVariant() const { return Variant; } - + virtual void PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS) const = 0; @@ -63,7 +63,7 @@ namespace llvm { static bool classof(const MCSection *) { return true; } }; - + } // end namespace llvm #endif diff --git a/include/llvm/MC/MCSectionCOFF.h b/include/llvm/MC/MCSectionCOFF.h index 938a388..f828e10 100644 --- a/include/llvm/MC/MCSectionCOFF.h +++ b/include/llvm/MC/MCSectionCOFF.h @@ -16,6 +16,8 @@ #include "llvm/MC/MCSection.h" +#include "llvm/Support/COFF.h" + namespace llvm { /// MCSectionCOFF - This represents a section on Windows @@ -47,56 +49,6 @@ namespace llvm { /// should be printed before the section name bool ShouldOmitSectionDirective(StringRef Name, const MCAsmInfo &MAI) const; - //FIXME: all COFF enumerations/flags should be standardized into one place... - // Target/X86COFF.h doesn't seem right as COFF can be used for other targets, - // MC/WinCOFF.h maybe right as it isn't target or entity specific, and it is - // pretty low on the dependancy graph (is there any need to support non - // windows COFF?) - // here is good for section stuff, but others should go elsewhere - - /// Valid section flags. - enum { - IMAGE_SCN_TYPE_NO_PAD = 0x00000008, - IMAGE_SCN_CNT_CODE = 0x00000020, - IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, - IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080, - IMAGE_SCN_LNK_OTHER = 0x00000100, - IMAGE_SCN_LNK_INFO = 0x00000200, - IMAGE_SCN_LNK_REMOVE = 0x00000800, - IMAGE_SCN_LNK_COMDAT = 0x00001000, - IMAGE_SCN_MEM_FARDATA = 0x00008000, - IMAGE_SCN_MEM_PURGEABLE = 0x00020000, - IMAGE_SCN_MEM_16BIT = 0x00020000, - IMAGE_SCN_MEM_LOCKED = 0x00040000, - IMAGE_SCN_MEM_PRELOAD = 0x00080000, - /* these are handled elsewhere - IMAGE_SCN_ALIGN_1BYTES = 0x00100000, - IMAGE_SCN_ALIGN_2BYTES = 0x00200000, - IMAGE_SCN_ALIGN_4BYTES = 0x00300000, - IMAGE_SCN_ALIGN_8BYTES = 0x00400000, - IMAGE_SCN_ALIGN_16BYTES = 0x00500000, - IMAGE_SCN_ALIGN_32BYTES = 0x00600000, - IMAGE_SCN_ALIGN_64BYTES = 0x00700000, - */ - IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, - IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, - IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, - IMAGE_SCN_MEM_NOT_PAGED = 0x08000000, - IMAGE_SCN_MEM_SHARED = 0x10000000, - IMAGE_SCN_MEM_EXECUTE = 0x20000000, - IMAGE_SCN_MEM_READ = 0x40000000, - IMAGE_SCN_MEM_WRITE = 0x80000000 - }; - - enum { - IMAGE_COMDAT_SELECT_NODUPLICATES = 1, - IMAGE_COMDAT_SELECT_ANY, - IMAGE_COMDAT_SELECT_SAME_SIZE, - IMAGE_COMDAT_SELECT_EXACT_MATCH, - IMAGE_COMDAT_SELECT_ASSOCIATIVE, - IMAGE_COMDAT_SELECT_LARGEST - }; - StringRef getSectionName() const { return SectionName; } unsigned getCharacteristics() const { return Characteristics; } int getSelection () const { return Selection; } diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 0783159..aca7dd3 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -314,7 +314,7 @@ namespace llvm { virtual void EmitRawText(StringRef String); void EmitRawText(const Twine &String); - /// Finish - Finish emission of machine code and flush any output. + /// Finish - Finish emission of machine code. virtual void Finish() = 0; }; @@ -341,12 +341,18 @@ namespace llvm { MCCodeEmitter *CE = 0, bool ShowInst = false); - /// createMachOStreamer - Create a machine code streamer which will generative + /// createMachOStreamer - Create a machine code streamer which will generate /// Mach-O format object files. 
MCStreamer *createMachOStreamer(MCContext &Ctx, TargetAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *CE, bool RelaxAll = false); + /// createWinCOFFStreamer - Create a machine code streamer which will + /// generate Microsoft COFF format object files. + MCStreamer *createWinCOFFStreamer(MCContext &Ctx, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, raw_ostream &OS); + /// createLoggingStreamer - Create a machine code streamer which just logs the /// API calls and then dispatches to another streamer. /// diff --git a/include/llvm/MC/SectionKind.h b/include/llvm/MC/SectionKind.h index c9557f2..85a91c6 100644 --- a/include/llvm/MC/SectionKind.h +++ b/include/llvm/MC/SectionKind.h @@ -29,10 +29,10 @@ class SectionKind { enum Kind { /// Metadata - Debug info sections or other metadata. Metadata, - + /// Text - Text section, used for functions and other executable code. Text, - + /// ReadOnly - Data that is never written to at program runtime by the /// program or the dynamic linker. Things in the top-level readonly /// SectionKind are not mergeable. @@ -45,7 +45,7 @@ class SectionKind { /// Mergeable1ByteCString - 1 byte mergable, null terminated, string. Mergeable1ByteCString, - + /// Mergeable2ByteCString - 2 byte mergable, null terminated, string. Mergeable2ByteCString, @@ -56,11 +56,11 @@ class SectionKind { /// constants together. For example, this can be used to unique /// constant pool entries etc. MergeableConst, - + /// MergeableConst4 - This is a section used by 4-byte constants, /// for example, floats. MergeableConst4, - + /// MergeableConst8 - This is a section used by 8-byte constants, /// for example, doubles. MergeableConst8, @@ -68,33 +68,33 @@ class SectionKind { /// MergeableConst16 - This is a section used by 16-byte constants, /// for example, vectors. MergeableConst16, - + /// Writeable - This is the base of all segments that need to be written /// to during program runtime. - + /// ThreadLocal - This is the base of all TLS segments. All TLS /// objects must be writeable, otherwise there is no reason for them to /// be thread local! - + /// ThreadBSS - Zero-initialized TLS data objects. ThreadBSS, - + /// ThreadData - Initialized TLS data objects. ThreadData, - + /// GlobalWriteableData - Writeable data that is global (not thread /// local). - + /// BSS - Zero initialized writeable data. BSS, - + /// BSSLocal - This is BSS (zero initialized and writable) data /// which has local linkage. BSSLocal, - + /// BSSExtern - This is BSS data with normal external linkage. BSSExtern, - + /// Common - Data with common linkage. These represent tentative /// definitions, which always have a zero initializer and are never /// marked 'constant'. @@ -123,20 +123,20 @@ class SectionKind { /// mark the pages these globals end up on as read-only after it is /// done with its relocation phase. ReadOnlyWithRel, - + /// ReadOnlyWithRelLocal - This is data that is readonly by the /// program, but must be writeable so that the dynamic linker /// can perform relocations in it. This is used when we know /// that all the relocations are to globals in this final /// linked image. 
ReadOnlyWithRelLocal - + } K : 8; public: - + bool isMetadata() const { return K == Metadata; } bool isText() const { return K == Text; } - + bool isReadOnly() const { return K == ReadOnly || isMergeableCString() || isMergeableConst(); @@ -149,7 +149,7 @@ public: bool isMergeable1ByteCString() const { return K == Mergeable1ByteCString; } bool isMergeable2ByteCString() const { return K == Mergeable2ByteCString; } bool isMergeable4ByteCString() const { return K == Mergeable4ByteCString; } - + bool isMergeableConst() const { return K == MergeableConst || K == MergeableConst4 || K == MergeableConst8 || K == MergeableConst16; @@ -157,38 +157,38 @@ public: bool isMergeableConst4() const { return K == MergeableConst4; } bool isMergeableConst8() const { return K == MergeableConst8; } bool isMergeableConst16() const { return K == MergeableConst16; } - + bool isWriteable() const { return isThreadLocal() || isGlobalWriteableData(); } - + bool isThreadLocal() const { return K == ThreadData || K == ThreadBSS; } - - bool isThreadBSS() const { return K == ThreadBSS; } - bool isThreadData() const { return K == ThreadData; } + + bool isThreadBSS() const { return K == ThreadBSS; } + bool isThreadData() const { return K == ThreadData; } bool isGlobalWriteableData() const { return isBSS() || isCommon() || isDataRel() || isReadOnlyWithRel(); } - + bool isBSS() const { return K == BSS || K == BSSLocal || K == BSSExtern; } bool isBSSLocal() const { return K == BSSLocal; } bool isBSSExtern() const { return K == BSSExtern; } - + bool isCommon() const { return K == Common; } - + bool isDataRel() const { return K == DataRel || K == DataRelLocal || K == DataNoRel; } - + bool isDataRelLocal() const { return K == DataRelLocal || K == DataNoRel; } bool isDataNoRel() const { return K == DataNoRel; } - + bool isReadOnlyWithRel() const { return K == ReadOnlyWithRel || K == ReadOnlyWithRelLocal; } @@ -196,14 +196,14 @@ public: bool isReadOnlyWithRelLocal() const { return K == ReadOnlyWithRelLocal; } -private: +private: static SectionKind get(Kind K) { SectionKind Res; Res.K = K; return Res; } public: - + static SectionKind getMetadata() { return get(Metadata); } static SectionKind getText() { return get(Text); } static SectionKind getReadOnly() { return get(ReadOnly); } @@ -234,7 +234,7 @@ public: return get(ReadOnlyWithRelLocal); } }; - + } // end namespace llvm #endif diff --git a/include/llvm/Module.h b/include/llvm/Module.h index 901fada..5fc0418 100644 --- a/include/llvm/Module.h +++ b/include/llvm/Module.h @@ -197,11 +197,11 @@ public: /// Get any module-scope inline assembly blocks. /// @returns a string containing the module-scope inline assembly blocks. const std::string &getModuleInlineAsm() const { return GlobalScopeAsm; } - + /// @} /// @name Module Level Mutators /// @{ - + /// Set the module identifier. void setModuleIdentifier(StringRef ID) { ModuleID = ID; } @@ -235,12 +235,12 @@ public: /// getMDKindID - Return a unique non-zero ID for the specified metadata kind. /// This ID is uniqued across modules in the current LLVMContext. unsigned getMDKindID(StringRef Name) const; - + /// getMDKindNames - Populate client supplied SmallVector with the name for /// custom metadata IDs registered in this LLVMContext. ID #0 is not used, /// so it is filled in as an empty string. 
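A small illustrative use of the accessor documented above; slot 0 is skipped because, as noted, it comes back as the empty string:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void dumpMDKinds(const Module &M) {
      SmallVector<StringRef, 8> Names;
      M.getMDKindNames(Names);
      for (unsigned i = 1, e = Names.size(); i != e; ++i)
        errs() << i << " -> " << Names[i] << '\n';  // ID 0 is unused
    }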
void getMDKindNames(SmallVectorImpl &Result) const; - + /// @} /// @name Function Accessors /// @{ @@ -277,7 +277,7 @@ public: Constant *getOrInsertTargetIntrinsic(StringRef Name, const FunctionType *Ty, AttrListPtr AttributeList); - + /// getFunction - Look up the specified function in the module symbol table. /// If it does not exist, return null. Function *getFunction(StringRef Name) const; @@ -321,14 +321,14 @@ public: /// @} /// @name Named Metadata Accessors /// @{ - + /// getNamedMetadata - Return the first NamedMDNode in the module with the - /// specified name. This method returns null if a NamedMDNode with the + /// specified name. This method returns null if a NamedMDNode with the /// specified name is not found. - NamedMDNode *getNamedMetadata(StringRef Name) const; + NamedMDNode *getNamedMetadata(const Twine &Name) const; - /// getOrInsertNamedMetadata - Return the first named MDNode in the module - /// with the specified name. This method returns a new NamedMDNode if a + /// getOrInsertNamedMetadata - Return the first named MDNode in the module + /// with the specified name. This method returns a new NamedMDNode if a /// NamedMDNode with the specified name is not found. NamedMDNode *getOrInsertNamedMetadata(StringRef Name); @@ -515,15 +515,16 @@ public: const_named_metadata_iterator named_metadata_begin() const { return NamedMDList.begin(); } - + /// Get an iterator to the last named metadata. named_metadata_iterator named_metadata_end() { return NamedMDList.end(); } /// Get a constant iterator to the last named metadata. const_named_metadata_iterator named_metadata_end() const { return NamedMDList.end(); } - - /// Determine how many NamedMDNodes are in the Module's list of named metadata. + + /// Determine how many NamedMDNodes are in the Module's list of named + /// metadata. size_t named_metadata_size() const { return NamedMDList.size(); } /// Determine if the list of named metadata is empty. bool named_metadata_empty() const { return NamedMDList.empty(); } @@ -535,7 +536,7 @@ public: /// Print the module to an output stream with AssemblyAnnotationWriter. void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const; - + /// Dump the module to stderr (for debugging). void dump() const; /// This function causes all the subinstructions to "let go" of all references diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index 8d0c47d..5a58931 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -31,7 +31,6 @@ #include "llvm/System/DataTypes.h" -#include #include #include #include @@ -89,13 +88,8 @@ class Pass { Pass(const Pass &); // DO NOT IMPLEMENT public: - explicit Pass(PassKind K, intptr_t pid) : Resolver(0), PassID(pid), Kind(K) { - assert(pid && "pid cannot be 0"); - } - explicit Pass(PassKind K, const void *pid) - : Resolver(0), PassID((intptr_t)pid), Kind(K) { - assert(pid && "pid cannot be 0"); - } + explicit Pass(PassKind K, intptr_t pid); + explicit Pass(PassKind K, const void *pid); virtual ~Pass(); @@ -138,13 +132,8 @@ public: virtual PassManagerType getPotentialPassManagerType() const; // Access AnalysisResolver - inline void setResolver(AnalysisResolver *AR) { - assert(!Resolver && "Resolver is already set"); - Resolver = AR; - } - inline AnalysisResolver *getResolver() { - return Resolver; - } + void setResolver(AnalysisResolver *AR); + AnalysisResolver *getResolver() const { return Resolver; } /// getAnalysisUsage - This function should be overriden by passes that need /// analysis information to do their job. 
If a pass specifies that it uses a @@ -170,11 +159,9 @@ public: /// an analysis interface through multiple inheritance. If needed, it should /// override this to adjust the this pointer as needed for the specified pass /// info. - virtual void *getAdjustedAnalysisPointer(const PassInfo *) { - return this; - } - virtual ImmutablePass *getAsImmutablePass() { return 0; } - virtual PMDataManager *getAsPMDataManager() { return 0; } + virtual void *getAdjustedAnalysisPointer(const PassInfo *); + virtual ImmutablePass *getAsImmutablePass(); + virtual PMDataManager *getAsPMDataManager(); /// verifyAnalysis() - This member can be implemented by a analysis pass to /// check state of analysis information. diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h index d59be3c..977d4f4 100644 --- a/include/llvm/PassAnalysisSupport.h +++ b/include/llvm/PassAnalysisSupport.h @@ -49,22 +49,13 @@ public: // addRequired - Add the specified ID to the required set of the usage info // for a pass. // - AnalysisUsage &addRequiredID(AnalysisID ID) { - assert(ID && "Pass class not registered!"); - Required.push_back(ID); - return *this; - } + AnalysisUsage &addRequiredID(AnalysisID ID); template AnalysisUsage &addRequired() { return addRequiredID(Pass::getClassPassInfo()); } - AnalysisUsage &addRequiredTransitiveID(AnalysisID ID) { - assert(ID && "Pass class not registered!"); - Required.push_back(ID); - RequiredTransitive.push_back(ID); - return *this; - } + AnalysisUsage &addRequiredTransitiveID(AnalysisID ID); template AnalysisUsage &addRequiredTransitive() { AnalysisID ID = Pass::getClassPassInfo(); diff --git a/include/llvm/PassManagers.h b/include/llvm/PassManagers.h index ed1e80e..81b7e7a 100644 --- a/include/llvm/PassManagers.h +++ b/include/llvm/PassManagers.h @@ -302,10 +302,7 @@ public: /// through getAnalysis interface. virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass); - virtual Pass * getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F) { - assert (0 && "Unable to find on the fly pass"); - return NULL; - } + virtual Pass *getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F); /// Initialize available analysis information. void initializeAnalysisInfo() { diff --git a/include/llvm/PassSupport.h b/include/llvm/PassSupport.h index b229989..b018351 100644 --- a/include/llvm/PassSupport.h +++ b/include/llvm/PassSupport.h @@ -109,13 +109,7 @@ public: } /// createPass() - Use this method to create an instance of this pass. 
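The AnalysisUsage bookkeeping above still reads the same from a pass's point of view; only the definitions moved out of line. A do-nothing sketch of the unchanged client pattern (pass name is illustrative):

    #include "llvm/Analysis/Dominators.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
      struct MyPass : public FunctionPass {
        static char ID;
        MyPass() : FunctionPass(&ID) {}

        virtual void getAnalysisUsage(AnalysisUsage &AU) const {
          AU.addRequired<DominatorTree>();  // funnels into addRequiredID
          AU.setPreservesAll();
        }
        virtual bool runOnFunction(Function &) { return false; }
      };
    }
    char MyPass::ID = 0;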
- Pass *createPass() const { - assert((!isAnalysisGroup() || NormalCtor) && - "No default implementation found for analysis group!"); - assert(NormalCtor && - "Cannot call createPass on PassInfo without default ctor!"); - return NormalCtor(); - } + Pass *createPass() const; /// addInterfaceImplemented - This method is called when this pass is /// registered as a member of an analysis group with the RegisterAnalysisGroup diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h index f07c719..9ba71fc 100644 --- a/include/llvm/Support/CFG.h +++ b/include/llvm/Support/CFG.h @@ -53,7 +53,7 @@ public: assert(!It.atEnd() && "pred_iterator out of range!"); return cast(*It)->getParent(); } - inline pointer *operator->() const { return &(operator*()); } + inline pointer *operator->() const { return &operator*(); } inline Self& operator++() { // Preincrement assert(!It.atEnd() && "pred_iterator out of range!"); diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h new file mode 100644 index 0000000..2d4e054 --- /dev/null +++ b/include/llvm/Support/COFF.h @@ -0,0 +1,183 @@ +//===-- llvm/Support/COFF.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains definitions used in Windows COFF files. +// +// Structures and enums defined within this file were created using +// information from Microsoft's publicly available PE/COFF format document: +// +// Microsoft Portable Executable and Common Object File Format Specification +// Revision 8.1 - February 15, 2008 +// +// As of 5/2/2010, hosted by Microsoft at: +// http://www.microsoft.com/whdc/system/platform/firmware/pecoff.mspx +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_WIN_COFF_H +#define LLVM_SUPPORT_WIN_COFF_H + +#include "llvm/System/DataTypes.h" +#include + +namespace llvm { +namespace COFF { + + // Sizes in bytes of various things in the COFF format.
+ enum { + HeaderSize = 20, + NameSize = 8, + SymbolSize = 18, + SectionSize = 40, + RelocationSize = 10 + }; + + struct header { + uint16_t Machine; + uint16_t NumberOfSections; + uint32_t TimeDateStamp; + uint32_t PointerToSymbolTable; + uint32_t NumberOfSymbols; + uint16_t SizeOfOptionalHeader; + uint16_t Characteristics; + }; + + struct symbol { + char Name[NameSize]; + uint32_t Value; + uint16_t Type; + uint8_t StorageClass; + uint16_t SectionNumber; + uint8_t NumberOfAuxSymbols; + }; + + enum symbol_flags { + SF_TypeMask = 0x0000FFFF, + SF_TypeShift = 0, + + SF_ClassMask = 0x00FF0000, + SF_ClassShift = 16, + + SF_WeakReference = 0x01000000 + }; + + enum symbol_storage_class { + IMAGE_SYM_CLASS_END_OF_FUNCTION = -1, + IMAGE_SYM_CLASS_NULL = 0, + IMAGE_SYM_CLASS_AUTOMATIC = 1, + IMAGE_SYM_CLASS_EXTERNAL = 2, + IMAGE_SYM_CLASS_STATIC = 3, + IMAGE_SYM_CLASS_REGISTER = 4, + IMAGE_SYM_CLASS_EXTERNAL_DEF = 5, + IMAGE_SYM_CLASS_LABEL = 6, + IMAGE_SYM_CLASS_UNDEFINED_LABEL = 7, + IMAGE_SYM_CLASS_MEMBER_OF_STRUCT = 8, + IMAGE_SYM_CLASS_ARGUMENT = 9, + IMAGE_SYM_CLASS_STRUCT_TAG = 10, + IMAGE_SYM_CLASS_MEMBER_OF_UNION = 11, + IMAGE_SYM_CLASS_UNION_TAG = 12, + IMAGE_SYM_CLASS_TYPE_DEFINITION = 13, + IMAGE_SYM_CLASS_UNDEFINED_STATIC = 14, + IMAGE_SYM_CLASS_ENUM_TAG = 15, + IMAGE_SYM_CLASS_MEMBER_OF_ENUM = 16, + IMAGE_SYM_CLASS_REGISTER_PARAM = 17, + IMAGE_SYM_CLASS_BIT_FIELD = 18, + IMAGE_SYM_CLASS_BLOCK = 100, + IMAGE_SYM_CLASS_FUNCTION = 101, + IMAGE_SYM_CLASS_END_OF_STRUCT = 102, + IMAGE_SYM_CLASS_FILE = 103, + IMAGE_SYM_CLASS_SECTION = 104, + IMAGE_SYM_CLASS_WEAK_EXTERNAL = 105, + IMAGE_SYM_CLASS_CLR_TOKEN = 107 + }; + + struct section { + char Name[NameSize]; + uint32_t VirtualSize; + uint32_t VirtualAddress; + uint32_t SizeOfRawData; + uint32_t PointerToRawData; + uint32_t PointerToRelocations; + uint32_t PointerToLineNumbers; + uint16_t NumberOfRelocations; + uint16_t NumberOfLineNumbers; + uint32_t Characteristics; + }; + + enum section_characteristics { + IMAGE_SCN_TYPE_NO_PAD = 0x00000008, + IMAGE_SCN_CNT_CODE = 0x00000020, + IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040, + IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080, + IMAGE_SCN_LNK_OTHER = 0x00000100, + IMAGE_SCN_LNK_INFO = 0x00000200, + IMAGE_SCN_LNK_REMOVE = 0x00000800, + IMAGE_SCN_LNK_COMDAT = 0x00001000, + IMAGE_SCN_GPREL = 0x00008000, + IMAGE_SCN_MEM_PURGEABLE = 0x00020000, + IMAGE_SCN_MEM_16BIT = 0x00020000, + IMAGE_SCN_MEM_LOCKED = 0x00040000, + IMAGE_SCN_MEM_PRELOAD = 0x00080000, + IMAGE_SCN_ALIGN_1BYTES = 0x00100000, + IMAGE_SCN_ALIGN_2BYTES = 0x00200000, + IMAGE_SCN_ALIGN_4BYTES = 0x00300000, + IMAGE_SCN_ALIGN_8BYTES = 0x00400000, + IMAGE_SCN_ALIGN_16BYTES = 0x00500000, + IMAGE_SCN_ALIGN_32BYTES = 0x00600000, + IMAGE_SCN_ALIGN_64BYTES = 0x00700000, + IMAGE_SCN_ALIGN_128BYTES = 0x00800000, + IMAGE_SCN_ALIGN_256BYTES = 0x00900000, + IMAGE_SCN_ALIGN_512BYTES = 0x00A00000, + IMAGE_SCN_ALIGN_1024BYTES = 0x00B00000, + IMAGE_SCN_ALIGN_2048BYTES = 0x00C00000, + IMAGE_SCN_ALIGN_4096BYTES = 0x00D00000, + IMAGE_SCN_ALIGN_8192BYTES = 0x00E00000, + IMAGE_SCN_LNK_NRELOC_OVFL = 0x01000000, + IMAGE_SCN_MEM_DISCARDABLE = 0x02000000, + IMAGE_SCN_MEM_NOT_CACHED = 0x04000000, + IMAGE_SCN_MEM_NOT_PAGED = 0x08000000, + IMAGE_SCN_MEM_SHARED = 0x10000000, + IMAGE_SCN_MEM_EXECUTE = 0x20000000, + IMAGE_SCN_MEM_READ = 0x40000000, + IMAGE_SCN_MEM_WRITE = 0x80000000 + }; + + struct relocation { + uint32_t VirtualAddress; + uint32_t SymbolTableIndex; + uint16_t Type; + }; + + enum relocation_type_x86 { + IMAGE_REL_I386_ABSOLUTE = 0x0000, + 
IMAGE_REL_I386_DIR16 = 0x0001, + IMAGE_REL_I386_REL16 = 0x0002, + IMAGE_REL_I386_DIR32 = 0x0006, + IMAGE_REL_I386_DIR32NB = 0x0007, + IMAGE_REL_I386_SEG12 = 0x0009, + IMAGE_REL_I386_SECTION = 0x000A, + IMAGE_REL_I386_SECREL = 0x000B, + IMAGE_REL_I386_TOKEN = 0x000C, + IMAGE_REL_I386_SECREL7 = 0x000D, + IMAGE_REL_I386_REL32 = 0x0014 + }; + + enum { + IMAGE_COMDAT_SELECT_NODUPLICATES = 1, + IMAGE_COMDAT_SELECT_ANY, + IMAGE_COMDAT_SELECT_SAME_SIZE, + IMAGE_COMDAT_SELECT_EXACT_MATCH, + IMAGE_COMDAT_SELECT_ASSOCIATIVE, + IMAGE_COMDAT_SELECT_LARGEST + }; + +} // End namespace llvm. +} // End namespace COFF. + +#endif diff --git a/include/llvm/Support/CallSite.h b/include/llvm/Support/CallSite.h index 0650b61..38ee08b 100644 --- a/include/llvm/Support/CallSite.h +++ b/include/llvm/Support/CallSite.h @@ -204,9 +204,9 @@ public: CALLSITE_DELEGATE_GETTER(isNoInline()); } void setIsNoInline(bool Value = true) { - CALLSITE_DELEGATE_GETTER(setIsNoInline(Value)); + CALLSITE_DELEGATE_SETTER(setIsNoInline(Value)); } - + /// @brief Determine if the call does not access memory. bool doesNotAccessMemory() const { CALLSITE_DELEGATE_GETTER(doesNotAccessMemory()); @@ -256,14 +256,14 @@ private: /// Returns the operand number of the first argument unsigned getArgumentOffset() const { if (isCall()) - return 1; // Skip Function (ATM) + return CallInst::ArgOffset; // Skip Function (ATM) else return 0; // Args are at the front } unsigned getArgumentEndOffset() const { if (isCall()) - return 0; // Unchanged (ATM) + return CallInst::ArgOffset ? 0 : 1; // Unchanged (ATM) else return 3; // Skip BB, BB, Function } @@ -273,7 +273,9 @@ private: // of the op_*() functions here. See CallSite::getCallee. // if (isCall()) - return getInstruction()->op_begin(); // Unchanged (ATM) + return CallInst::ArgOffset + ? 
getInstruction()->op_begin() // Unchanged + : getInstruction()->op_end() - 1; // Skip Function else return getInstruction()->op_end() - 3; // Skip BB, BB, Function } diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h index 3d25e03..3ca8d96 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/Support/Dwarf.h @@ -300,12 +300,99 @@ enum dwarf_constants { DW_OP_ne = 0x2e, DW_OP_lit0 = 0x30, DW_OP_lit1 = 0x31, + DW_OP_lit2 = 0x32, + DW_OP_lit3 = 0x33, + DW_OP_lit4 = 0x34, + DW_OP_lit5 = 0x35, + DW_OP_lit6 = 0x36, + DW_OP_lit7 = 0x37, + DW_OP_lit8 = 0x38, + DW_OP_lit9 = 0x39, + DW_OP_lit10 = 0x3a, + DW_OP_lit11 = 0x3b, + DW_OP_lit12 = 0x3c, + DW_OP_lit13 = 0x3d, + DW_OP_lit14 = 0x3e, + DW_OP_lit15 = 0x3f, + DW_OP_lit16 = 0x40, + DW_OP_lit17 = 0x41, + DW_OP_lit18 = 0x42, + DW_OP_lit19 = 0x43, + DW_OP_lit20 = 0x44, + DW_OP_lit21 = 0x45, + DW_OP_lit22 = 0x46, + DW_OP_lit23 = 0x47, + DW_OP_lit24 = 0x48, + DW_OP_lit25 = 0x49, + DW_OP_lit26 = 0x4a, + DW_OP_lit27 = 0x4b, + DW_OP_lit28 = 0x4c, + DW_OP_lit29 = 0x4d, + DW_OP_lit30 = 0x4e, DW_OP_lit31 = 0x4f, DW_OP_reg0 = 0x50, DW_OP_reg1 = 0x51, + DW_OP_reg2 = 0x52, + DW_OP_reg3 = 0x53, + DW_OP_reg4 = 0x54, + DW_OP_reg5 = 0x55, + DW_OP_reg6 = 0x56, + DW_OP_reg7 = 0x57, + DW_OP_reg8 = 0x58, + DW_OP_reg9 = 0x59, + DW_OP_reg10 = 0x5a, + DW_OP_reg11 = 0x5b, + DW_OP_reg12 = 0x5c, + DW_OP_reg13 = 0x5d, + DW_OP_reg14 = 0x5e, + DW_OP_reg15 = 0x5f, + DW_OP_reg16 = 0x60, + DW_OP_reg17 = 0x61, + DW_OP_reg18 = 0x62, + DW_OP_reg19 = 0x63, + DW_OP_reg20 = 0x64, + DW_OP_reg21 = 0x65, + DW_OP_reg22 = 0x66, + DW_OP_reg23 = 0x67, + DW_OP_reg24 = 0x68, + DW_OP_reg25 = 0x69, + DW_OP_reg26 = 0x6a, + DW_OP_reg27 = 0x6b, + DW_OP_reg28 = 0x6c, + DW_OP_reg29 = 0x6d, + DW_OP_reg30 = 0x6e, DW_OP_reg31 = 0x6f, DW_OP_breg0 = 0x70, DW_OP_breg1 = 0x71, + DW_OP_breg2 = 0x72, + DW_OP_breg3 = 0x73, + DW_OP_breg4 = 0x74, + DW_OP_breg5 = 0x75, + DW_OP_breg6 = 0x76, + DW_OP_breg7 = 0x77, + DW_OP_breg8 = 0x78, + DW_OP_breg9 = 0x79, + DW_OP_breg10 = 0x7a, + DW_OP_breg11 = 0x7b, + DW_OP_breg12 = 0x7c, + DW_OP_breg13 = 0x7d, + DW_OP_breg14 = 0x7e, + DW_OP_breg15 = 0x7f, + DW_OP_breg16 = 0x80, + DW_OP_breg17 = 0x81, + DW_OP_breg18 = 0x82, + DW_OP_breg19 = 0x83, + DW_OP_breg20 = 0x84, + DW_OP_breg21 = 0x85, + DW_OP_breg22 = 0x86, + DW_OP_breg23 = 0x87, + DW_OP_breg24 = 0x88, + DW_OP_breg25 = 0x89, + DW_OP_breg26 = 0x8a, + DW_OP_breg27 = 0x8b, + DW_OP_breg28 = 0x8c, + DW_OP_breg29 = 0x8d, + DW_OP_breg30 = 0x8e, DW_OP_breg31 = 0x8f, DW_OP_regx = 0x90, DW_OP_fbreg = 0x91, diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h index d09db39..6f939e7 100644 --- a/include/llvm/Support/ELF.h +++ b/include/llvm/Support/ELF.h @@ -10,11 +10,10 @@ // This header contains common, non-processor-specific data structures and // constants for the ELF file format. // -// The details of the ELF32 bits in this file are largely based on -// the Tool Interface Standard (TIS) Executable and Linking Format -// (ELF) Specification Version 1.2, May 1995. The ELF64 stuff is not -// standardized, as far as I can tell. It was largely based on information -// I found in OpenBSD header files. +// The details of the ELF32 bits in this file are largely based on the Tool +// Interface Standard (TIS) Executable and Linking Format (ELF) Specification +// Version 1.2, May 1995. The ELF64 stuff is based on ELF-64 Object File Format +// Version 1.5, Draft 2, May 1998 as well as OpenBSD header files. 
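Because the newly filled-in DW_OP_lit/reg/breg ranges are contiguous, small operands can be encoded by offsetting from the base opcode instead of a 32-way switch; a sketch:

    #include "llvm/Support/Dwarf.h"
    #include <cassert>
    using namespace llvm;

    static unsigned smallRegLocation(unsigned DwarfReg) {
      assert(DwarfReg < 32 && "use DW_OP_regx for larger register numbers");
      return dwarf::DW_OP_reg0 + DwarfReg;  // DW_OP_reg0 .. DW_OP_reg31
    }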
// //===----------------------------------------------------------------------===// @@ -47,8 +46,23 @@ typedef uint16_t Elf64_Quarter; // Object file magic string. static const char ElfMagic[] = { 0x7f, 'E', 'L', 'F', '\0' }; +// e_ident size and indices. +enum { + EI_MAG0 = 0, // File identification index. + EI_MAG1 = 1, // File identification index. + EI_MAG2 = 2, // File identification index. + EI_MAG3 = 3, // File identification index. + EI_CLASS = 4, // File class. + EI_DATA = 5, // Data encoding. + EI_VERSION = 6, // File version. + EI_OSABI = 7, // OS/ABI identification. + EI_ABIVERSION = 8, // ABI version. + EI_PAD = 9, // Start of padding bytes. + EI_NIDENT = 16 // Number of bytes in e_ident. +}; + struct Elf32_Ehdr { - unsigned char e_ident[16]; // ELF Identification bytes + unsigned char e_ident[EI_NIDENT]; // ELF Identification bytes Elf32_Half e_type; // Type of file (see ET_* below) Elf32_Half e_machine; // Required architecture for this file (see EM_*) Elf32_Word e_version; // Must be equal to 1 @@ -62,17 +76,17 @@ struct Elf32_Ehdr { Elf32_Half e_shentsize; // Size of an entry in the section header table Elf32_Half e_shnum; // Number of entries in the section header table Elf32_Half e_shstrndx; // Sect hdr table index of sect name string table - bool checkMagic () const { - return (memcmp (e_ident, ElfMagic, strlen (ElfMagic))) == 0; + bool checkMagic() const { + return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0; } - unsigned char getFileClass () const { return e_ident[4]; } - unsigned char getDataEncoding () { return e_ident[5]; } + unsigned char getFileClass() const { return e_ident[EI_CLASS]; } + unsigned char getDataEncoding() const { return e_ident[EI_DATA]; } }; // 64-bit ELF header. Fields are the same as for ELF32, but with different // types (see above). struct Elf64_Ehdr { - unsigned char e_ident[16]; + unsigned char e_ident[EI_NIDENT]; Elf64_Quarter e_type; Elf64_Quarter e_machine; Elf64_Half e_version; @@ -86,6 +100,11 @@ struct Elf64_Ehdr { Elf64_Quarter e_shentsize; Elf64_Quarter e_shnum; Elf64_Quarter e_shstrndx; + bool checkMagic() const { + return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0; + } + unsigned char getFileClass() const { return e_ident[EI_CLASS]; } + unsigned char getDataEncoding() const { return e_ident[EI_DATA]; } }; // File types @@ -117,6 +136,7 @@ enum { EM_860 = 7, // Intel 80860 EM_MIPS = 8, // MIPS R3000 EM_PPC = 20, // PowerPC + EM_PPC64 = 21, // PowerPC64 EM_ARM = 40, // ARM EM_ALPHA = 41, // DEC Alpha EM_SPARCV9 = 43, // SPARC V9 @@ -131,13 +151,69 @@ enum { // Object file byte orderings. enum { + ELFDATANONE = 0, // Invalid data encoding. ELFDATA2LSB = 1, // Little-endian object file ELFDATA2MSB = 2 // Big-endian object file }; -// OS ABI identification -- unused. +// OS ABI identification. 
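A sketch of the identification probe the EI_* indices and the new Elf64_Ehdr helpers enable, assuming this header's llvm::ELF namespace wrapping and the ELFCLASS* constants defined elsewhere in it:

    #include "llvm/Support/ELF.h"
    using namespace llvm::ELF;

    static bool isLittleEndianELF64(const Elf64_Ehdr &H) {
      return H.checkMagic() &&
             H.getFileClass() == ELFCLASS64 &&    // from e_ident[EI_CLASS]
             H.getDataEncoding() == ELFDATA2LSB;  // from e_ident[EI_DATA]
    }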
enum { - ELFOSABI_NONE = 0 + ELFOSABI_NONE = 0, // UNIX System V ABI + ELFOSABI_HPUX = 1, // HP-UX operating system + ELFOSABI_NETBSD = 2, // NetBSD + ELFOSABI_LINUX = 3, // GNU/Linux + ELFOSABI_HURD = 4, // GNU/Hurd + ELFOSABI_SOLARIS = 6, // Solaris + ELFOSABI_AIX = 7, // AIX + ELFOSABI_IRIX = 8, // IRIX + ELFOSABI_FREEBSD = 9, // FreeBSD + ELFOSABI_TRU64 = 10, // TRU64 UNIX + ELFOSABI_MODESTO = 11, // Novell Modesto + ELFOSABI_OPENBSD = 12, // OpenBSD + ELFOSABI_OPENVMS = 13, // OpenVMS + ELFOSABI_NSK = 14, // Hewlett-Packard Non-Stop Kernel + ELFOSABI_AROS = 15, // AROS + ELFOSABI_FENIXOS = 16, // FenixOS + ELFOSABI_C6000_ELFABI = 64, // Bare-metal TMS320C6000 + ELFOSABI_C6000_LINUX = 65, // Linux TMS320C6000 + ELFOSABI_ARM = 97, // ARM + ELFOSABI_STANDALONE = 255 // Standalone (embedded) application +}; + +// X86_64 relocations. +enum { + R_X86_64_NONE = 0, + R_X86_64_64 = 1, + R_X86_64_PC32 = 2, + R_X86_64_GOT32 = 3, + R_X86_64_PLT32 = 4, + R_X86_64_COPY = 5, + R_X86_64_GLOB_DAT = 6, + R_X86_64_JUMP_SLOT = 7, + R_X86_64_RELATIVE = 8, + R_X86_64_GOTPCREL = 9, + R_X86_64_32 = 10, + R_X86_64_32S = 11, + R_X86_64_16 = 12, + R_X86_64_PC16 = 13, + R_X86_64_8 = 14, + R_X86_64_PC8 = 15, + R_X86_64_DTPMOD64 = 16, + R_X86_64_DTPOFF64 = 17, + R_X86_64_TPOFF64 = 18, + R_X86_64_TLSGD = 19, + R_X86_64_TLSLD = 20, + R_X86_64_DTPOFF32 = 21, + R_X86_64_GOTTPOFF = 22, + R_X86_64_TPOFF32 = 23, + R_X86_64_PC64 = 24, + R_X86_64_GOTOFF64 = 25, + R_X86_64_GOTPC32 = 26, + R_X86_64_SIZE32 = 32, + R_X86_64_SIZE64 = 33, + R_X86_64_GOTPC32_TLSDESC = 34, + R_X86_64_TLSDESC_CALL = 35, + R_X86_64_TLSDESC = 36 }; // Section header. @@ -207,7 +283,7 @@ enum { SHF_MASKPROC = 0xf0000000 // Bits indicating processor-specific flags. }; -// Symbol table entries. +// Symbol table entries for ELF32. struct Elf32_Sym { Elf32_Word st_name; // Symbol name (index into string table) Elf32_Addr st_value; // Value or address associated with the symbol @@ -218,11 +294,31 @@ struct Elf32_Sym { // These accessors and mutators correspond to the ELF32_ST_BIND, // ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification: - unsigned char getBinding () const { return st_info >> 4; } - unsigned char getType () const { return st_info & 0x0f; } - void setBinding (unsigned char b) { setBindingAndType (b, getType ()); } - void setType (unsigned char t) { setBindingAndType (getBinding (), t); } - void setBindingAndType (unsigned char b, unsigned char t) { + unsigned char getBinding() const { return st_info >> 4; } + unsigned char getType() const { return st_info & 0x0f; } + void setBinding(unsigned char b) { setBindingAndType(b, getType()); } + void setType(unsigned char t) { setBindingAndType(getBinding(), t); } + void setBindingAndType(unsigned char b, unsigned char t) { + st_info = (b << 4) + (t & 0x0f); + } +}; + +// Symbol table entries for ELF64. +struct Elf64_Sym { + Elf64_Word st_name; // Symbol name (index into string table) + unsigned char st_info; // Symbol's type and binding attributes + unsigned char st_other; // Must be zero; reserved + Elf64_Half st_shndx; // Which section (header table index) it's defined in + Elf64_Addr st_value; // Value or address associated with the symbol + Elf64_Xword st_size; // Size of the symbol + + // These accessors and mutators are identical to those defined for ELF32 + // symbol table entries. 
+ unsigned char getBinding() const { return st_info >> 4; } + unsigned char getType() const { return st_info & 0x0f; } + void setBinding(unsigned char b) { setBindingAndType(b, getType()); } + void setType(unsigned char t) { setBindingAndType(getBinding(), t); } + void setBindingAndType(unsigned char b, unsigned char t) { st_info = (b << 4) + (t & 0x0f); } }; @@ -254,11 +350,11 @@ struct Elf32_Rel { // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE, // and ELF32_R_INFO macros defined in the ELF specification: - Elf32_Word getSymbol () const { return (r_info >> 8); } - unsigned char getType () const { return (unsigned char) (r_info & 0x0ff); } - void setSymbol (Elf32_Word s) { setSymbolAndType (s, getType ()); } - void setType (unsigned char t) { setSymbolAndType (getSymbol(), t); } - void setSymbolAndType (Elf32_Word s, unsigned char t) { + Elf32_Word getSymbol() const { return (r_info >> 8); } + unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); } + void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(Elf32_Word s, unsigned char t) { r_info = (s << 8) + t; }; }; @@ -271,16 +367,53 @@ struct Elf32_Rela { // These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE, // and ELF32_R_INFO macros defined in the ELF specification: - Elf32_Word getSymbol () const { return (r_info >> 8); } - unsigned char getType () const { return (unsigned char) (r_info & 0x0ff); } - void setSymbol (Elf32_Word s) { setSymbolAndType (s, getType ()); } - void setType (unsigned char t) { setSymbolAndType (getSymbol(), t); } - void setSymbolAndType (Elf32_Word s, unsigned char t) { + Elf32_Word getSymbol() const { return (r_info >> 8); } + unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); } + void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(Elf32_Word s, unsigned char t) { r_info = (s << 8) + t; }; }; -// Program header. +// Relocation entry, without explicit addend. +struct Elf64_Rel { + Elf64_Addr r_offset; // Location (file byte offset, or program virtual addr). + Elf64_Xword r_info; // Symbol table index and type of relocation to apply. + + // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE, + // and ELF64_R_INFO macros defined in the ELF specification: + Elf64_Xword getSymbol() const { return (r_info >> 32); } + unsigned char getType() const { + return (unsigned char) (r_info & 0xffffffffL); + } + void setSymbol(Elf64_Xword s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(Elf64_Xword s, unsigned char t) { + r_info = (s << 32) + (t&0xffffffffL); + }; +}; + +// Relocation entry with explicit addend. +struct Elf64_Rela { + Elf64_Addr r_offset; // Location (file byte offset, or program virtual addr). + Elf64_Xword r_info; // Symbol table index and type of relocation to apply. + Elf64_Sxword r_addend; // Compute value for relocatable field by adding this.
+ + // These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE, + // and ELF64_R_INFO macros defined in the ELF specification: + Elf64_Xword getSymbol() const { return (r_info >> 32); } + unsigned char getType() const { + return (unsigned char) (r_info & 0xffffffffL); + } + void setSymbol(Elf64_Xword s) { setSymbolAndType(s, getType()); } + void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); } + void setSymbolAndType(Elf64_Xword s, unsigned char t) { + r_info = (s << 32) + (t&0xffffffffL); + }; +}; + +// Program header for ELF32. struct Elf32_Phdr { Elf32_Word p_type; // Type of segment Elf32_Off p_offset; // File offset where segment is located, in bytes @@ -292,6 +425,18 @@ struct Elf32_Phdr { Elf32_Word p_align; // Segment alignment constraint }; +// Program header for ELF64. +struct Elf64_Phdr { + Elf64_Word p_type; // Type of segment + Elf64_Word p_flags; // Segment flags + Elf64_Off p_offset; // File offset where segment is located, in bytes + Elf64_Addr p_vaddr; // Virtual address of beginning of segment + Elf64_Addr p_paddr; // Physical address of beginning of segment (OS-specific) + Elf64_Xword p_filesz; // Num. of bytes in file image of segment (may be zero) + Elf64_Xword p_memsz; // Num. of bytes in mem image of segment (may be zero) + Elf64_Xword p_align; // Segment alignment constraint +}; + // Segment types. enum { PT_NULL = 0, // Unused segment. @@ -313,6 +458,65 @@ enum { PF_MASKPROC = 0xf0000000 // Unspecified }; +// Dynamic table entry for ELF32. +struct Elf32_Dyn +{ + Elf32_Sword d_tag; // Type of dynamic table entry. + union + { + Elf32_Word d_val; // Integer value of entry. + Elf32_Addr d_ptr; // Pointer value of entry. + } d_un; +}; + +// Dynamic table entry for ELF64. +struct Elf64_Dyn +{ + Elf64_Sxword d_tag; // Type of dynamic table entry. + union + { + Elf64_Xword d_val; // Integer value of entry. + Elf64_Addr d_ptr; // Pointer value of entry. + } d_un; +}; + +// Dynamic table entry tags. +enum { + DT_NULL = 0, // Marks end of dynamic array. + DT_NEEDED = 1, // String table offset of needed library. + DT_PLTRELSZ = 2, // Size of relocation entries in PLT. + DT_PLTGOT = 3, // Address associated with linkage table. + DT_HASH = 4, // Address of symbolic hash table. + DT_STRTAB = 5, // Address of dynamic string table. + DT_SYMTAB = 6, // Address of dynamic symbol table. + DT_RELA = 7, // Address of relocation table (Rela entries). + DT_RELASZ = 8, // Size of Rela relocation table. + DT_RELAENT = 9, // Size of a Rela relocation entry. + DT_STRSZ = 10, // Total size of the string table. + DT_SYMENT = 11, // Size of a symbol table entry. + DT_INIT = 12, // Address of initialization function. + DT_FINI = 13, // Address of termination function. + DT_SONAME = 14, // String table offset of a shared object's name. + DT_RPATH = 15, // String table offset of library search path. + DT_SYMBOLIC = 16, // Changes symbol resolution algorithm. + DT_REL = 17, // Address of relocation table (Rel entries). + DT_RELSZ = 18, // Size of Rel relocation table. + DT_RELENT = 19, // Size of a Rel relocation entry. + DT_PLTREL = 20, // Type of relocation entry used for linking. + DT_DEBUG = 21, // Reserved for debugger. + DT_TEXTREL = 22, // Relocations exist for non-writable segments. + DT_JMPREL = 23, // Address of relocations associated with PLT. + DT_BIND_NOW = 24, // Process all relocations before execution. + DT_INIT_ARRAY = 25, // Pointer to array of initialization functions. + DT_FINI_ARRAY = 26, // Pointer to array of termination functions.
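// Illustrative sketch (editor's example, not part of the patch): the symbol
// and relocation accessors added above are plain bit-packing helpers. A
// minimal round-trip, using hypothetical binding/type values (1 and 2 are
// STB_GLOBAL and STT_FUNC in the ELF spec; neither constant is defined in
// this header), assuming <cassert> is available:
//
//   llvm::ELF::Elf32_Sym Sym;
//   Sym.setBindingAndType(1, 2);   // st_info == (1 << 4) | 2 == 0x12
//   assert(Sym.getBinding() == 1 && Sym.getType() == 2);
//
//   llvm::ELF::Elf64_Rela Rela;
//   Rela.setSymbolAndType(5, llvm::ELF::R_X86_64_PC32);
//   assert(Rela.getSymbol() == 5);                      // upper 32 bits
//   assert(Rela.getType() == llvm::ELF::R_X86_64_PC32); // lower 32 bits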
+ DT_INIT_ARRAYSZ = 27, // Size of DT_INIT_ARRAY. + DT_FINI_ARRAYSZ = 28, // Size of DT_FINI_ARRAY. + DT_LOOS = 0x60000000, // Start of environment specific tags. + DT_HIOS = 0x6FFFFFFF, // End of environment specific tags. + DT_LOPROC = 0x70000000, // Start of processor specific tags. + DT_HIPROC = 0x7FFFFFFF // End of processor specific tags. +}; + } // end namespace ELF } // end namespace llvm diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h index 1fd965d..4b1b1c0 100644 --- a/include/llvm/Support/IRBuilder.h +++ b/include/llvm/Support/IRBuilder.h @@ -97,6 +97,48 @@ public: I->setDebugLoc(CurDbgLocation); } + /// InsertPoint - A saved insertion point. + class InsertPoint { + BasicBlock *Block; + BasicBlock::iterator Point; + + public: + /// Creates a new insertion point which doesn't point to anything. + InsertPoint() : Block(0) {} + + /// Creates a new insertion point at the given location. + InsertPoint(BasicBlock *InsertBlock, BasicBlock::iterator InsertPoint) + : Block(InsertBlock), Point(InsertPoint) {} + + /// isSet - Returns true if this insert point is set. + bool isSet() const { return (Block != 0); } + + llvm::BasicBlock *getBlock() const { return Block; } + llvm::BasicBlock::iterator getPoint() const { return Point; } + }; + + /// saveIP - Returns the current insert point. + InsertPoint saveIP() const { + return InsertPoint(GetInsertBlock(), GetInsertPoint()); + } + + /// saveAndClearIP - Returns the current insert point, clearing it + /// in the process. + InsertPoint saveAndClearIP() { + InsertPoint IP(GetInsertBlock(), GetInsertPoint()); + ClearInsertionPoint(); + return IP; + } + + /// restoreIP - Sets the current insert point to a previously-saved + /// location. + void restoreIP(InsertPoint IP) { + if (IP.isSet()) + SetInsertPoint(IP.getBlock(), IP.getPoint()); + else + ClearInsertionPoint(); + } + //===--------------------------------------------------------------------===// // Miscellaneous creation methods. //===--------------------------------------------------------------------===// @@ -106,33 +148,88 @@ public: /// specified. If Name is specified, it is the name of the global variable /// created. Value *CreateGlobalString(const char *Str = "", const Twine &Name = ""); + + /// getInt1 - Get a constant value representing either true or false. + ConstantInt *getInt1(bool V) { + return ConstantInt::get(getInt1Ty(), V); + } + + /// getTrue - Get the constant value for i1 true. + ConstantInt *getTrue() { + return ConstantInt::getTrue(Context); + } + + /// getFalse - Get the constant value for i1 false. + ConstantInt *getFalse() { + return ConstantInt::getFalse(Context); + } + + /// getInt8 - Get a constant 8-bit value. + ConstantInt *getInt8(int8_t C) { + return ConstantInt::getSigned(getInt8Ty(), C); + } + + /// getInt8 - Get a constant 8-bit value. + ConstantInt *getInt8(uint8_t C) { + return ConstantInt::get(getInt8Ty(), C); + } + + /// getInt16 - Get a constant 16-bit value. + ConstantInt *getInt16(int16_t C) { + return ConstantInt::getSigned(getInt16Ty(), C); + } + + /// getInt16 - Get a constant 16-bit value. + ConstantInt *getInt16(uint16_t C) { + return ConstantInt::get(getInt16Ty(), C); + } + + /// getInt32 - Get a constant 32-bit value. + ConstantInt *getInt32(int32_t C) { + return ConstantInt::getSigned(getInt32Ty(), C); + } + + /// getInt32 - Get a constant 32-bit value. + ConstantInt *getInt32(uint32_t C) { + return ConstantInt::get(getInt32Ty(), C); + } + + /// getInt64 - Get a constant 64-bit value. 
+ ConstantInt *getInt64(int64_t C) { + return ConstantInt::getSigned(getInt64Ty(), C); + } + + /// getInt64 - Get a constant 64-bit value. + ConstantInt *getInt64(uint64_t C) { + return ConstantInt::get(getInt64Ty(), C); + } //===--------------------------------------------------------------------===// // Type creation methods //===--------------------------------------------------------------------===// /// getInt1Ty - Fetch the type representing a single bit - const Type *getInt1Ty() { + const IntegerType *getInt1Ty() { return Type::getInt1Ty(Context); } /// getInt8Ty - Fetch the type representing an 8-bit integer. - const Type *getInt8Ty() { + const IntegerType *getInt8Ty() { return Type::getInt8Ty(Context); } /// getInt16Ty - Fetch the type representing a 16-bit integer. - const Type *getInt16Ty() { + const IntegerType *getInt16Ty() { return Type::getInt16Ty(Context); } /// getInt32Ty - Fetch the type representing a 32-bit integer. - const Type *getInt32Ty() { + const IntegerType *getInt32Ty() { return Type::getInt32Ty(Context); } /// getInt64Ty - Fetch the type representing a 64-bit integer. - const Type *getInt64Ty() { + const IntegerType *getInt64Ty() { return Type::getInt64Ty(Context); } @@ -151,7 +248,7 @@ public: return Type::getVoidTy(Context); } - const Type *getInt8PtrTy() { + const PointerType *getInt8PtrTy() { return Type::getInt8PtrTy(Context); } @@ -624,8 +721,8 @@ public: return Insert(GetElementPtrInst::Create(Ptr, IdxBegin, IdxEnd), Name); } template<typename InputIterator> - Value *CreateInBoundsGEP(Value *Ptr, InputIterator IdxBegin, InputIterator IdxEnd, - const Twine &Name = "") { + Value *CreateInBoundsGEP(Value *Ptr, InputIterator IdxBegin, + InputIterator IdxEnd, const Twine &Name = "") { if (Constant *PC = dyn_cast<Constant>(Ptr)) { // Every index must be constant. InputIterator i; diff --git a/include/llvm/Support/IRReader.h b/include/llvm/Support/IRReader.h index 0dfc302..fe47c05 100644 --- a/include/llvm/Support/IRReader.h +++ b/include/llvm/Support/IRReader.h @@ -60,7 +60,8 @@ namespace llvm { MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg); if (F == 0) { Err = SMDiagnostic(Filename, - "Could not open input file '" + Filename + "'"); + "Could not open input file " + "'" + Filename + "': " + ErrMsg); return 0; } @@ -98,7 +99,8 @@ namespace llvm { MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), &ErrMsg); if (F == 0) { Err = SMDiagnostic(Filename, - "Could not open input file '" + Filename + "'"); + "Could not open input file " + "'" + Filename + "': " + ErrMsg); return 0; } diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h index ef7af69..8a41aa5 100644 --- a/include/llvm/Support/MemoryBuffer.h +++ b/include/llvm/Support/MemoryBuffer.h @@ -26,17 +26,20 @@ namespace llvm { /// into a memory buffer. In addition to basic access to the characters in the /// file, this interface guarantees you can read one character past the end of /// the file, and that this character will read as '\0'. +/// +/// The '\0' guarantee is needed to support an optimization -- it's intended to +/// be more efficient for clients which are reading all the data to stop +/// reading when they encounter a '\0' than to continually check the file +/// position to see if it has reached the end of the file. class MemoryBuffer { const char *BufferStart; // Start of the buffer. const char *BufferEnd; // End of the buffer. - /// MustDeleteBuffer - True if we allocated this buffer. If so, the - /// destructor must know the delete[] it.
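// Illustrative sketch (editor's example, not part of the patch): typical use
// of the InsertPoint save/restore API and the getInt*() helpers added to
// IRBuilder above. 'Entry', 'Other', and 'X' stand for pre-existing basic
// blocks and a Value; they are assumptions, not defined here:
//
//   IRBuilder<> Builder(Entry);                    // insert at end of Entry
//   IRBuilder<>::InsertPoint IP = Builder.saveIP();
//   Builder.SetInsertPoint(Other);                 // temporarily emit elsewhere
//   Builder.CreateAdd(X, Builder.getInt32(1), "tmp");
//   Builder.restoreIP(IP);                         // resume at the saved point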
- bool MustDeleteBuffer; + MemoryBuffer(const MemoryBuffer &); // DO NOT IMPLEMENT + MemoryBuffer &operator=(const MemoryBuffer &); // DO NOT IMPLEMENT protected: - MemoryBuffer() : MustDeleteBuffer(false) {} + MemoryBuffer() {} void init(const char *BufStart, const char *BufEnd); - void initCopyOf(const char *BufStart, const char *BufEnd); public: virtual ~MemoryBuffer(); @@ -62,24 +65,27 @@ public: std::string *ErrStr = 0, int64_t FileSize = -1, struct stat *FileInfo = 0); + static MemoryBuffer *getFile(const char *Filename, + std::string *ErrStr = 0, + int64_t FileSize = -1, + struct stat *FileInfo = 0); /// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note /// that EndPtr[0] must be a null byte and be accessible! static MemoryBuffer *getMemBuffer(StringRef InputData, - const char *BufferName = ""); + StringRef BufferName = ""); /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, /// copying the contents and taking ownership of it. This has no requirements /// on EndPtr[0]. static MemoryBuffer *getMemBufferCopy(StringRef InputData, - const char *BufferName = ""); + StringRef BufferName = ""); /// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that /// is completely initialized to zeros. Note that the caller need not /// initialize the memory allocated by this method. The memory is owned by /// the MemoryBuffer object. - static MemoryBuffer *getNewMemBuffer(size_t Size, - const char *BufferName = ""); + static MemoryBuffer *getNewMemBuffer(size_t Size, StringRef BufferName = ""); /// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size /// that is not initialized. Note that the caller should initialize the @@ -89,7 +95,8 @@ public: StringRef BufferName = ""); /// getSTDIN - Read all of stdin into a file buffer, and return it. - static MemoryBuffer *getSTDIN(); + /// If an error occurs, this returns null and fills in *ErrStr with a reason. + static MemoryBuffer *getSTDIN(std::string *ErrStr = 0); /// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin @@ -99,6 +106,10 @@ public: std::string *ErrStr = 0, int64_t FileSize = -1, struct stat *FileInfo = 0); + static MemoryBuffer *getFileOrSTDIN(const char *Filename, + std::string *ErrStr = 0, + int64_t FileSize = -1, + struct stat *FileInfo = 0); }; } // end namespace llvm diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h index 00dfeaa..f959136 100644 --- a/include/llvm/Support/Timer.h +++ b/include/llvm/Support/Timer.h @@ -150,8 +150,10 @@ public: /// is primarily used for debugging and for hunting performance problems.
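// Illustrative sketch (editor's example, not part of the patch): with the new
// ErrStr out-parameters, callers can report why a file or stdin could not be
// read. 'Filename' is an assumed const char* (or std::string::c_str()):
//
//   std::string ErrMsg;
//   MemoryBuffer *F = MemoryBuffer::getFileOrSTDIN(Filename, &ErrMsg);
//   if (F == 0) {
//     errs() << "error opening '" << Filename << "': " << ErrMsg << "\n";
//     return;
//   }
//   // ... use F->getBufferStart() / F->getBufferSize() ...
//   delete F;   // callers own the returned buffer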
/// struct NamedRegionTimer : public TimeRegion { - explicit NamedRegionTimer(StringRef Name); - explicit NamedRegionTimer(StringRef Name, StringRef GroupName); + explicit NamedRegionTimer(StringRef Name, + bool Enabled = true); + explicit NamedRegionTimer(StringRef Name, StringRef GroupName, + bool Enabled = true); }; diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h index 90eaeea..bb9a523 100644 --- a/include/llvm/Support/raw_ostream.h +++ b/include/llvm/Support/raw_ostream.h @@ -234,8 +234,8 @@ public: /// @param bold bold/brighter text, default false /// @param bg if true change the background, default: change foreground /// @returns itself so it can be used within << invocations - virtual raw_ostream &changeColor(enum Colors, bool = false, - bool = false) { return *this; } + virtual raw_ostream &changeColor(enum Colors, bool = false, bool = false) { + return *this; } /// Resets the colors to terminal defaults. Call this when you are done /// outputting colored text, or before program exit. diff --git a/include/llvm/SymbolTableListTraits.h b/include/llvm/SymbolTableListTraits.h index 39953e1..91a4eb9 100644 --- a/include/llvm/SymbolTableListTraits.h +++ b/include/llvm/SymbolTableListTraits.h @@ -47,9 +47,8 @@ public: /// of instructions, it returns the BasicBlock that owns them. ItemParentClass *getListOwner() { typedef iplist ItemParentClass::*Sublist; - Sublist Sub(ItemParentClass:: - getSublistAccess(static_cast(0))); - size_t Offset(size_t(&((ItemParentClass*)0->*Sub))); + size_t Offset(size_t(&((ItemParentClass*)0->*ItemParentClass:: + getSublistAccess(static_cast(0))))); iplist* Anchor(static_cast*>(this)); return reinterpret_cast(reinterpret_cast(Anchor)- Offset); diff --git a/include/llvm/System/DataTypes.h.cmake b/include/llvm/System/DataTypes.h.cmake index d9ca273..9efe75a 100644 --- a/include/llvm/System/DataTypes.h.cmake +++ b/include/llvm/System/DataTypes.h.cmake @@ -109,41 +109,59 @@ typedef unsigned short uint16_t; typedef signed char int8_t; typedef unsigned char uint8_t; typedef signed int ssize_t; -#define INT8_MAX 127 -#define INT8_MIN -128 -#define UINT8_MAX 255 -#define INT16_MAX 32767 -#define INT16_MIN -32768 -#define UINT16_MAX 65535 -#define INT32_MAX 2147483647 -#define INT32_MIN -2147483648 -#define UINT32_MAX 4294967295U +#ifndef INT8_MAX +# define INT8_MAX 127 +#endif +#ifndef INT8_MIN +# define INT8_MIN -128 +#endif +#ifndef UINT8_MAX +# define UINT8_MAX 255 +#endif +#ifndef INT16_MAX +# define INT16_MAX 32767 +#endif +#ifndef INT16_MIN +# define INT16_MIN -32768 +#endif +#ifndef UINT16_MAX +# define UINT16_MAX 65535 +#endif +#ifndef INT32_MAX +# define INT32_MAX 2147483647 +#endif +#ifndef INT32_MIN +# define INT32_MIN -2147483648 +#endif +#ifndef UINT32_MAX +# define UINT32_MAX 4294967295U +#endif /* Certain compatibility updates to VC++ introduce the `cstdint' * header, which defines the INT*_C macros. On default installs they * are absent. 
*/ #ifndef INT8_C -# define INT8_C(C) C +# define INT8_C(C) C##i8 #endif #ifndef UINT8_C -# define UINT8_C(C) C +# define UINT8_C(C) C##ui8 #endif #ifndef INT16_C -# define INT16_C(C) C +# define INT16_C(C) C##i16 #endif #ifndef UINT16_C -# define UINT16_C(C) C +# define UINT16_C(C) C##ui16 #endif #ifndef INT32_C -# define INT32_C(C) C +# define INT32_C(C) C##i32 #endif #ifndef UINT32_C -# define UINT32_C(C) C ## U +# define UINT32_C(C) C##ui32 #endif #ifndef INT64_C -# define INT64_C(C) ((int64_t) C ## LL) +# define INT64_C(C) C##i64 #endif #ifndef UINT64_C -# define UINT64_C(C) ((uint64_t) C ## ULL) +# define UINT64_C(C) C##ui64 #endif #endif /* _MSC_VER */ diff --git a/include/llvm/System/Path.h b/include/llvm/System/Path.h index d4af478..0461769 100644 --- a/include/llvm/System/Path.h +++ b/include/llvm/System/Path.h @@ -292,14 +292,6 @@ namespace sys { /// @name Disk Accessors /// @{ public: - /// This function determines if the path name in this object references - /// the root (top level directory) of the file system. The details of what - /// is considered the "root" may vary from system to system so this method - /// will do the necessary checking. - /// @returns true iff the path name references the root directory. - /// @brief Determines if the path references the root directory. - bool isRootDirectory() const; - /// This function determines if the path name is absolute, as opposed to /// relative. /// @brief Determine if the path is absolute. diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index ca551e5..9a89dc9 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -203,7 +203,6 @@ class Instruction { bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand? bit mayLoad = 0; // Is it possible for this inst to read memory? bit mayStore = 0; // Is it possible for this inst to write memory? - bit isTwoAddress = 0; // Is this a two address instruction? bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote? bit isCommutable = 0; // Is this 3 operand instruction commutable? bit isTerminator = 0; // Is this part of the terminator for a basic block? @@ -244,7 +243,7 @@ class Instruction { string DisableEncoding = ""; /// Target-specific flags. This becomes the TSFlags field in TargetInstrDesc. - bits<32> TSFlags = 0; + bits<64> TSFlags = 0; } /// Predicates - These are extra conditionals which are turned into instruction @@ -397,24 +396,23 @@ class InstrInfo { } // Standard Pseudo Instructions. -let isCodeGenOnly = 1 in { +// This list must match TargetOpcodes.h and CodeGenTarget.cpp. +// Only these instructions are allowed in the TargetOpcode namespace. 
+let isCodeGenOnly = 1, Namespace = "TargetOpcode" in { def PHI : Instruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "PHINODE"; - let Namespace = "TargetOpcode"; } def INLINEASM : Instruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = ""; - let Namespace = "TargetOpcode"; } def DBG_LABEL : Instruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; - let Namespace = "TargetOpcode"; let hasCtrlDep = 1; let isNotDuplicable = 1; } @@ -422,7 +420,6 @@ def EH_LABEL : Instruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; - let Namespace = "TargetOpcode"; let hasCtrlDep = 1; let isNotDuplicable = 1; } @@ -430,7 +427,6 @@ def GC_LABEL : Instruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$id); let AsmString = ""; - let Namespace = "TargetOpcode"; let hasCtrlDep = 1; let isNotDuplicable = 1; } @@ -438,21 +434,18 @@ def KILL : Instruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = ""; - let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; } def EXTRACT_SUBREG : Instruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$supersrc, i32imm:$subidx); let AsmString = ""; - let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; } def INSERT_SUBREG : Instruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$supersrc, unknown:$subsrc, i32imm:$subidx); let AsmString = ""; - let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; let Constraints = "$supersrc = $dst"; } @@ -460,7 +453,6 @@ def IMPLICIT_DEF : Instruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins); let AsmString = ""; - let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; let isReMaterializable = 1; let isAsCheapAsAMove = 1; @@ -469,14 +461,12 @@ def SUBREG_TO_REG : Instruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$implsrc, unknown:$subsrc, i32imm:$subidx); let AsmString = ""; - let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; } def COPY_TO_REGCLASS : Instruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins unknown:$src, i32imm:$regclass); let AsmString = ""; - let Namespace = "TargetOpcode"; let neverHasSideEffects = 1; let isAsCheapAsAMove = 1; } @@ -484,15 +474,19 @@ def DBG_VALUE : Instruction { let OutOperandList = (outs); let InOperandList = (ins variable_ops); let AsmString = "DBG_VALUE"; - let Namespace = "TargetOpcode"; let isAsCheapAsAMove = 1; } - def REG_SEQUENCE : Instruction { let OutOperandList = (outs unknown:$dst); let InOperandList = (ins variable_ops); let AsmString = ""; - let Namespace = "TargetOpcode"; + let neverHasSideEffects = 1; + let isAsCheapAsAMove = 1; +} +def COPY : Instruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins unknown:$src); + let AsmString = ""; let neverHasSideEffects = 1; let isAsCheapAsAMove = 1; } diff --git a/include/llvm/Target/TargetAsmParser.h b/include/llvm/Target/TargetAsmParser.h index 85315c1..dc2b236 100644 --- a/include/llvm/Target/TargetAsmParser.h +++ b/include/llvm/Target/TargetAsmParser.h @@ -10,6 +10,8 @@ #ifndef LLVM_TARGET_TARGETPARSER_H #define LLVM_TARGET_TARGETPARSER_H +#include "llvm/MC/MCParser/MCAsmParserExtension.h" + namespace llvm { class MCInst; class StringRef; @@ -20,7 +22,7 @@ class MCParsedAsmOperand; template 
class SmallVectorImpl; /// TargetAsmParser - Generic interface to target specific assembly parsers. -class TargetAsmParser { +class TargetAsmParser : public MCAsmParserExtension { TargetAsmParser(const TargetAsmParser &); // DO NOT IMPLEMENT void operator=(const TargetAsmParser &); // DO NOT IMPLEMENT protected: // Can only create subclasses. diff --git a/include/llvm/Target/TargetCallingConv.h b/include/llvm/Target/TargetCallingConv.h new file mode 100644 index 0000000..f368a2e --- /dev/null +++ b/include/llvm/Target/TargetCallingConv.h @@ -0,0 +1,142 @@ +//===-- llvm/Target/TargetCallingConv.h - Calling Convention ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines types for working with calling-convention information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_TARGETCALLINGCONV_H +#define LLVM_TARGET_TARGETCALLINGCONV_H + +namespace llvm { + +namespace ISD { + struct ArgFlagsTy { + private: + static const uint64_t NoFlagSet = 0ULL; + static const uint64_t ZExt = 1ULL<<0; ///< Zero extended + static const uint64_t ZExtOffs = 0; + static const uint64_t SExt = 1ULL<<1; ///< Sign extended + static const uint64_t SExtOffs = 1; + static const uint64_t InReg = 1ULL<<2; ///< Passed in register + static const uint64_t InRegOffs = 2; + static const uint64_t SRet = 1ULL<<3; ///< Hidden struct-ret ptr + static const uint64_t SRetOffs = 3; + static const uint64_t ByVal = 1ULL<<4; ///< Struct passed by value + static const uint64_t ByValOffs = 4; + static const uint64_t Nest = 1ULL<<5; ///< Nested fn static chain + static const uint64_t NestOffs = 5; + static const uint64_t ByValAlign = 0xFULL << 6; //< Struct alignment + static const uint64_t ByValAlignOffs = 6; + static const uint64_t Split = 1ULL << 10; + static const uint64_t SplitOffs = 10; + static const uint64_t OrigAlign = 0x1FULL<<27; + static const uint64_t OrigAlignOffs = 27; + static const uint64_t ByValSize = 0xffffffffULL << 32; //< Struct size + static const uint64_t ByValSizeOffs = 32; + + static const uint64_t One = 1ULL; //< 1 of this type, for shifts + + uint64_t Flags; + public: + ArgFlagsTy() : Flags(0) { } + + bool isZExt() const { return Flags & ZExt; } + void setZExt() { Flags |= One << ZExtOffs; } + + bool isSExt() const { return Flags & SExt; } + void setSExt() { Flags |= One << SExtOffs; } + + bool isInReg() const { return Flags & InReg; } + void setInReg() { Flags |= One << InRegOffs; } + + bool isSRet() const { return Flags & SRet; } + void setSRet() { Flags |= One << SRetOffs; } + + bool isByVal() const { return Flags & ByVal; } + void setByVal() { Flags |= One << ByValOffs; } + + bool isNest() const { return Flags & Nest; } + void setNest() { Flags |= One << NestOffs; } + + unsigned getByValAlign() const { + return (unsigned) + ((One << ((Flags & ByValAlign) >> ByValAlignOffs)) / 2); + } + void setByValAlign(unsigned A) { + Flags = (Flags & ~ByValAlign) | + (uint64_t(Log2_32(A) + 1) << ByValAlignOffs); + } + + bool isSplit() const { return Flags & Split; } + void setSplit() { Flags |= One << SplitOffs; } + + unsigned getOrigAlign() const { + return (unsigned) + ((One << ((Flags & OrigAlign) >> OrigAlignOffs)) / 2); + } + void setOrigAlign(unsigned A) { + Flags = (Flags & ~OrigAlign) | + (uint64_t(Log2_32(A) + 1) << 
OrigAlignOffs); + } + + unsigned getByValSize() const { + return (unsigned)((Flags & ByValSize) >> ByValSizeOffs); + } + void setByValSize(unsigned S) { + Flags = (Flags & ~ByValSize) | (uint64_t(S) << ByValSizeOffs); + } + + /// getArgFlagsString - Returns the flags as a string, eg: "zext align:4". + std::string getArgFlagsString(); + + /// getRawBits - Represent the flags as a bunch of bits. + uint64_t getRawBits() const { return Flags; } + }; + + /// InputArg - This struct carries flags and type information about a + /// single incoming (formal) argument or incoming (from the perspective + /// of the caller) return value virtual register. + /// + struct InputArg { + ArgFlagsTy Flags; + EVT VT; + bool Used; + + InputArg() : VT(MVT::Other), Used(false) {} + InputArg(ArgFlagsTy flags, EVT vt, bool used) + : Flags(flags), VT(vt), Used(used) { + assert(VT.isSimple() && + "InputArg value type must be Simple!"); + } + }; + + /// OutputArg - This struct carries flags and a value for a + /// single outgoing (actual) argument or outgoing (from the perspective + /// of the caller) return value virtual register. + /// + struct OutputArg { + ArgFlagsTy Flags; + EVT VT; + + /// IsFixed - Is this a "fixed" value, ie not passed through a vararg "...". + bool IsFixed; + + OutputArg() : IsFixed(false) {} + OutputArg(ArgFlagsTy flags, EVT vt, bool isfixed) + : Flags(flags), VT(vt), IsFixed(isfixed) { + assert(VT.isSimple() && + "OutputArg value type must be Simple!"); + } + }; +} + +} // end llvm namespace + +#endif diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h index adc37e1..8f0a6cb 100644 --- a/include/llvm/Target/TargetInstrDesc.h +++ b/include/llvm/Target/TargetInstrDesc.h @@ -15,6 +15,8 @@ #ifndef LLVM_TARGET_TARGETINSTRDESC_H #define LLVM_TARGET_TARGETINSTRDESC_H +#include "llvm/System/DataTypes.h" + namespace llvm { class TargetRegisterClass; @@ -53,7 +55,7 @@ public: /// /// NOTE: This member should be considered to be private, all access should go /// through "getRegClass(TRI)" below. - unsigned short RegClass; + short RegClass; /// Flags - These are flags from the TOI::OperandFlags enum. 
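// Illustrative sketch (editor's example, not part of the patch): ArgFlagsTy
// above stores alignments in a narrow bitfield as Log2(Align) + 1, so zero
// can mean "no alignment recorded". Round-tripping an 8-byte by-value
// alignment, assuming <cassert> is available:
//
//   llvm::ISD::ArgFlagsTy Flags;
//   Flags.setByValAlign(8);   // stores Log2_32(8) + 1 == 4 in the 4-bit field
//   unsigned A = Flags.getByValAlign();           // (1 << 4) / 2 == 8
//   assert(A == 8 && !Flags.isByVal());  // align is set, ByVal bit is not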
unsigned short Flags; @@ -131,7 +133,7 @@ public: unsigned short SchedClass; // enum identifying instr sched class const char * Name; // Name of the instruction record in td file unsigned Flags; // Flags identifying machine instr class - unsigned TSFlags; // Target Specific Flag values + uint64_t TSFlags; // Target Specific Flag values const unsigned *ImplicitUses; // Registers implicitly read by this instr const unsigned *ImplicitDefs; // Registers implicitly defined by this instr const TargetRegisterClass **RCBarriers; // Reg classes completely "clobbered" diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 2e5697e..6e69914 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -20,12 +20,14 @@ namespace llvm { class CalleeSavedInfo; +class InstrItineraryData; class LiveVariables; class MCAsmInfo; class MachineMemOperand; class MDNode; class MCInst; class SDNode; +class ScheduleHazardRecognizer; class SelectionDAG; class TargetRegisterClass; class TargetRegisterInfo; @@ -120,10 +122,6 @@ public: SrcReg == DstReg) return true; - if (MI.getOpcode() == TargetOpcode::EXTRACT_SUBREG && - MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) - return true; - if ((MI.getOpcode() == TargetOpcode::INSERT_SUBREG || MI.getOpcode() == TargetOpcode::SUBREG_TO_REG) && MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) @@ -194,11 +192,22 @@ public: /// reMaterialize - Re-issue the specified 'original' instruction at the /// specific location targeting a new destination register. + /// The register in Orig->getOperand(0).getReg() will be substituted by + /// DestReg:SubIdx. Any existing subreg index is preserved or composed with + /// SubIdx. virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const = 0; + const TargetRegisterInfo &TRI) const = 0; + + /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the + /// two-address instruction inserted by the two-address pass. + virtual void scheduleTwoAddrSource(MachineInstr *SrcMI, + MachineInstr *UseMI, + const TargetRegisterInfo &TRI) const { + // Do nothing. + } /// duplicate - Create a duplicate of the Orig instruction in MF. This is like /// MachineFunction::CloneMachineInstr(), but the target may update operands @@ -224,23 +233,19 @@ public: return 0; } - /// commuteInstruction - If a target has any instructions that are commutable, - /// but require converting to a different instruction or making non-trivial - /// changes to commute them, this method can overloaded to do this. The - /// default implementation of this method simply swaps the first two operands - /// of MI and returns it. - /// - /// If a target wants to make more aggressive changes, they can construct and - /// return a new machine instruction. If an instruction cannot commute, it - /// can also return null. - /// - /// If NewMI is true, then a new machine instruction must be created. - /// + /// commuteInstruction - If a target has any instructions that are + /// commutable but require converting to different instructions or making + /// non-trivial changes to commute them, this method can be overloaded to do + /// that. The default implementation simply swaps the commutable operands. + /// If NewMI is false, MI is modified in place and returned; otherwise, a + /// new machine instruction is created and returned.
Do not call this + /// method for a non-commutable instruction, but there may be some cases + /// where this method fails and returns null. virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI = false) const = 0; /// findCommutedOpIndices - If specified MI is commutable, return the two - /// operand indices that would swap value. Return true if the instruction + /// operand indices that would swap value. Return false if the instruction /// is not in a form which this routine understands. virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const = 0; @@ -302,25 +307,60 @@ public: /// branch to analyze. At least this much must be implemented, else tail /// merging needs to be disabled. virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl<MachineOperand> &Cond) const { + MachineBasicBlock *FBB, + const SmallVectorImpl<MachineOperand> &Cond, + DebugLoc DL) const { assert(0 && "Target didn't implement TargetInstrInfo::InsertBranch!"); return 0; } + + /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything + /// after it, replacing it with an unconditional branch to NewDest. This is + /// used by the tail merging pass. + virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const = 0; + + /// isLegalToSplitMBBAt - Return true if it's legal to split the given basic + /// block at the specified instruction (i.e. instruction would be the start + /// of a new basic block). + virtual bool isLegalToSplitMBBAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) const { + return true; + } + + /// isProfitableToIfCvt - Return true if it's profitable to if-convert the + /// first "NumInstrs" instructions of the specified basic block. + virtual + bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const { + return false; + } - /// copyRegToReg - Emit instructions to copy between a pair of registers. It - /// returns false if the target does not how to copy between the specified - /// registers. - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - assert(0 && "Target didn't implement TargetInstrInfo::copyRegToReg!"); + /// isProfitableToIfCvt - Second variant of isProfitableToIfCvt, this one + /// checks for the case where two basic blocks from true and false path + /// of an if-then-else (diamond) are predicated on mutually exclusive + /// predicates. + virtual bool + isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs, + MachineBasicBlock &FMBB, unsigned NumFInstrs) const { + return false; + } + + /// isProfitableToDupForIfCvt - Return true if it's profitable for + /// if-converter to duplicate a specific number of instructions in the + /// specified MBB to enable if-conversion. + virtual bool + isProfitableToDupForIfCvt(MachineBasicBlock &MBB,unsigned NumInstrs) const { return false; } + /// copyPhysReg - Emit instructions to copy a pair of physical registers. + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + assert(0 && "Target didn't implement TargetInstrInfo::copyPhysReg!"); + } + /// storeRegToStackSlot - Store the specified register of the given register /// class to the specified stack frame index.
The store instruction is to be /// added to the given machine basic block before the specified machine /// instruction. @@ -387,19 +427,17 @@ public: /// foldMemoryOperand - Attempt to fold a load or store of the specified stack /// slot into the specified machine instruction for the specified operand(s). /// If this is possible, a new instruction is returned with the specified - /// operand folded, otherwise NULL is returned. The client is responsible for - /// removing the old instruction and adding the new one in the instruction - /// stream. - MachineInstr* foldMemoryOperand(MachineFunction &MF, - MachineInstr* MI, + /// operand folded, otherwise NULL is returned. + /// The new instruction is inserted before MI, and the client is responsible + /// for removing the old instruction. + MachineInstr* foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, int FrameIndex) const; /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. - MachineInstr* foldMemoryOperand(MachineFunction &MF, - MachineInstr* MI, + MachineInstr* foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr* LoadMI) const; @@ -429,9 +467,7 @@ public: /// folding is possible. virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl<unsigned> &Ops) const { - return false; - } + const SmallVectorImpl<unsigned> &Ops) const =0; /// unfoldMemoryOperand - Separate a single instruction which folded a load or /// a store or a load and a store into two or more instructions. If this is @@ -548,6 +584,13 @@ public: return true; } + /// isSchedulingBoundary - Test if the given instruction should be + /// considered a scheduling boundary. This primarily includes labels and + /// terminators. + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const = 0; + /// GetInstSize - Returns the size of the specified Instruction. /// virtual unsigned GetInstSizeInBytes(const MachineInstr *MI) const { @@ -564,6 +607,12 @@ public: /// length. virtual unsigned getInlineAsmLength(const char *Str, const MCAsmInfo &MAI) const; + + /// CreateTargetPostRAHazardRecognizer - Allocate and return a hazard + /// recognizer to use for this target when scheduling the machine + /// instructions after register allocation.
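// Illustrative sketch (editor's example, not part of the patch): a backend
// implementing the post-RA scheduling hook declared below would typically
// just instantiate the generic scoreboard-based recognizer added elsewhere in
// this update. "MyTargetInstrInfo" is hypothetical:
//
//   ScheduleHazardRecognizer *MyTargetInstrInfo::
//   CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
//     return new PostRAHazardRecognizer(II);  // driven by target itineraries
//   }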
+ virtual ScheduleHazardRecognizer* + CreateTargetPostRAHazardRecognizer(const InstrItineraryData&) const = 0; }; /// TargetInstrInfoImpl - This is the default implementation of @@ -575,22 +624,32 @@ protected: TargetInstrInfoImpl(const TargetInstrDesc *desc, unsigned NumOpcodes) : TargetInstrInfo(desc, NumOpcodes) {} public: + virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest) const; virtual MachineInstr *commuteInstruction(MachineInstr *MI, bool NewMI = false) const; virtual bool findCommutedOpIndices(MachineInstr *MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2) const; + virtual bool canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const; virtual bool PredicateInstruction(MachineInstr *MI, const SmallVectorImpl &Pred) const; virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubReg, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo &TRI) const; virtual MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1) const; + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; virtual unsigned GetFunctionSizeInBytes(const MachineFunction &MF) const; + + virtual ScheduleHazardRecognizer * + CreateTargetPostRAHazardRecognizer(const InstrItineraryData&) const; }; } // End llvm namespace diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h index 3dfa8bc..39648c2 100644 --- a/include/llvm/Target/TargetInstrItineraries.h +++ b/include/llvm/Target/TargetInstrItineraries.h @@ -106,7 +106,8 @@ struct InstrItinerary { /// Instruction itinerary Data - Itinerary data supplied by a subtarget to be /// used by a target. /// -struct InstrItineraryData { +class InstrItineraryData { +public: const InstrStage *Stages; ///< Array of stages selected const unsigned *OperandCycles; ///< Array of operand cycles selected const InstrItinerary *Itineratries; ///< Array of itineraries selected diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 5efebe6..2b6e4fa 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -24,6 +24,7 @@ #include "llvm/CallingConv.h" #include "llvm/InlineAsm.h" +#include "llvm/Attributes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/ADT/APFloat.h" @@ -32,6 +33,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/DebugLoc.h" +#include "llvm/Target/TargetCallingConv.h" #include "llvm/Target/TargetMachine.h" #include #include @@ -42,6 +44,7 @@ namespace llvm { class CallInst; class Function; class FastISel; + class FunctionLoweringInfo; class MachineBasicBlock; class MachineFunction; class MachineFrameInfo; @@ -114,7 +117,7 @@ public: /// isSelectExpensive - Return true if the select operation is expensive for /// this target. bool isSelectExpensive() const { return SelectIsExpensive; } - + /// isIntDivCheap() - Return true if integer divide is usually cheaper than /// a sequence of several shifts, adds, and multiplies for this target. 
bool isIntDivCheap() const { return IntDivIsCheap; } @@ -131,10 +134,10 @@ public: virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; - /// getCmpLibcallReturnType - Return the ValueType for comparison + /// getCmpLibcallReturnType - Return the ValueType for comparison /// libcalls. Comparison libcalls include floating point comparison calls, /// and Ordered/Unordered check calls on floating point numbers. - virtual + virtual MVT::SimpleValueType getCmpLibcallReturnType() const; /// getBooleanContents - For targets without i1 registers, this gives the @@ -208,7 +211,7 @@ public: ValueTypeActions[I] = Action; } }; - + const ValueTypeActionImpl &getValueTypeActions() const { return ValueTypeActions; } @@ -229,7 +232,7 @@ public: /// returns the integer type to transform to. EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { if (VT.isSimple()) { - assert((unsigned)VT.getSimpleVT().SimpleTy < + assert((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(TransformToType)); EVT NVT = TransformToType[VT.getSimpleVT().SimpleTy]; assert(getTypeAction(Context, NVT) != Promote && @@ -256,7 +259,7 @@ public: return EVT::getIntegerVT(Context, VT.getSizeInBits() / 2); else // Promote to a power of two size, avoiding multi-step promotion. - return getTypeAction(Context, NVT) == Promote ? + return getTypeAction(Context, NVT) == Promote ? getTypeToTransformTo(Context, NVT) : NVT; } assert(0 && "Unsupported extended type!"); @@ -302,11 +305,11 @@ public: /// intrinsic will need to map to a MemIntrinsicNode (touches memory). If /// this is the case, it returns true and stores the intrinsic /// information into the IntrinsicInfo that was passed to the function. - struct IntrinsicInfo { + struct IntrinsicInfo { unsigned opc; // target opcode EVT memVT; // memory VT const Value* ptrVal; // value representing memory location - int offset; // offset off of ptrVal + int offset; // offset off of ptrVal unsigned align; // alignment bool vol; // is volatile? bool readMem; // reads memory? @@ -324,7 +327,7 @@ public: virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const { return false; } - + /// isShuffleMaskLegal - Targets can use this to indicate that they only /// support *some* VECTOR_SHUFFLE operations, those with specific masks. /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values @@ -446,7 +449,7 @@ public: "Table isn't big enough!"); unsigned Ty = (unsigned)VT.getSimpleVT().SimpleTy; return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); - } + } /// isIndexedStoreLegal - Return true if the specified indexed store is legal /// on this target. @@ -492,7 +495,7 @@ public: assert((VT.isInteger() || VT.isFloatingPoint()) && "Cannot autopromote this type, add it with AddPromotedToType."); - + EVT NVT = VT; do { NVT = (MVT::SimpleValueType)(NVT.getSimpleVT().SimpleTy+1); @@ -516,14 +519,14 @@ public: /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. virtual unsigned getByValTypeAlignment(const Type *Ty) const; - + /// getRegisterType - Return the type of registers that this ValueType will /// eventually require. EVT getRegisterType(MVT VT) const { assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)); return RegisterTypeForVT[VT.SimpleTy]; } - + /// getRegisterType - Return the type of registers that this ValueType will /// eventually require. EVT getRegisterType(LLVMContext &Context, EVT VT) const { @@ -606,7 +609,7 @@ public: /// of the specified type.
This is used, for example, in situations where an /// array copy/move/set is converted to a sequence of store operations. Its /// use helps to ensure that such replacements don't generate code that causes - /// an alignment error (trap) on the target machine. + /// an alignment error (trap) on the target machine. /// @brief Determine if the target supports unaligned memory accesses. virtual bool allowsUnalignedMemoryAccesses(EVT VT) const { return false; @@ -637,7 +640,7 @@ public: MachineFunction &MF) const { return MVT::Other; } - + /// usesUnderscoreSetJmp - Determine if we should use _setjmp or setjmp /// to implement llvm.setjmp. bool usesUnderscoreSetJmp() const { @@ -683,17 +686,10 @@ public: return JumpBufAlignment; } - /// getIfCvtBlockLimit - returns the target specific if-conversion block size - /// limit. Any block whose size is greater should not be predicated. - unsigned getIfCvtBlockSizeLimit() const { - return IfCvtBlockSizeLimit; - } - - /// getIfCvtDupBlockLimit - returns the target specific size limit for a - /// block to be considered for duplication. Any block whose size is greater - /// should not be duplicated to facilitate its predication. - unsigned getIfCvtDupBlockSizeLimit() const { - return IfCvtDupBlockSizeLimit; + /// getMinStackArgumentAlignment - return the minimum stack alignment of an + /// argument. + unsigned getMinStackArgumentAlignment() const { + return MinStackArgumentAlignment; } /// getPrefLoopAlignment - return the preferred loop alignment. @@ -701,7 +697,14 @@ public: unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; } - + + /// getShouldFoldAtomicFences - return whether the combiner should fold + /// fence MEMBARRIER instructions into the atomic intrinsic instructions. + /// + bool getShouldFoldAtomicFences() const { + return ShouldFoldAtomicFences; + } + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -711,7 +714,7 @@ public: SelectionDAG &DAG) const { return false; } - + /// getPostIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if this node can be /// combined with a load / store to form a post-indexed load / store. @@ -721,12 +724,12 @@ public: SelectionDAG &DAG) const { return false; } - + /// getJumpTableEncoding - Return the entry encoding for a jump table in the /// current function. The returned value is a member of the /// MachineJumpTableInfo::JTEntryKind enum. virtual unsigned getJumpTableEncoding() const; - + virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, @@ -734,7 +737,7 @@ public: assert(0 && "Need to implement this hook if target has custom JTIs"); return 0; } - + /// getPICJumpTableRelocBase - Returns relocation base for the given PIC /// jumptable. virtual SDValue getPICJumpTableRelocBase(SDValue Table, @@ -746,7 +749,7 @@ public: virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const; - + /// isOffsetFoldingLegal - Return true if folding a constant offset /// with the given GlobalAddress is legal. It is frequently not legal in /// PIC relocation models. @@ -755,36 +758,42 @@ public: /// getFunctionAlignment - Return the Log2 alignment of this function.
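// Illustrative sketch (editor's example, not part of the patch): the hook
// below returns a log2 value, so returning 2 requests 4-byte alignment. The
// size-based policy is hypothetical, loosely modeled on ARM-like targets:
//
//   unsigned MyTargetLowering::getFunctionAlignment(const Function *F) const {
//     // 2-byte alignment when optimizing for size, 4-byte otherwise.
//     return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 2;
//   }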
virtual unsigned getFunctionAlignment(const Function *) const = 0; + /// getStackCookieLocation - Return true if the target stores stack + /// protector cookies at a fixed offset in some non-standard address + /// space, and populates the address space and offset as + /// appropriate. + virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const { + return false; + } + //===--------------------------------------------------------------------===// // TargetLowering Optimization Methods // - + /// TargetLoweringOpt - A convenience struct that encapsulates a DAG, and two /// SDValues for returning information from TargetLowering to its clients - /// that want to combine + /// that want to combine struct TargetLoweringOpt { SelectionDAG &DAG; bool LegalTys; bool LegalOps; - bool ShrinkOps; SDValue Old; SDValue New; explicit TargetLoweringOpt(SelectionDAG &InDAG, - bool LT, bool LO, - bool Shrink = false) : - DAG(InDAG), LegalTys(LT), LegalOps(LO), ShrinkOps(Shrink) {} + bool LT, bool LO) : + DAG(InDAG), LegalTys(LT), LegalOps(LO) {} bool LegalTypes() const { return LegalTys; } bool LegalOperations() const { return LegalOps; } - - bool CombineTo(SDValue O, SDValue N) { - Old = O; - New = N; + + bool CombineTo(SDValue O, SDValue N) { + Old = O; + New = N; return true; } - - /// ShrinkDemandedConstant - Check to see if the specified operand of the + + /// ShrinkDemandedConstant - Check to see if the specified operand of the /// specified instruction is a constant integer. If so, check to see if /// there are any bits set in the constant that are not demanded. If so, /// shrink the constant and return true. @@ -797,25 +806,25 @@ public: bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, DebugLoc dl); }; - + /// SimplifyDemandedBits - Look at Op. At this point, we know that only the /// DemandedMask bits of the result of Op are ever used downstream. If we can /// use this information to simplify Op, create a new simplified DAG node and - /// return true, returning the original and new nodes in Old and New. - /// Otherwise, analyze the expression and return a mask of KnownOne and - /// KnownZero bits for the expression (used to simplify the caller). - /// The KnownZero/One bits may only be accurate for those bits in the + /// return true, returning the original and new nodes in Old and New. + /// Otherwise, analyze the expression and return a mask of KnownOne and + /// KnownZero bits for the expression (used to simplify the caller). + /// The KnownZero/One bits may only be accurate for those bits in the /// DemandedMask. - bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, + bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, APInt &KnownZero, APInt &KnownOne, TargetLoweringOpt &TLO, unsigned Depth = 0) const; - + /// computeMaskedBitsForTargetNode - Determine which of the bits specified in - /// Mask are known to be either zero or one and return them in the + /// Mask are known to be either zero or one and return them in the /// KnownZero/KnownOne bitsets. virtual void computeMaskedBitsForTargetNode(const SDValue Op, const APInt &Mask, - APInt &KnownZero, + APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth = 0) const; @@ -825,7 +834,7 @@ public: /// DAG Combiner. virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, unsigned Depth = 0) const; - + struct DAGCombinerInfo { void *DC; // The DAG Combiner object. 
bool BeforeLegalize; @@ -833,15 +842,15 @@ public: bool CalledByLegalizer; public: SelectionDAG &DAG; - + DAGCombinerInfo(SelectionDAG &dag, bool bl, bool blo, bool cl, void *dc) : DC(dc), BeforeLegalize(bl), BeforeLegalizeOps(blo), CalledByLegalizer(cl), DAG(dag) {} - + bool isBeforeLegalize() const { return BeforeLegalize; } bool isBeforeLegalizeOps() const { return BeforeLegalizeOps; } bool isCalledByLegalizer() const { return CalledByLegalizer; } - + void AddToWorklist(SDNode *N); SDValue CombineTo(SDNode *N, const std::vector &To, bool AddTo = true); @@ -851,7 +860,7 @@ public: void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); }; - /// SimplifySetCC - Try to simplify a setcc built with the specified operands + /// SimplifySetCC - Try to simplify a setcc built with the specified operands /// and cc. If it is unable to simplify it, return a null SDValue. SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, @@ -892,7 +901,7 @@ public: virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { return false; } - + //===--------------------------------------------------------------------===// // TargetLowering Configuration Methods - These methods should be invoked by // the derived class constructor to configure this object for the target. @@ -932,7 +941,7 @@ protected: void setStackPointerRegisterToSaveRestore(unsigned R) { StackPointerRegisterToSaveRestore = R; } - + /// setExceptionPointerRegister - If set to a physical register, this sets /// the register that receives the exception address on entry to a landing /// pad. @@ -955,12 +964,12 @@ protected: /// expensive, and if possible, should be replaced by an alternate sequence /// of instructions not containing an integer divide. void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; } - + /// setPow2DivIsCheap - Tells the code generator that it shouldn't generate /// srl/add/sra for a signed divide by power of two, and let the target handle /// it. void setPow2DivIsCheap(bool isCheap = true) { Pow2DivIsCheap = isCheap; } - + /// addRegisterClass - Add the specified register class as an available /// regclass for the specified value type. This indicates the selector can /// handle values of that class natively. @@ -983,7 +992,7 @@ protected: assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); OpActions[(unsigned)VT.SimpleTy][Op] = (uint8_t)Action; } - + /// setLoadExtAction - Indicate that the specified load with extension does /// not work with the specified type and indicate what to do about it. void setLoadExtAction(unsigned ExtType, MVT VT, @@ -993,7 +1002,7 @@ protected: "Table isn't big enough!"); LoadExtActions[VT.SimpleTy][ExtType] = (uint8_t)Action; } - + /// setTruncStoreAction - Indicate that the specified truncating store does /// not work with the specified type and indicate what to do about it. void setTruncStoreAction(MVT ValVT, MVT MemVT, @@ -1018,7 +1027,7 @@ protected: IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0; IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4; } - + /// setIndexedStoreAction - Indicate that the specified indexed store does or /// does not work with the specified type and indicate what to do about /// it. 
NOTE: All indexed mode stores are initialized to Expand in @@ -1033,7 +1042,7 @@ protected: IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f; IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action); } - + /// setCondCodeAction - Indicate that the specified condition code is or isn't /// supported on the target and indicate what to do about it. void setCondCodeAction(ISD::CondCode CC, MVT VT, @@ -1060,7 +1069,7 @@ protected: assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); } - + /// setJumpBufSize - Set the target's required jmp_buf buffer size (in /// bytes); default is 200 void setJumpBufSize(unsigned Size) { @@ -1073,25 +1082,24 @@ protected: JumpBufAlignment = Align; } - /// setIfCvtBlockSizeLimit - Set the target's if-conversion block size - /// limit (in number of instructions); default is 2. - void setIfCvtBlockSizeLimit(unsigned Limit) { - IfCvtBlockSizeLimit = Limit; - } - - /// setIfCvtDupBlockSizeLimit - Set the target's block size limit (in number - /// of instructions) to be considered for code duplication during - /// if-conversion; default is 2. - void setIfCvtDupBlockSizeLimit(unsigned Limit) { - IfCvtDupBlockSizeLimit = Limit; - } - /// setPrefLoopAlignment - Set the target's preferred loop alignment. Default /// alignment is zero, it means the target does not care about loop alignment. void setPrefLoopAlignment(unsigned Align) { PrefLoopAlignment = Align; } - + + /// setMinStackArgumentAlignment - Set the minimum stack alignment of an + /// argument. + void setMinStackArgumentAlignment(unsigned Align) { + MinStackArgumentAlignment = Align; + } + + /// setShouldFoldAtomicFences - Set if the target's implementation of the + /// atomic operation intrinsics includes locking. Default is false. + void setShouldFoldAtomicFences(bool fold) { + ShouldFoldAtomicFences = fold; + } + public: //===--------------------------------------------------------------------===// // Lowering methods - These methods must be implemented by targets so that @@ -1151,6 +1159,7 @@ public: LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -1163,9 +1172,8 @@ public: /// registers. If false is returned, an sret-demotion is performed. /// virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &OutTys, - const SmallVectorImpl &ArgsFlags, - SelectionDAG &DAG) const + const SmallVectorImpl &Outs, + LLVMContext &Context) const { // Return true by default to get preexisting behavior. return true; @@ -1179,6 +1187,7 @@ public: virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { assert(0 && "Not Implemented"); return SDValue(); // this is here to silence compiler errors @@ -1200,7 +1209,7 @@ public: SmallVectorImpl &Results, SelectionDAG &DAG) const; - /// LowerOperation - This callback is invoked for operations that are + /// LowerOperation - This callback is invoked for operations that are /// unsupported by the target, which are registered to use 'custom' lowering, /// and whose defined values are all legal. 
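setIndexedLoadAction and setIndexedStoreAction above share a single uint8_t per (value type, indexed mode) pair: the load action lives in the high nibble (hence the `<< 4` and the `~0xf0` mask) and the store action in the low nibble. A standalone model of that packing, with an invented LegalizeAction enum standing in for the real one:

    #include <cassert>
    #include <cstdint>

    enum LegalizeAction : uint8_t { Legal = 0, Promote, Expand, Custom };

    // One byte per (VT, mode): load action in bits 7..4, store in bits 3..0.
    static void setIndexedLoad(uint8_t &Cell, LegalizeAction A) {
      Cell = (Cell & ~0xf0) | (uint8_t(A) << 4);
    }
    static void setIndexedStore(uint8_t &Cell, LegalizeAction A) {
      Cell = (Cell & ~0x0f) | uint8_t(A);
    }
    static LegalizeAction getIndexedLoad(uint8_t Cell) {
      return LegalizeAction(Cell >> 4);
    }
    static LegalizeAction getIndexedStore(uint8_t Cell) {
      return LegalizeAction(Cell & 0x0f);
    }

    int main() {
      uint8_t Cell = 0;
      setIndexedLoad(Cell, Expand);
      setIndexedStore(Cell, Custom);
      assert(getIndexedLoad(Cell) == Expand && getIndexedStore(Cell) == Custom);
      return 0;
    }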
/// If the target has no operations that require custom lowering, it need not @@ -1227,23 +1236,14 @@ public: /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel * - createFastISel(MachineFunction &, - DenseMap &, - DenseMap &, - DenseMap &, - std::vector > & -#ifndef NDEBUG - , SmallSet &CatchInfoLost -#endif - ) const { + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const { return 0; } //===--------------------------------------------------------------------===// // Inline Asm Support hooks // - + /// ExpandInlineAsm - This hook allows the target to expand an inline asm /// call to be explicit llvm code if it wants to. This is useful for /// turning simple inline asms into LLVM intrinsics, which gives the @@ -1251,7 +1251,7 @@ public: virtual bool ExpandInlineAsm(CallInst *CI) const { return false; } - + enum ConstraintType { C_Register, // Constraint represents specific register(s). C_RegisterClass, // Constraint represents any of register(s) in class. @@ -1259,7 +1259,7 @@ public: C_Other, // Something else. C_Unknown // Unsupported constraint. }; - + /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. struct AsmOperandInfo : public InlineAsm::ConstraintInfo { @@ -1271,25 +1271,25 @@ public: /// ConstraintType - Information about the constraint code, e.g. Register, /// RegisterClass, Memory, Other, Unknown. TargetLowering::ConstraintType ConstraintType; - + /// CallOperandval - If this is the result output operand or a /// clobber, this is null, otherwise it is the incoming operand to the /// CallInst. This gets modified as the asm is processed. Value *CallOperandVal; - + /// ConstraintVT - The ValueType for the operand value. EVT ConstraintVT; - + /// isMatchingInputConstraint - Return true of this is an input operand that /// is a matching constraint like "4". bool isMatchingInputConstraint() const; - + /// getMatchedOperand - If this is an input matching constraint, this method /// returns the output operand it matches. unsigned getMatchedOperand() const; - + AsmOperandInfo(const InlineAsm::ConstraintInfo &info) - : InlineAsm::ConstraintInfo(info), + : InlineAsm::ConstraintInfo(info), ConstraintType(TargetLowering::C_Unknown), CallOperandVal(0), ConstraintVT(MVT::Other) { } @@ -1299,21 +1299,19 @@ public: /// type to use for the specific AsmOperandInfo, setting /// OpInfo.ConstraintCode and OpInfo.ConstraintType. If the actual operand /// being passed in is available, it can be passed in as Op, otherwise an - /// empty SDValue can be passed. If hasMemory is true it means one of the asm - /// constraint of the inline asm instruction being processed is 'm'. + /// empty SDValue can be passed. virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, - bool hasMemory, SelectionDAG *DAG = 0) const; - + /// getConstraintType - Given a constraint, return the type of constraint it /// is for this target. virtual ConstraintType getConstraintType(const std::string &Constraint) const; - + /// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"), /// return a list of registers that can be used to satisfy the constraint. /// This should only be used for C_RegisterClass constraints. - virtual std::vector + virtual std::vector getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; @@ -1327,29 +1325,26 @@ public: /// /// This should only be used for C_Register constraints. 
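The createFastISel change above is a classic parameter-object refactor: several loosely related maps, plus an #ifdef-guarded set, traveled together on every call, so the patch moves them behind one FunctionLoweringInfo reference. A generic standalone sketch of the pattern (the types here are invented placeholders, not the real FunctionLoweringInfo):

    #include <map>
    #include <string>

    // Before: every consumer threads each table through separately.
    int lookupBefore(const std::map<std::string, int> &ValueMap,
                     const std::map<std::string, int> &BlockMap,
                     const std::string &Key) {
      std::map<std::string, int>::const_iterator It = ValueMap.find(Key);
      return It != ValueMap.end() ? It->second : (int)BlockMap.size();
    }

    // After: one context object owns the tables; call sites shrink, and new
    // state can be added later without touching every signature again.
    struct LoweringContext {              // stand-in for FunctionLoweringInfo
      std::map<std::string, int> ValueMap;
      std::map<std::string, int> BlockMap;
    };

    int lookupAfter(const LoweringContext &Ctx, const std::string &Key) {
      std::map<std::string, int>::const_iterator It = Ctx.ValueMap.find(Key);
      return It != Ctx.ValueMap.end() ? It->second : (int)Ctx.BlockMap.size();
    }

    int main() {
      LoweringContext Ctx;
      Ctx.ValueMap["x"] = 7;
      return lookupAfter(Ctx, "x") == 7 ? 0 : 1;
    }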
On error, /// this returns a register number of 0 and a null register class pointer.. - virtual std::pair + virtual std::pair getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - + /// LowerXConstraint - try to replace an X constraint, which matches anything, /// with another that has more specific requirements based on the type of the /// corresponding operand. This returns null if there is no replacement to /// make. virtual const char *LowerXConstraint(EVT ConstraintVT) const; - + /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops - /// vector. If it is invalid, don't add anything to Ops. If hasMemory is true - /// it means one of the asm constraint of the inline asm instruction being - /// processed is 'm'. + /// vector. If it is invalid, don't add anything to Ops. virtual void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector &Ops, SelectionDAG &DAG) const; - + //===--------------------------------------------------------------------===// // Instruction Emitting Hooks // - + // EmitInstrWithCustomInserter - This method should be implemented by targets // that mark instructions with the 'usesCustomInserter' flag. These // instructions are special in various ways, which require special support to @@ -1378,7 +1373,7 @@ public: int64_t Scale; AddrMode() : BaseGV(0), BaseOffs(0), HasBaseReg(false), Scale(0) {} }; - + /// isLegalAddressingMode - Return true if the addressing mode represented by /// AM is legal for this target, for a load/store of the specified type. /// The type may be VoidTy, in which case only return true if the addressing @@ -1431,9 +1426,9 @@ public: //===--------------------------------------------------------------------===// // Div utility functions // - SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, + SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, std::vector* Created) const; - SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, + SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, std::vector* Created) const; @@ -1470,7 +1465,7 @@ public: void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { LibcallCallingConvs[Call] = CC; } - + /// getLibcallCallingConv - Get the CallingConv that should be used for the /// specified libcall. CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { @@ -1499,12 +1494,12 @@ private: /// a real cost model is in place. If we ever optimize for size, this will be /// set to true unconditionally. bool IntDivIsCheap; - + /// Pow2DivIsCheap - Tells the code generator that it shouldn't generate /// srl/add/sra for a signed divide by power of two, and let the target handle /// it. bool Pow2DivIsCheap; - + /// UseUnderscoreSetJmp - This target prefers to use _setjmp to implement /// llvm.setjmp. Defaults to false. bool UseUnderscoreSetJmp; @@ -1524,26 +1519,28 @@ private: /// SchedPreferenceInfo - The target scheduling preference: shortest possible /// total cycles or lowest register usage. Sched::Preference SchedPreferenceInfo; - + /// JumpBufSize - The size, in bytes, of the target's jmp_buf buffers unsigned JumpBufSize; - + /// JumpBufAlignment - The alignment, in bytes, of the target's jmp_buf /// buffers unsigned JumpBufAlignment; - /// IfCvtBlockSizeLimit - The maximum allowed size for a block to be - /// if-converted. - unsigned IfCvtBlockSizeLimit; - - /// IfCvtDupBlockSizeLimit - The maximum allowed size for a block to be - /// duplicated during if-conversion. 
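isLegalAddressingMode, whose AddrMode struct appears above, answers whether BaseGV + BaseOffs + BaseReg + Scale*IndexReg is directly encodable for a given type. As a concrete illustration, here is a standalone checker for an invented x86-like rule (scale restricted to 1/2/4/8, displacement limited to a signed 32-bit value); real targets implement this by overriding the virtual hook instead.

    #include <cassert>
    #include <cstdint>

    // Mirrors the AddrMode fields above: Base + Scale*Index addressing.
    struct AddrMode {
      bool HasBaseGV;        // stand-in for BaseGV != 0
      int64_t BaseOffs;
      bool HasBaseReg;
      int64_t Scale;
      AddrMode() : HasBaseGV(false), BaseOffs(0), HasBaseReg(false), Scale(0) {}
    };

    // Hypothetical x86-like legality rule, for illustration only.
    static bool isLegalAddressingMode(const AddrMode &AM) {
      if (AM.Scale != 0 && AM.Scale != 1 && AM.Scale != 2 &&
          AM.Scale != 4 && AM.Scale != 8)
        return false;                       // unencodable scale
      if (AM.BaseOffs < INT32_MIN || AM.BaseOffs > INT32_MAX)
        return false;                       // displacement must fit in 32 bits
      return true;
    }

    int main() {
      AddrMode AM;
      AM.HasBaseReg = true;
      AM.Scale = 4;                         // e.g. [reg + 4*idx]
      assert(isLegalAddressingMode(AM));
      AM.Scale = 3;
      assert(!isLegalAddressingMode(AM));
      return 0;
    }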
- unsigned IfCvtDupBlockSizeLimit; + /// MinStackArgumentAlignment - The minimum alignment that any argument + /// on the stack needs to have. + /// + unsigned MinStackArgumentAlignment; /// PrefLoopAlignment - The perferred loop alignment. /// unsigned PrefLoopAlignment; + /// ShouldFoldAtomicFences - Whether fencing MEMBARRIER instructions should + /// be folded into the enclosed atomic intrinsic instruction by the + /// combiner. + bool ShouldFoldAtomicFences; + /// StackPointerRegisterToSaveRestore - If set to a physical register, this /// specifies the register that llvm.savestack/llvm.restorestack should save /// and restore. @@ -1583,12 +1580,12 @@ private: /// operations that are not should be described. Note that operations on /// non-legal value types are not described here. uint8_t OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; - + /// LoadExtActions - For each load extension type and each value type, /// keep a LegalizeAction that indicates how instruction selection should deal /// with a load of a specific value type and extension type. uint8_t LoadExtActions[MVT::LAST_VALUETYPE][ISD::LAST_LOADEXT_TYPE]; - + /// TruncStoreActions - For each value type pair keep a LegalizeAction that /// indicates whether a truncating store of a specific value type and /// truncating type is legal. @@ -1600,7 +1597,7 @@ private: /// value_type for the reference. The second dimension represents the various /// modes for load store. uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; - + /// CondCodeActions - For each condition code (ISD::CondCode) keep a /// LegalizeAction that indicates how instruction selection should /// deal with the condition code. @@ -1615,7 +1612,7 @@ private: /// which sets a bit in this array. unsigned char TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; - + /// PromoteToType - For operations that must be promoted to a specific type, /// this holds the destination type. This map should be sparse, so don't hold /// it as an array. @@ -1676,6 +1673,15 @@ protected: /// optimization. bool benefitFromCodePlacementOpt; }; + +/// GetReturnInfo - Given an LLVM IR type and return type attributes, +/// compute the return value EVTs and flags, and optionally also +/// the offsets, if the return value is being lowered to memory. +void GetReturnInfo(const Type* ReturnType, Attributes attr, + SmallVectorImpl &Outs, + const TargetLowering &TLI, + SmallVectorImpl *Offsets = 0); + } // end llvm namespace #endif diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h index c4deaa8..cb772ec 100644 --- a/include/llvm/Target/TargetOpcodes.h +++ b/include/llvm/Target/TargetOpcodes.h @@ -15,52 +15,54 @@ #define LLVM_TARGET_TARGETOPCODES_H namespace llvm { - + /// Invariant opcodes: All instruction sets have these as their low opcodes. +/// +/// Every instruction defined here must also appear in Target.td and the order +/// must be the same as in CodeGenTarget.cpp. +/// namespace TargetOpcode { - enum { + enum { PHI = 0, INLINEASM = 1, DBG_LABEL = 2, EH_LABEL = 3, GC_LABEL = 4, - + /// KILL - This instruction is a noop that is used only to adjust the /// liveness of registers. This can be useful when dealing with /// sub-registers. KILL = 5, - + /// EXTRACT_SUBREG - This instruction takes two operands: a register /// that has subregisters, and a subregister index. It returns the /// extracted subregister value. This is commonly used to implement /// truncation operations on target architectures which support it. 
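The MinStackArgumentAlignment knob added above feeds the usual round-up when outgoing arguments are laid out on the stack. The arithmetic is the standard power-of-two alignment idiom; a standalone version (names illustrative):

    #include <cassert>

    // Round Offset up to the next multiple of Align (Align a power of two),
    // the computation call lowering performs per stack argument.
    static unsigned alignTo(unsigned Offset, unsigned Align) {
      return (Offset + Align - 1) & ~(Align - 1);
    }

    int main() {
      // With MinStackArgumentAlignment == 8, a 4-byte argument ending at
      // offset 4 still pushes its successor out to offset 8.
      assert(alignTo(4, 8) == 8);
      assert(alignTo(8, 8) == 8);
      assert(alignTo(9, 4) == 12);
      return 0;
    }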
EXTRACT_SUBREG = 6, - - /// INSERT_SUBREG - This instruction takes three operands: a register - /// that has subregisters, a register providing an insert value, and a - /// subregister index. It returns the value of the first register with - /// the value of the second register inserted. The first register is - /// often defined by an IMPLICIT_DEF, as is commonly used to implement + + /// INSERT_SUBREG - This instruction takes three operands: a register that + /// has subregisters, a register providing an insert value, and a + /// subregister index. It returns the value of the first register with the + /// value of the second register inserted. The first register is often + /// defined by an IMPLICIT_DEF, because it is commonly used to implement /// anyext operations on target architectures which support it. INSERT_SUBREG = 7, - + /// IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef. IMPLICIT_DEF = 8, - - /// SUBREG_TO_REG - This instruction is similar to INSERT_SUBREG except - /// that the first operand is an immediate integer constant. This constant - /// is often zero, as is commonly used to implement zext operations on - /// target architectures which support it, such as with x86-64 (with - /// zext from i32 to i64 via implicit zero-extension). + + /// SUBREG_TO_REG - This instruction is similar to INSERT_SUBREG except that + /// the first operand is an immediate integer constant. This constant is + /// often zero, because it is commonly used to assert that the instruction + /// defining the register implicitly clears the high bits. SUBREG_TO_REG = 9, - + /// COPY_TO_REGCLASS - This instruction is a placeholder for a plain /// register-to-register copy into a specific register class. This is only /// used between instruction selection and MachineInstr creation, before /// virtual registers have been created for all the instructions, and it's /// only needed in cases where the register classes implied by the - /// instructions are insufficient. The actual MachineInstrs to perform - /// the copy are emitted with the TargetInstrInfo::copyRegToReg hook. + /// instructions are insufficient. It is emitted as a COPY MachineInstr. COPY_TO_REGCLASS = 10, /// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic @@ -72,7 +74,11 @@ namespace TargetOpcode { /// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5 /// After register coalescing references of v1024 should be replace with /// v1027:3, v1025 with v1027:4, etc. - REG_SEQUENCE = 12 + REG_SEQUENCE = 12, + + /// COPY - Target-independent register copy. This instruction can also be + /// used to copy between subregisters of virtual registers. + COPY = 13 }; } // end namespace TargetOpcode } // end namespace llvm diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 7c37b73..f6ac2b7 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -115,6 +115,11 @@ public: return RegSet.count(Reg); } + /// contains - Return true if both registers are in this class. + bool contains(unsigned Reg1, unsigned Reg2) const { + return contains(Reg1) && contains(Reg2); + } + /// hasType - return true if this TargetRegisterClass has the ValueType vt. /// bool hasType(EVT vt) const { @@ -313,11 +318,11 @@ public: return Reg >= FirstVirtualRegister; } - /// getPhysicalRegisterRegClass - Returns the Register Class of a physical - /// register of the given type. If type is EVT::Other, then just return any - /// register class the register belongs to. 
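The reworded INSERT_SUBREG and SUBREG_TO_REG comments above are easy to misread without a model. Treating a register as a plain 64-bit value and a subregister index as a (shift, width) pair, the two opcodes differ only in what happens to the bits outside the inserted field. This is a scalar model of the documented semantics, not of real MachineInstrs:

    #include <cassert>
    #include <cstdint>

    static uint64_t fieldMask(unsigned Shift, unsigned Width) {
      return ((Width < 64 ? (1ULL << Width) : 0) - 1) << Shift;
    }

    // INSERT_SUBREG: keep the old bits outside the subregister.
    static uint64_t insertSubreg(uint64_t Reg, uint64_t Val,
                                 unsigned Shift, unsigned Width) {
      uint64_t M = fieldMask(Shift, Width);
      return (Reg & ~M) | ((Val << Shift) & M);
    }

    // SUBREG_TO_REG: the bits outside the subregister are asserted to be
    // zero, e.g. x86-64's implicit zero-extension from i32 to i64.
    static uint64_t subregToReg(uint64_t Val, unsigned Shift, unsigned Width) {
      uint64_t M = fieldMask(Shift, Width);
      return (Val << Shift) & M;        // everything outside the field is 0
    }

    int main() {
      uint64_t R = 0xAAAAAAAA55555555ULL;
      assert(insertSubreg(R, 0x1234, 0, 32) == 0xAAAAAAAA00001234ULL);
      assert(subregToReg(0x1234, 0, 32) == 0x0000000000001234ULL);
      return 0;
    }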
- virtual const TargetRegisterClass * - getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const; + /// getMinimalPhysRegClass - Returns the Register Class of a physical + /// register of the given type, picking the most sub register class of + /// the right type that contains this physreg. + const TargetRegisterClass * + getMinimalPhysRegClass(unsigned Reg, EVT VT = MVT::Other) const; /// getAllocatableSet - Returns a bitset indexed by register number /// indicating if a register is allocatable or not. If a register class is @@ -438,11 +443,6 @@ public: virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF = 0) const = 0; - /// getCalleeSavedRegClasses - Return a null-terminated list of the preferred - /// register classes to spill each callee saved register with. The order and - /// length of this list match the getCalleeSaveRegs() list. - virtual const TargetRegisterClass* const *getCalleeSavedRegClasses( - const MachineFunction *MF) const =0; /// getReservedRegs - Returns a bitset indexed by physical register number /// indicating if a register is a special register that has particular uses @@ -456,7 +456,7 @@ public: virtual unsigned getSubReg(unsigned RegNo, unsigned Index) const = 0; /// getSubRegIndex - For a given register pair, return the sub-register index - /// if the are second register is a sub-register of the first. Return zero + /// if the second register is a sub-register of the first. Return zero /// otherwise. virtual unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const = 0; @@ -470,14 +470,15 @@ public: return 0; } - /// canCombinedSubRegIndex - Given a register class and a list of sub-register - /// indices, return true if it's possible to combine the sub-register indices - /// into one that corresponds to a larger sub-register. Return the new sub- - /// register index by reference. Note the new index by be zero if the given - /// sub-registers combined to form the whole register. - virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC, - SmallVectorImpl &SubIndices, - unsigned &NewSubIdx) const { + /// canCombineSubRegIndices - Given a register class and a list of + /// subregister indices, return true if it's possible to combine the + /// subregister indices into one that corresponds to a larger + /// subregister. Return the new subregister index by reference. Note the + /// new index may be zero if the given subregisters can be combined to + /// form the whole register. + virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC, + SmallVectorImpl &SubIndices, + unsigned &NewSubIdx) const { return 0; } @@ -490,6 +491,23 @@ public: return 0; } + /// composeSubRegIndices - Return the subregister index you get from composing + /// two subregister indices. + /// + /// If R:a:b is the same register as R:c, then composeSubRegIndices(a, b) + /// returns c. Note that composeSubRegIndices does not tell you about illegal + /// compositions. If R does not have a subreg a, or R:a does not have a subreg + /// b, composeSubRegIndices doesn't tell you. + /// + /// The ARM register Q0 has two D subregs dsub_0:D0 and dsub_1:D1. It also has + /// ssub_0:S0 - ssub_3:S3 subregs. + /// If you compose subreg indices dsub_1, ssub_0 you get ssub_2. + /// + virtual unsigned composeSubRegIndices(unsigned a, unsigned b) const { + // This default implementation is correct for most targets. 
+ return b; + } + //===--------------------------------------------------------------------===// // Register Class Information // @@ -506,8 +524,8 @@ public: /// getRegClass - Returns the register class associated with the enumeration /// value. See class TargetOperandInfo. const TargetRegisterClass *getRegClass(unsigned i) const { - assert(i <= getNumRegClasses() && "Register Class ID out of range"); - return i ? RegClassBegin[i - 1] : NULL; + assert(i < getNumRegClasses() && "Register Class ID out of range"); + return RegClassBegin[i]; } /// getPointerRegClass - Returns a TargetRegisterClass used for pointer diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 5e17904..8fb4b63 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -45,6 +45,11 @@ ModulePass *createStripNonDebugSymbolsPass(); ModulePass *createStripDebugDeclarePass(); //===----------------------------------------------------------------------===// +// +// These pass removes unused symbols' debug info. +ModulePass *createStripDeadDebugInfoPass(); + +//===----------------------------------------------------------------------===// /// createLowerSetJmpPass - This function lowers the setjmp/longjmp intrinsics /// to invoke/unwind instructions. This should really be part of the C/C++ /// front-end, but it's so much easier to write transformations in LLVM proper. diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 5279e96..0f54450 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -66,24 +66,6 @@ void ReplaceInstWithInst(BasicBlock::InstListType &BIL, // void ReplaceInstWithInst(Instruction *From, Instruction *To); -/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the -/// instruction before ScanFrom) checking to see if we have the value at the -/// memory address *Ptr locally available within a small number of instructions. -/// If the value is available, return it. -/// -/// If not, return the iterator for the last validated instruction that the -/// value would be live through. If we scanned the entire block and didn't find -/// something that invalidates *Ptr or provides it, ScanFrom would be left at -/// begin() and this returns null. ScanFrom could also be left -/// -/// MaxInstsToScan specifies the maximum instructions to scan in the block. If -/// it is set to 0, it will scan the whole block. You can also optionally -/// specify an alias analysis implementation, which makes this more precise. -Value *FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, - BasicBlock::iterator &ScanFrom, - unsigned MaxInstsToScan = 6, - AliasAnalysis *AA = 0); - /// FindFunctionBackedges - Analyze the specified function to find all of the /// loop backedges in the function and return them. This is a relatively cheap /// (compared to computing dominators and loop info) analysis. diff --git a/include/llvm/Transforms/Utils/BuildLibCalls.h b/include/llvm/Transforms/Utils/BuildLibCalls.h index 6df3469..c75c142 100644 --- a/include/llvm/Transforms/Utils/BuildLibCalls.h +++ b/include/llvm/Transforms/Utils/BuildLibCalls.h @@ -34,6 +34,10 @@ namespace llvm { /// and the return value has 'i8*' type. Value *EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, const TargetData *TD); + /// EmitStrNCmp - Emit a call to the strncmp function to the builder. 
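The composeSubRegIndices hook completed just above documents the ARM example: composing dsub_1 with ssub_0 on Q0 yields ssub_2. Modelling a subregister index as a bit offset into the enclosing register makes the composition rule plain arithmetic. The table below encodes only the Q0 indices from that comment, with invented enum names, purely as an illustration:

    #include <cassert>

    // Bit offsets of subregister indices within a 128-bit ARM Q register.
    // dsub_N are the 64-bit halves, ssub_N the 32-bit quarters.
    enum SubIdx { dsub_0, dsub_1, ssub_0, ssub_1, ssub_2, ssub_3 };

    static unsigned bitOffset(SubIdx I) {
      switch (I) {
      case dsub_0: return 0;   case dsub_1: return 64;
      case ssub_0: return 0;   case ssub_1: return 32;
      case ssub_2: return 64;  case ssub_3: return 96;
      }
      return 0;
    }

    int main() {
      // Composing indices adds their offsets: Q0:dsub_1 is D1, and D1:ssub_0
      // starts at bit 64 of Q0 -- exactly ssub_2, as the doc comment says.
      assert(bitOffset(dsub_1) + bitOffset(ssub_0) == bitOffset(ssub_2));
      return 0;
    }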
+ Value *EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, + const TargetData *TD); + /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h index 22bdc99..1ca4981 100644 --- a/include/llvm/Transforms/Utils/Cloning.h +++ b/include/llvm/Transforms/Utils/Cloning.h @@ -18,7 +18,7 @@ #ifndef LLVM_TRANSFORMS_UTILS_CLONING_H #define LLVM_TRANSFORMS_UTILS_CLONING_H -#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ValueMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ValueHandle.h" @@ -46,7 +46,7 @@ class AllocaInst; /// CloneModule - Return an exact copy of the specified module /// Module *CloneModule(const Module *M); -Module *CloneModule(const Module *M, DenseMap &ValueMap); +Module *CloneModule(const Module *M, ValueMap &VMap); /// ClonedCodeInfo - This struct can be used to capture information about code /// being cloned, while it is being cloned. @@ -89,7 +89,7 @@ struct ClonedCodeInfo { /// incoming edges. /// /// The correlation between instructions in the source and result basic blocks -/// is recorded in the ValueMap map. +/// is recorded in the VMap map. /// /// If you have a particular suffix you'd like to use to add to any cloned /// names, specify it as the optional third parameter. @@ -102,34 +102,34 @@ struct ClonedCodeInfo { /// parameter. /// BasicBlock *CloneBasicBlock(const BasicBlock *BB, - DenseMap &ValueMap, + ValueMap &VMap, const Twine &NameSuffix = "", Function *F = 0, ClonedCodeInfo *CodeInfo = 0); /// CloneLoop - Clone Loop. Clone dominator info for loop insiders. Populate -/// ValueMap using old blocks to new blocks mapping. +/// VMap using old blocks to new blocks mapping. Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI, - DenseMap &ValueMap, Pass *P); + ValueMap &VMap, Pass *P); /// CloneFunction - Return a copy of the specified function, but without /// embedding the function into another module. Also, any references specified -/// in the ValueMap are changed to refer to their mapped value instead of the -/// original one. If any of the arguments to the function are in the ValueMap, -/// the arguments are deleted from the resultant function. The ValueMap is +/// in the VMap are changed to refer to their mapped value instead of the +/// original one. If any of the arguments to the function are in the VMap, +/// the arguments are deleted from the resultant function. The VMap is /// updated to include mappings from all of the instructions and basicblocks in /// the function from their old to new values. The final argument captures /// information about the cloned code if non-null. /// Function *CloneFunction(const Function *F, - DenseMap &ValueMap, + ValueMap &VMap, ClonedCodeInfo *CodeInfo = 0); -/// CloneFunction - Version of the function that doesn't need the ValueMap. +/// CloneFunction - Version of the function that doesn't need the VMap. /// inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){ - DenseMap ValueMap; - return CloneFunction(F, ValueMap, CodeInfo); + ValueMap VMap; + return CloneFunction(F, VMap, CodeInfo); } /// Clone OldFunc into NewFunc, transforming the old arguments into references @@ -139,7 +139,7 @@ inline Function *CloneFunction(const Function *F, ClonedCodeInfo *CodeInfo = 0){ /// specified suffix to all values cloned. 
/// void CloneFunctionInto(Function *NewFunc, const Function *OldFunc, - DenseMap &ValueMap, + ValueMap &VMap, SmallVectorImpl &Returns, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = 0); @@ -152,7 +152,7 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc, /// dead. Since this doesn't produce an exactly copy of the input, it can't be /// used for things like CloneFunction or CloneModule. void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, - DenseMap &ValueMap, + ValueMap &VMap, SmallVectorImpl &Returns, const char *NameSuffix = "", ClonedCodeInfo *CodeInfo = 0, diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index bb6fd56..b277970 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -31,17 +31,6 @@ class TargetData; template class SmallVectorImpl; //===----------------------------------------------------------------------===// -// Local analysis. -// - -/// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. If it is not obviously safe to load from the -/// specified pointer, we do a quick local scan of the basic block containing -/// ScanFrom, to determine if the address is already accessed. -bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - unsigned Align, const TargetData *TD = 0); - -//===----------------------------------------------------------------------===// // Local constant propagation. // diff --git a/include/llvm/Type.h b/include/llvm/Type.h index 52229ac..617ef69 100644 --- a/include/llvm/Type.h +++ b/include/llvm/Type.h @@ -504,19 +504,19 @@ inline void PATypeHandle::removeUser() { /// reference to the type. /// inline Type* PATypeHolder::get() const { + if (Ty == 0) return 0; const Type *NewTy = Ty->getForwardedType(); if (!NewTy) return const_cast(Ty); return *const_cast(this) = NewTy; } inline void PATypeHolder::addRef() { - assert(Ty && "Type Holder has a null type!"); - if (Ty->isAbstract()) + if (Ty && Ty->isAbstract()) Ty->addRef(); } inline void PATypeHolder::dropRef() { - if (Ty->isAbstract()) + if (Ty && Ty->isAbstract()) Ty->dropRef(); } diff --git a/include/llvm/Use.h b/include/llvm/Use.h index 970f69b..2759338 100644 --- a/include/llvm/Use.h +++ b/include/llvm/Use.h @@ -27,6 +27,7 @@ #include "llvm/Support/Casting.h" #include "llvm/ADT/PointerIntPair.h" +#include #include namespace llvm { diff --git a/include/llvm/Value.h b/include/llvm/Value.h index bc25a0f..16b6207 100644 --- a/include/llvm/Value.h +++ b/include/llvm/Value.h @@ -93,8 +93,8 @@ protected: /// printing behavior. 
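The switch from DenseMap to ValueMap in the cloning interfaces above matters because cloning clients often mutate the module while still holding the map: ValueMap's value-handle keys track RAUW and deletion instead of silently dangling. A usage sketch against an LLVM tree of this vintage (the cloning API has changed since, so this is illustrative only):

    #include "llvm/Function.h"
    #include "llvm/ADT/ValueMap.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    // Clone F; VMap is filled with old->new mappings for every argument,
    // basic block and instruction, per the CloneFunction contract above.
    static Function *cloneWithMap(Function *F) {
      ValueMap<const Value*, Value*> VMap;
      Function *NewF = CloneFunction(F, VMap, /*CodeInfo=*/0);
      // Unlike the old DenseMap, VMap stays coherent even if values in F
      // are RAUW'd or deleted while the map is still live.
      return NewF;
    }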
virtual void printCustom(raw_ostream &O) const; -public: Value(const Type *Ty, unsigned scid); +public: virtual ~Value(); /// dump - Support for debugging, callable in GDB: V->dump() @@ -210,7 +210,7 @@ public: UndefValueVal, // This is an instance of UndefValue BlockAddressVal, // This is an instance of BlockAddress ConstantExprVal, // This is an instance of ConstantExpr - ConstantAggregateZeroVal, // This is an instance of ConstantAggregateNull + ConstantAggregateZeroVal, // This is an instance of ConstantAggregateZero ConstantIntVal, // This is an instance of ConstantInt ConstantFPVal, // This is an instance of ConstantFP ConstantArrayVal, // This is an instance of ConstantArray diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 371dcaf..503fbbd 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -233,10 +233,12 @@ bool llvm::isNoAliasCall(const Value *V) { /// NoAlias returns /// bool llvm::isIdentifiedObject(const Value *V) { - if (isa(V) || isNoAliasCall(V)) + if (isa(V)) return true; if (isa(V) && !isa(V)) return true; + if (isNoAliasCall(V)) + return true; if (const Argument *A = dyn_cast(V)) return A->hasNoAliasAttr() || A->hasByValAttr(); return false; diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index bfa3ff1..37ee9fc 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Assembly/Writer.h" -#include "llvm/Target/TargetData.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/CommandLine.h" diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp index 88c2875..bc2d9c55 100644 --- a/lib/Analysis/AliasDebugger.cpp +++ b/lib/Analysis/AliasDebugger.cpp @@ -45,8 +45,12 @@ namespace { InitializeAliasAnalysis(this); // set up super class for(Module::global_iterator I = M.global_begin(), - E = M.global_end(); I != E; ++I) + E = M.global_end(); I != E; ++I) { Vals.insert(&*I); + for (User::const_op_iterator OI = I->op_begin(), + OE = I->op_end(); OI != OE; ++OI) + Vals.insert(*OI); + } for(Module::iterator I = M.begin(), E = M.end(); I != E; ++I){ @@ -58,8 +62,12 @@ namespace { for (Function::const_iterator FI = I->begin(), FE = I->end(); FI != FE; ++FI) for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) + BI != BE; ++BI) { Vals.insert(&*BI); + for (User::const_op_iterator OI = BI->op_begin(), + OE = BI->op_end(); OI != OE; ++OI) + Vals.insert(*OI); + } } } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index cfe7a1c..4f53a6d 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -78,6 +78,20 @@ static bool isNonEscapingLocalObject(const Value *V) { return false; } +/// isEscapeSource - Return true if the pointer is one which would have +/// been considered an escape by isNonEscapingLocalObject. +static bool isEscapeSource(const Value *V) { + if (isa(V) || isa(V) || isa(V)) + return true; + + // The load case works because isNonEscapingLocalObject considers all + // stores to be escapes (it passes true for the StoreCaptures argument + // to PointerMayBeCaptured). + if (isa(V)) + return true; + + return false; +} /// isObjectSmallerThan - Return true if we can prove that the object specified /// by V is smaller than Size. 
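isEscapeSource above is the dual of isNonEscapingLocalObject: a pointer that arrives via a call, invoke, or load can only name memory that has already escaped, so it can never alias a local object proven non-escaping. A standalone model of that single deduction (the kinds and names are invented for illustration):

    #include <cassert>

    enum PtrOrigin {
      LocalNonEscaping,  // alloca whose address provably never escapes
      FromCall,          // returned by a call or invoke
      FromLoad,          // loaded out of memory
      Unknown
    };

    static bool isEscapeSource(PtrOrigin O) {
      return O == FromCall || O == FromLoad;  // value had to escape to get here
    }

    // The aliasCheck deduction above: an escape source cannot alias a
    // non-escaping local object, in either order.
    static bool provablyNoAlias(PtrOrigin A, PtrOrigin B) {
      return (isEscapeSource(A) && B == LocalNonEscaping) ||
             (isEscapeSource(B) && A == LocalNonEscaping);
    }

    int main() {
      assert(provablyNoAlias(FromCall, LocalNonEscaping));
      assert(!provablyNoAlias(Unknown, LocalNonEscaping));
      return 0;
    }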
@@ -94,7 +108,7 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size, } else if (const CallInst* CI = extractMallocCall(V)) { if (!isArrayMalloc(V, &TD)) // The size is the argument to the malloc call. - if (const ConstantInt* C = dyn_cast(CI->getOperand(1))) + if (const ConstantInt* C = dyn_cast(CI->getArgOperand(0))) return (C->getZExtValue() < Size); return false; } else if (const Argument *A = dyn_cast(V)) { @@ -177,9 +191,29 @@ static RegisterAnalysisGroup V(U); ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } //===----------------------------------------------------------------------===// -// BasicAA Pass +// BasicAliasAnalysis Pass //===----------------------------------------------------------------------===// +#ifndef NDEBUG +static const Function *getParent(const Value *V) { + if (const Instruction *inst = dyn_cast(V)) + return inst->getParent()->getParent(); + + if (const Argument *arg = dyn_cast(V)) + return arg->getParent(); + + return NULL; +} + +static bool notDifferentParent(const Value *O1, const Value *O2) { + + const Function *F1 = getParent(O1); + const Function *F2 = getParent(O2); + + return !F1 || !F2 || F1 == F2; +} +#endif + namespace { /// BasicAliasAnalysis - This is the default alias analysis implementation. /// Because it doesn't chain to a previous alias analysis (like -no-aa), it @@ -187,11 +221,14 @@ namespace { struct BasicAliasAnalysis : public NoAA { static char ID; // Class identification, replacement for typeinfo BasicAliasAnalysis() : NoAA(&ID) {} + AliasResult alias(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size) { - assert(VisitedPHIs.empty() && "VisitedPHIs must be cleared after use!"); + assert(Visited.empty() && "Visited must be cleared after use!"); + assert(notDifferentParent(V1, V2) && + "BasicAliasAnalysis doesn't support interprocedural queries."); AliasResult Alias = aliasCheck(V1, V1Size, V2, V2Size); - VisitedPHIs.clear(); + Visited.clear(); return Alias; } @@ -213,8 +250,8 @@ namespace { } private: - // VisitedPHIs - Track PHI nodes visited by a aliasCheck() call. - SmallPtrSet VisitedPHIs; + // Visited - Track instructions visited by a aliasPHI, aliasSelect(), and aliasGEP(). + SmallPtrSet Visited; // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP // instruction against another. @@ -268,6 +305,9 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) { /// simple "address taken" analysis on local objects. 
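The Visited set generalized above exists because aliasGEP, aliasSelect, and aliasPHI recurse through use-def chains that can cycle: always through PHIs in reachable code, but arbitrarily in unreachable code. The standard defense is to fail conservatively on re-entry, as in the `if (!Visited.insert(V)) return MayAlias;` checks; a minimal standalone version:

    #include <cassert>
    #include <set>
    #include <vector>

    struct Node { std::vector<const Node*> Operands; };

    // Returns false (the "MayAlias"-style conservative answer) when a cycle
    // is detected during the walk.
    static bool analyze(const Node *N, std::set<const Node*> &Visited) {
      if (!Visited.insert(N).second)
        return false;                     // already on this walk: give up
      for (size_t I = 0; I != N->Operands.size(); ++I)
        if (!analyze(N->Operands[I], Visited))
          return false;
      return true;
    }

    int main() {
      Node A, B;
      A.Operands.push_back(&B);
      B.Operands.push_back(&A);           // use-def cycle
      std::set<const Node*> Visited;
      assert(!analyze(&A, Visited));      // conservatively bails out
      return 0;
    }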
AliasAnalysis::ModRefResult BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { + assert(notDifferentParent(CS.getInstruction(), P) && + "AliasAnalysis query involving multiple functions!"); + const Value *Object = P->getUnderlyingObject(); // If this is a tail call and P points to a stack location, we know that @@ -318,10 +358,10 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { case Intrinsic::memcpy: case Intrinsic::memmove: { unsigned Len = ~0U; - if (ConstantInt *LenCI = dyn_cast(II->getOperand(3))) + if (ConstantInt *LenCI = dyn_cast(II->getArgOperand(2))) Len = LenCI->getZExtValue(); - Value *Dest = II->getOperand(1); - Value *Src = II->getOperand(2); + Value *Dest = II->getArgOperand(0); + Value *Src = II->getArgOperand(1); if (isNoAlias(Dest, Len, P, Size)) { if (isNoAlias(Src, Len, P, Size)) return NoModRef; @@ -332,9 +372,9 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { case Intrinsic::memset: // Since memset is 'accesses arguments' only, the AliasAnalysis base class // will handle it for the variable length case. - if (ConstantInt *LenCI = dyn_cast(II->getOperand(3))) { + if (ConstantInt *LenCI = dyn_cast(II->getArgOperand(2))) { unsigned Len = LenCI->getZExtValue(); - Value *Dest = II->getOperand(1); + Value *Dest = II->getArgOperand(0); if (isNoAlias(Dest, Len, P, Size)) return NoModRef; } @@ -352,7 +392,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { case Intrinsic::atomic_load_umax: case Intrinsic::atomic_load_umin: if (TD) { - Value *Op1 = II->getOperand(1); + Value *Op1 = II->getArgOperand(0); unsigned Op1Size = TD->getTypeStoreSize(Op1->getType()); if (isNoAlias(Op1, Op1Size, P, Size)) return NoModRef; @@ -361,14 +401,14 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::invariant_start: { - unsigned PtrSize = cast(II->getOperand(1))->getZExtValue(); - if (isNoAlias(II->getOperand(2), PtrSize, P, Size)) + unsigned PtrSize = cast(II->getArgOperand(0))->getZExtValue(); + if (isNoAlias(II->getArgOperand(1), PtrSize, P, Size)) return NoModRef; break; } case Intrinsic::invariant_end: { - unsigned PtrSize = cast(II->getOperand(2))->getZExtValue(); - if (isNoAlias(II->getOperand(3), PtrSize, P, Size)) + unsigned PtrSize = cast(II->getArgOperand(1))->getZExtValue(); + if (isNoAlias(II->getArgOperand(2), PtrSize, P, Size)) return NoModRef; break; } @@ -440,6 +480,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, const Value *V2, unsigned V2Size, const Value *UnderlyingV1, const Value *UnderlyingV2) { + // If this GEP has been visited before, we're on a use-def cycle. + // Such cycles are only valid when PHI nodes are involved or in unreachable + // code. The visitPHI function catches cycles containing PHIs, but there + // could still be a cycle without PHIs in unreachable code. + if (!Visited.insert(GEP1)) + return MayAlias; + int64_t GEP1BaseOffset; SmallVector, 4> GEP1VariableIndices; @@ -550,6 +597,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, AliasAnalysis::AliasResult BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize, const Value *V2, unsigned V2Size) { + // If this select has been visited before, we're on a use-def cycle. + // Such cycles are only valid when PHI nodes are involved or in unreachable + // code. 
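The getOperand(N+1) to getArgOperand(N) rewrites above are not cosmetic: they insulate callers from where the callee sits in the call's operand list, which was being migrated at the time (note the CallInst::ArgOffset reference later in this patch). The pattern in miniature, with an invented operand layout:

    #include <cassert>
    #include <vector>

    // Toy call instruction whose operand 0 is the callee. This is one
    // possible layout; the real CallInst was switching layouts, which is
    // exactly why the accessor exists.
    struct ToyCall {
      static const unsigned ArgOffset = 1;    // where arguments start
      std::vector<int> Operands;              // [callee, arg0, arg1, ...]

      int getArgOperand(unsigned I) const {   // layout-independent view
        return Operands[I + ArgOffset];
      }
    };

    int main() {
      ToyCall C;
      C.Operands.push_back(99);               // callee
      C.Operands.push_back(10);               // arg 0
      C.Operands.push_back(20);               // arg 1
      assert(C.getArgOperand(0) == 10);       // was getOperand(1) before
      return 0;
    }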
The visitPHI function catches cycles containing PHIs, but there + // could still be a cycle without PHIs in unreachable code. + if (!Visited.insert(SI)) + return MayAlias; + // If the values are Selects with the same condition, we can do a more precise // check: just check for aliases between the values on corresponding arms. if (const SelectInst *SI2 = dyn_cast(V2)) @@ -570,11 +624,17 @@ BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize, // If both arms of the Select node NoAlias or MustAlias V2, then returns // NoAlias / MustAlias. Otherwise, returns MayAlias. AliasResult Alias = - aliasCheck(SI->getTrueValue(), SISize, V2, V2Size); + aliasCheck(V2, V2Size, SI->getTrueValue(), SISize); if (Alias == MayAlias) return MayAlias; + + // If V2 is visited, the recursive case will have been caught in the + // above aliasCheck call, so these subsequent calls to aliasCheck + // don't need to assume that V2 is being visited recursively. + Visited.erase(V2); + AliasResult ThisAlias = - aliasCheck(SI->getFalseValue(), SISize, V2, V2Size); + aliasCheck(V2, V2Size, SI->getFalseValue(), SISize); if (ThisAlias != Alias) return MayAlias; return Alias; @@ -586,7 +646,7 @@ AliasAnalysis::AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, const Value *V2, unsigned V2Size) { // The PHI node has already been visited, avoid recursion any further. - if (!VisitedPHIs.insert(PN)) + if (!Visited.insert(PN)) return MayAlias; // If the values are PHIs in the same block, we can do a more precise @@ -636,10 +696,10 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, unsigned PNSize, for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) { Value *V = V1Srcs[i]; - // If V2 is a PHI, the recursive case will have been caught in the + // If V2 is visited, the recursive case will have been caught in the // above aliasCheck call, so these subsequent calls to aliasCheck // don't need to assume that V2 is being visited recursively. - VisitedPHIs.erase(V2); + Visited.erase(V2); AliasResult ThisAlias = aliasCheck(V2, V2Size, V, PNSize); if (ThisAlias != Alias || ThisAlias == MayAlias) @@ -693,17 +753,32 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, (isa(O2) && isIdentifiedObject(O1) && !isa(O1))) return NoAlias; - // Arguments can't alias with local allocations or noalias calls. - if ((isa(O1) && (isa(O2) || isNoAliasCall(O2))) || - (isa(O2) && (isa(O1) || isNoAliasCall(O1)))) + // Arguments can't alias with local allocations or noalias calls + // in the same function. + if (((isa(O1) && (isa(O2) || isNoAliasCall(O2))) || + (isa(O2) && (isa(O1) || isNoAliasCall(O1))))) return NoAlias; // Most objects can't alias null. - if ((isa(V2) && isKnownNonNull(O1)) || - (isa(V1) && isKnownNonNull(O2))) + if ((isa(O2) && isKnownNonNull(O1)) || + (isa(O1) && isKnownNonNull(O2))) return NoAlias; - } + // If one pointer is the result of a call/invoke or load and the other is a + // non-escaping local object within the same function, then we know the + // object couldn't escape to a point where the call could return it. + // + // Note that if the pointers are in different functions, there are a + // variety of complications. A call with a nocapture argument may still + // temporary store the nocapture argument's value in a temporary memory + // location if that memory location doesn't escape. Or it may pass a + // nocapture value to other functions as long as they don't capture it. 
+ if (isEscapeSource(O1) && isNonEscapingLocalObject(O2)) + return NoAlias; + if (isEscapeSource(O2) && isNonEscapingLocalObject(O1)) + return NoAlias; + } + // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. if (TD) @@ -711,22 +786,6 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD))) return NoAlias; - // If one pointer is the result of a call/invoke or load and the other is a - // non-escaping local object, then we know the object couldn't escape to a - // point where the call could return it. The load case works because - // isNonEscapingLocalObject considers all stores to be escapes (it - // passes true for the StoreCaptures argument to PointerMayBeCaptured). - if (O1 != O2) { - if ((isa(O1) || isa(O1) || isa(O1) || - isa(O1)) && - isNonEscapingLocalObject(O2)) - return NoAlias; - if ((isa(O2) || isa(O2) || isa(O2) || - isa(O2)) && - isNonEscapingLocalObject(O1)) - return NoAlias; - } - // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the // GEP can't simplify, we don't even look at the PHI cases. if (!isa(V1) && isa(V2)) { diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 5a37ce0..d9b670d 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -23,6 +23,7 @@ add_llvm_library(LLVMAnalysis LibCallSemantics.cpp Lint.cpp LiveValues.cpp + Loads.cpp LoopDependenceAnalysis.cpp LoopInfo.cpp LoopPass.cpp diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 37cda02..13d8f4d 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -208,7 +208,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, i != e; ++i, ++GTI) { ConstantInt *CI = dyn_cast(*i); if (!CI) return false; // Index isn't a simple constant? - if (CI->getZExtValue() == 0) continue; // Not adding anything. + if (CI->isZero()) continue; // Not adding anything. if (const StructType *ST = dyn_cast(*GTI)) { // N = N + Offset @@ -436,8 +436,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, unsigned StrLen = Str.length(); const Type *Ty = cast(CE->getType())->getElementType(); unsigned NumBits = Ty->getPrimitiveSizeInBits(); - // Replace LI with immediate integer store. - if ((NumBits >> 3) == StrLen + 1) { + // Replace load with immediate integer if the result is an integer or fp + // value. + if ((NumBits >> 3) == StrLen + 1 && (NumBits & 7) == 0 && + (isa(Ty) || Ty->isFloatingPointTy())) { APInt StrVal(NumBits, 0); APInt SingleChar(NumBits, 0); if (TD->isLittleEndian()) { @@ -454,7 +456,11 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, SingleChar = 0; StrVal = (StrVal << 8) | SingleChar; } - return ConstantInt::get(CE->getContext(), StrVal); + + Constant *Res = ConstantInt::get(CE->getContext(), StrVal); + if (Ty->isFloatingPointTy()) + Res = ConstantExpr::getBitCast(Res, Ty); + return Res; } } @@ -772,9 +778,9 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, case Instruction::ICmp: case Instruction::FCmp: assert(0 && "Invalid for compares"); case Instruction::Call: - if (Function *F = dyn_cast(Ops[0])) + if (Function *F = dyn_cast(Ops[CallInst::ArgOffset ? 
0:NumOps-1])) if (canConstantFoldCallTo(F)) - return ConstantFoldCall(F, Ops+1, NumOps-1); + return ConstantFoldCall(F, Ops+CallInst::ArgOffset, NumOps-1); return 0; case Instruction::PtrToInt: // If the input is a inttoptr, eliminate the pair. This requires knowing diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index a7b6d2b..c8d0d22 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -73,6 +73,15 @@ GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { return 0; } +Function *DIDescriptor::getFunctionField(unsigned Elt) const { + if (DbgNode == 0) + return 0; + + if (Elt < DbgNode->getNumOperands()) + return dyn_cast_or_null(DbgNode->getOperand(Elt)); + return 0; +} + unsigned DIVariable::getNumAddrElements() const { return DbgNode->getNumOperands()-6; } @@ -397,6 +406,8 @@ bool DIVariable::isInlinedFnArgument(const Function *CurFn) { /// information for the function F. bool DISubprogram::describes(const Function *F) { assert(F && "Invalid function"); + if (F == getFunction()) + return true; StringRef Name = getLinkageName(); if (Name.empty()) Name = getName(); @@ -938,7 +949,8 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, unsigned VK, unsigned VIndex, DIType ContainingType, bool isArtificial, - bool isOptimized) { + bool isOptimized, + Function *Fn) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_subprogram), @@ -956,9 +968,15 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), ContainingType, ConstantInt::get(Type::getInt1Ty(VMContext), isArtificial), - ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized) + ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), + Fn }; - return DISubprogram(MDNode::get(VMContext, &Elts[0], 16)); + MDNode *Node = MDNode::get(VMContext, &Elts[0], 17); + + // Create a named metadata so that we do not lose this mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); + NMD->addOperand(Node); + return DISubprogram(Node); } /// CreateSubprogramDefinition - Create new subprogram descriptor for the @@ -984,9 +1002,15 @@ DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) DeclNode->getOperand(12), // VIndex DeclNode->getOperand(13), // Containting Type DeclNode->getOperand(14), // isArtificial - DeclNode->getOperand(15) // isOptimized + DeclNode->getOperand(15), // isOptimized + SPDeclaration.getFunction() }; - return DISubprogram(MDNode::get(VMContext, &Elts[0], 16)); + MDNode *Node =MDNode::get(VMContext, &Elts[0], 16); + + // Create a named metadata so that we do not lose this mdnode. + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); + NMD->addOperand(Node); + return DISubprogram(Node); } /// CreateGlobalVariable - Create a new descriptor for the specified global. @@ -1042,8 +1066,18 @@ DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context, // The optimizer may remove local variable. If there is an interest // to preserve variable info in such situation then stash it in a // named mdnode. 
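Returning to the ConstantFoldLoadFromConstPtr change above: it folds a load of a nul-terminated constant string into a single integer immediate (and now also into an FP value via a bitcast), and the target's byte order decides how the characters are assembled. A standalone model of the little- versus big-endian assembly loop:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Assemble the bytes of Str (including its trailing NUL) into one
    // integer, as ConstantFoldLoadFromConstPtr does above.
    static uint64_t foldString(const char *Str, bool LittleEndian) {
      size_t Len = strlen(Str) + 1;      // NumBits/8 == StrLen + 1 above
      uint64_t Val = 0;
      if (LittleEndian) {
        for (size_t I = Len; I-- > 0;)   // last byte becomes the top bits
          Val = (Val << 8) | (unsigned char)Str[I];
      } else {
        for (size_t I = 0; I != Len; ++I)
          Val = (Val << 8) | (unsigned char)Str[I];
      }
      return Val;
    }

    int main() {
      // "AB\0" seen as a 24-bit load: LE yields 0x004241, BE yields 0x414200.
      assert(foldString("AB", true)  == 0x004241);
      assert(foldString("AB", false) == 0x414200);
      return 0;
    }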
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.lv"); - NMD->addOperand(Node); + DISubprogram Fn(getDISubprogram(Context)); + StringRef FName = "fn"; + if (Fn.getFunction()) + FName = Fn.getFunction()->getName(); + char One = '\1'; + if (FName.startswith(StringRef(&One, 1))) + FName = FName.substr(1); + NamedMDNode *FnLocals = M.getNamedMetadata(Twine("llvm.dbg.lv.", FName)); + if (!FnLocals) + FnLocals = NamedMDNode::Create(VMContext, Twine("llvm.dbg.lv.", FName), + NULL, 0, &M); + FnLocals->addOperand(Node); } return DIVariable(Node); } @@ -1110,18 +1144,6 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, return DILocation(MDNode::get(VMContext, &Elts[0], 4)); } -/// CreateLocation - Creates a debug info location. -DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, - DIScope S, MDNode *OrigLoc) { - Value *Elts[] = { - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo), - S, - OrigLoc - }; - return DILocation(MDNode::get(VMContext, &Elts[0], 4)); -} - //===----------------------------------------------------------------------===// // DIFactory: Routines for inserting code into a function //===----------------------------------------------------------------------===// @@ -1218,17 +1240,19 @@ void DebugInfoFinder::processModule(Module &M) { processLocation(DILocation(IA)); } - NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); - if (!NMD) - return; - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIGlobalVariable DIG(cast(NMD->getOperand(i))); - if (addGlobalVariable(DIG)) { - addCompileUnit(DIG.getCompileUnit()); - processType(DIG.getType()); + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIGlobalVariable DIG(cast(NMD->getOperand(i))); + if (addGlobalVariable(DIG)) { + addCompileUnit(DIG.getCompileUnit()); + processType(DIG.getType()); + } } } + + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + processSubprogram(DISubprogram(NMD->getOperand(i))); } /// processLocation - Process DILocation. diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp index a1676e5..d95c376 100644 --- a/lib/Analysis/DomPrinter.cpp +++ b/lib/Analysis/DomPrinter.cpp @@ -43,10 +43,10 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { if (isSimple()) return DOTGraphTraits - ::getSimpleNodeLabel(BB, BB->getParent()); + ::getSimpleNodeLabel(BB, BB->getParent()); else return DOTGraphTraits - ::getCompleteNodeLabel(BB, BB->getParent()); + ::getCompleteNodeLabel(BB, BB->getParent()); } }; diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 2bde56d7..65c7c6e 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -126,13 +126,15 @@ private: } // Loop over all of the users of the function, looking for non-call uses. - for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I) - if ((!isa(I) && !isa(I)) - || !CallSite(cast(I)).isCallee(I)) { + for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){ + User *U = *I; + if ((!isa(U) && !isa(U)) + || !CallSite(cast(U)).isCallee(I)) { // Not a call, or being used as a parameter rather than as the callee. ExternalCallingNode->addCalledFunction(CallSite(), Node); break; } + } // If this function is not defined in this translation unit, it could call // anything. 
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index b14afa3..f13deea 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -233,33 +233,34 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, GlobalValue *OkayStoreDest) { if (!V->getType()->isPointerTy()) return true; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) - if (LoadInst *LI = dyn_cast(*UI)) { + for (Value::use_iterator UI = V->use_begin(), E=V->use_end(); UI != E; ++UI) { + User *U = *UI; + if (LoadInst *LI = dyn_cast(U)) { Readers.push_back(LI->getParent()->getParent()); - } else if (StoreInst *SI = dyn_cast(*UI)) { + } else if (StoreInst *SI = dyn_cast(U)) { if (V == SI->getOperand(1)) { Writers.push_back(SI->getParent()->getParent()); } else if (SI->getOperand(1) != OkayStoreDest) { return true; // Storing the pointer } - } else if (GetElementPtrInst *GEP = dyn_cast(*UI)) { + } else if (GetElementPtrInst *GEP = dyn_cast(U)) { if (AnalyzeUsesOfPointer(GEP, Readers, Writers)) return true; - } else if (BitCastInst *BCI = dyn_cast(*UI)) { + } else if (BitCastInst *BCI = dyn_cast(U)) { if (AnalyzeUsesOfPointer(BCI, Readers, Writers, OkayStoreDest)) return true; - } else if (isFreeCall(*UI)) { - Writers.push_back(cast(*UI)->getParent()->getParent()); - } else if (CallInst *CI = dyn_cast(*UI)) { + } else if (isFreeCall(U)) { + Writers.push_back(cast(U)->getParent()->getParent()); + } else if (CallInst *CI = dyn_cast(U)) { // Make sure that this is just the function being called, not that it is // passing into the function. - for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i) - if (CI->getOperand(i) == V) return true; - } else if (InvokeInst *II = dyn_cast(*UI)) { + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) + if (CI->getArgOperand(i) == V) return true; + } else if (InvokeInst *II = dyn_cast(U)) { // Make sure that this is just the function being called, not that it is // passing into the function. - for (unsigned i = 0, e = II->getNumOperands() - 3; i != e; ++i) - if (II->getOperand(i) == V) return true; - } else if (ConstantExpr *CE = dyn_cast(*UI)) { + for (unsigned i = 0, e = II->getNumArgOperands(); i != e; ++i) + if (II->getArgOperand(i) == V) return true; + } else if (ConstantExpr *CE = dyn_cast(U)) { if (CE->getOpcode() == Instruction::GetElementPtr || CE->getOpcode() == Instruction::BitCast) { if (AnalyzeUsesOfPointer(CE, Readers, Writers)) @@ -267,12 +268,14 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, } else { return true; } - } else if (ICmpInst *ICI = dyn_cast(*UI)) { + } else if (ICmpInst *ICI = dyn_cast(U)) { if (!isa(ICI->getOperand(1))) return true; // Allow comparison against null. } else { return true; } + } + return false; } @@ -291,7 +294,8 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { // Walk the user list of the global. If we find anything other than a direct // load or store, bail out. for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ - if (LoadInst *LI = dyn_cast(*I)) { + User *U = *I; + if (LoadInst *LI = dyn_cast(U)) { // The pointer loaded from the global can only be used in simple ways: // we allow addressing of it and loading storing to it. We do *not* allow // storing the loaded pointer somewhere else or passing to a function. 
@@ -299,7 +303,7 @@ bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) {
       if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters))
         return false;  // Loaded pointer escapes.
       // TODO: Could try some IP mod/ref of the loaded pointer.
-    } else if (StoreInst *SI = dyn_cast<StoreInst>(*I)) {
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
       // Storing the global itself.
       if (SI->getOperand(0) == GV)
         return false;
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 98dbb69..b1df517 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -162,14 +162,14 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
       if (Function *F = CS.getCalledFunction()) {
         if (F->isDeclaration() &&
             (F->getName() == "setjmp" || F->getName() == "_setjmp"))
-          NeverInline = true;
+          callsSetJmp = true;
 
         // If this call is to function itself, then the function is recursive.
         // Inlining it into other functions is a bad idea, because this is
         // basically just a form of loop peeling, and our metrics aren't useful
         // for that case.
         if (F == BB->getParent())
-          NeverInline = true;
+          isRecursive = true;
       }
 
       if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
@@ -220,7 +220,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
   // jump would jump from the inlined copy of the function into the original
   // function which is extremely undefined behavior.
   if (isa<IndirectBrInst>(BB->getTerminator()))
-    NeverInline = true;
+    containsIndirectBr = true;
 
   // Remember NumInsts for this BB.
   NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;
@@ -247,7 +247,7 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
 
   // Don't bother calculating argument weights if we are never going to inline
   // the function anyway.
-  if (Metrics.NeverInline)
+  if (NeverInline())
     return;
 
   // Check out all of the arguments to the function, figuring out how much
@@ -258,6 +258,14 @@ void InlineCostAnalyzer::FunctionInfo::analyzeFunction(Function *F) {
                                       CountCodeReductionForAlloca(I)));
 }
 
+/// NeverInline - returns true if the function should never be inlined into
+/// any caller
+bool InlineCostAnalyzer::FunctionInfo::NeverInline()
+{
+  return (Metrics.callsSetJmp || Metrics.isRecursive ||
+          Metrics.containsIndirectBr);
+
+}
 // getInlineCost - The heuristic used to determine if we should inline the
 // function call or not.
 //
@@ -315,7 +323,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
     CalleeFI->analyzeFunction(Callee);
 
   // If we should never inline this, return a huge cost.
-  if (CalleeFI->Metrics.NeverInline)
+  if (CalleeFI->NeverInline())
     return InlineCost::getNever();
 
   // FIXME: It would be nice to kill off CalleeFI->NeverInline. Then we
@@ -443,10 +451,15 @@ InlineCostAnalyzer::growCachedCostInfo(Function *Caller, Function *Callee) {
   }
 
   // Since CalleeMetrics were already calculated, we know that the CallerMetrics
-  // reference isn't invalidated: both were in the DenseMap.
-  CallerMetrics.NeverInline |= CalleeMetrics.NeverInline;
+  // reference isn't invalidated: both were in the DenseMap.
   CallerMetrics.usesDynamicAlloca |= CalleeMetrics.usesDynamicAlloca;
 
+  // FIXME: If any of these three are true for the callee, the callee was
+  // not inlined into the caller, so I think they're redundant here.
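The InlineCost hunks above replace the single NeverInline flag with three concrete reasons and derive the decision from them. A compact standalone model of that decomposition (the names mirror the patch, but this is not the LLVM type):

#include <cassert>

// Record *why* a function must not be inlined instead of one opaque flag.
struct Metrics {
  bool callsSetJmp = false;
  bool isRecursive = false;
  bool containsIndirectBr = false;
};

// The derived predicate, as in FunctionInfo::NeverInline() above.
bool neverInline(const Metrics &M) {
  return M.callsSetJmp || M.isRecursive || M.containsIndirectBr;
}

int main() {
  Metrics M;
  assert(!neverInline(M));
  M.containsIndirectBr = true; // e.g. a block ends in an indirectbr
  assert(neverInline(M));
  return 0;
}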
+ CallerMetrics.callsSetJmp |= CalleeMetrics.callsSetJmp; + CallerMetrics.isRecursive |= CalleeMetrics.isRecursive; + CallerMetrics.containsIndirectBr |= CalleeMetrics.containsIndirectBr; + CallerMetrics.NumInsts += CalleeMetrics.NumInsts; CallerMetrics.NumBlocks += CalleeMetrics.NumBlocks; CallerMetrics.NumCalls += CalleeMetrics.NumCalls; diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index a031cbc..9f1b30d 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -19,7 +19,8 @@ // // Another limitation is that it assumes all code will be executed. A store // through a null pointer in a basic block which is never reached is harmless, -// but this pass will warn about it anyway. +// but this pass will warn about it anyway. This is the main reason why most +// of these checks live here instead of in the Verifier pass. // // Optimization passes may make conditions that this pass checks for more or // less obvious. If an optimization pass appears to be introducing a warning, @@ -35,7 +36,11 @@ #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/Lint.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/Target/TargetData.h" @@ -64,7 +69,8 @@ namespace { void visitFunction(Function &F); void visitCallSite(CallSite CS); - void visitMemoryReference(Instruction &I, Value *Ptr, unsigned Align, + void visitMemoryReference(Instruction &I, Value *Ptr, + unsigned Size, unsigned Align, const Type *Ty, unsigned Flags); void visitCallInst(CallInst &I); @@ -88,9 +94,14 @@ namespace { void visitInsertElementInst(InsertElementInst &I); void visitUnreachableInst(UnreachableInst &I); + Value *findValue(Value *V, bool OffsetOk) const; + Value *findValueImpl(Value *V, bool OffsetOk, + SmallPtrSet &Visited) const; + public: Module *Mod; AliasAnalysis *AA; + DominatorTree *DT; TargetData *TD; std::string Messages; @@ -104,6 +115,7 @@ namespace { virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); + AU.addRequired(); } virtual void print(raw_ostream &O, const Module *M) const {} @@ -176,6 +188,7 @@ X("lint", "Statically lint-checks LLVM IR", false, true); bool Lint::runOnFunction(Function &F) { Mod = F.getParent(); AA = &getAnalysis(); + DT = &getAnalysis(); TD = getAnalysisIfAvailable(); visit(F); dbgs() << MessagesStr.str(); @@ -188,15 +201,17 @@ void Lint::visitFunction(Function &F) { // fairly common mistake to neglect to name a function. Assert1(F.hasName() || F.hasLocalLinkage(), "Unusual: Unnamed function with non-local linkage", &F); + + // TODO: Check for irreducible control flow. 
} void Lint::visitCallSite(CallSite CS) { Instruction &I = *CS.getInstruction(); Value *Callee = CS.getCalledValue(); - visitMemoryReference(I, Callee, 0, 0, MemRef::Callee); + visitMemoryReference(I, Callee, ~0u, 0, 0, MemRef::Callee); - if (Function *F = dyn_cast(Callee->stripPointerCasts())) { + if (Function *F = dyn_cast(findValue(Callee, /*OffsetOk=*/false))) { Assert1(CS.getCallingConv() == F->getCallingConv(), "Undefined behavior: Caller and callee calling convention differ", &I); @@ -209,23 +224,53 @@ void Lint::visitCallSite(CallSite CS) { FT->getNumParams() == NumActualArgs, "Undefined behavior: Call argument count mismatches callee " "argument count", &I); - - // TODO: Check argument types (in case the callee was casted) - - // TODO: Check ABI-significant attributes. - // TODO: Check noalias attribute. - - // TODO: Check sret attribute. + Assert1(FT->getReturnType() == I.getType(), + "Undefined behavior: Call return type mismatches " + "callee return type", &I); + + // Check argument types (in case the callee was casted) and attributes. + // TODO: Verify that caller and callee attributes are compatible. + Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end(); + CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); + for (; AI != AE; ++AI) { + Value *Actual = *AI; + if (PI != PE) { + Argument *Formal = PI++; + Assert1(Formal->getType() == Actual->getType(), + "Undefined behavior: Call argument type mismatches " + "callee parameter type", &I); + + // Check that noalias arguments don't alias other arguments. The + // AliasAnalysis API isn't expressive enough for what we really want + // to do. Known partial overlap is not distinguished from the case + // where nothing is known. + if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) + for (CallSite::arg_iterator BI = CS.arg_begin(); BI != AE; ++BI) { + Assert1(AI == BI || + AA->alias(*AI, ~0u, *BI, ~0u) != AliasAnalysis::MustAlias, + "Unusual: noalias argument aliases another argument", &I); + } + + // Check that an sret argument points to valid memory. + if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) { + const Type *Ty = + cast(Formal->getType())->getElementType(); + visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), + TD ? TD->getABITypeAlignment(Ty) : 0, + Ty, MemRef::Read | MemRef::Write); + } + } + } } if (CS.isCall() && cast(CS.getInstruction())->isTailCall()) for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) { - Value *Obj = (*AI)->getUnderlyingObject(); - Assert1(!isa(Obj) && !isa(Obj), + Value *Obj = findValue(*AI, /*OffsetOk=*/true); + Assert1(!isa(Obj), "Undefined behavior: Call with \"tail\" keyword references " - "alloca or va_arg", &I); + "alloca", &I); } @@ -237,9 +282,10 @@ void Lint::visitCallSite(CallSite CS) { case Intrinsic::memcpy: { MemCpyInst *MCI = cast(&I); - visitMemoryReference(I, MCI->getSource(), MCI->getAlignment(), 0, + // TODO: If the size is known, use it. + visitMemoryReference(I, MCI->getDest(), ~0u, MCI->getAlignment(), 0, MemRef::Write); - visitMemoryReference(I, MCI->getDest(), MCI->getAlignment(), 0, + visitMemoryReference(I, MCI->getSource(), ~0u, MCI->getAlignment(), 0, MemRef::Read); // Check that the memcpy arguments don't overlap. The AliasAnalysis API @@ -247,7 +293,8 @@ void Lint::visitCallSite(CallSite CS) { // overlap is not distinguished from the case where nothing is known. 
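The noalias check added above walks every pointer-argument pair and flags a must-alias between a noalias argument and any other argument of the same call. A minimal sketch of that pairwise shape, with the must-alias query reduced to pointer equality for the example (the real pass asks AliasAnalysis):

#include <cassert>
#include <cstdint>

// Toy stand-in for a must-alias query: two pointers "must alias" here
// only when they are identical.
bool mustAlias(uint64_t P, uint64_t Q) { return P == Q; }

// Shape of the Lint check: a noalias argument may not alias any *other*
// pointer argument of the call.
bool noaliasViolated(const uint64_t *Args, unsigned N, unsigned NoAliasIdx) {
  for (unsigned i = 0; i != N; ++i)
    if (i != NoAliasIdx && mustAlias(Args[NoAliasIdx], Args[i]))
      return true;
  return false;
}

int main() {
  uint64_t Args[] = {0x1000, 0x2000, 0x1000};
  assert(noaliasViolated(Args, 3, 0));  // arg 2 must-aliases arg 0
  assert(!noaliasViolated(Args, 3, 1)); // arg 1 aliases nothing else
  return 0;
}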
unsigned Size = 0; if (const ConstantInt *Len = - dyn_cast(MCI->getLength()->stripPointerCasts())) + dyn_cast(findValue(MCI->getLength(), + /*OffsetOk=*/false))) if (Len->getValue().isIntN(32)) Size = Len->getValue().getZExtValue(); Assert1(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) != @@ -257,15 +304,17 @@ void Lint::visitCallSite(CallSite CS) { } case Intrinsic::memmove: { MemMoveInst *MMI = cast(&I); - visitMemoryReference(I, MMI->getSource(), MMI->getAlignment(), 0, + // TODO: If the size is known, use it. + visitMemoryReference(I, MMI->getDest(), ~0u, MMI->getAlignment(), 0, MemRef::Write); - visitMemoryReference(I, MMI->getDest(), MMI->getAlignment(), 0, + visitMemoryReference(I, MMI->getSource(), ~0u, MMI->getAlignment(), 0, MemRef::Read); break; } case Intrinsic::memset: { MemSetInst *MSI = cast(&I); - visitMemoryReference(I, MSI->getDest(), MSI->getAlignment(), 0, + // TODO: If the size is known, use it. + visitMemoryReference(I, MSI->getDest(), ~0u, MSI->getAlignment(), 0, MemRef::Write); break; } @@ -275,15 +324,15 @@ void Lint::visitCallSite(CallSite CS) { "Undefined behavior: va_start called in a non-varargs function", &I); - visitMemoryReference(I, CS.getArgument(0), 0, 0, + visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, MemRef::Read | MemRef::Write); break; case Intrinsic::vacopy: - visitMemoryReference(I, CS.getArgument(0), 0, 0, MemRef::Write); - visitMemoryReference(I, CS.getArgument(1), 0, 0, MemRef::Read); + visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, MemRef::Write); + visitMemoryReference(I, CS.getArgument(1), ~0u, 0, 0, MemRef::Read); break; case Intrinsic::vaend: - visitMemoryReference(I, CS.getArgument(0), 0, 0, + visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, MemRef::Read | MemRef::Write); break; @@ -291,7 +340,7 @@ void Lint::visitCallSite(CallSite CS) { // Stackrestore doesn't read or write memory, but it sets the // stack pointer, which the compiler may read from or write to // at any time, so check it for both readability and writeability. - visitMemoryReference(I, CS.getArgument(0), 0, 0, + visitMemoryReference(I, CS.getArgument(0), ~0u, 0, 0, MemRef::Read | MemRef::Write); break; } @@ -310,17 +359,35 @@ void Lint::visitReturnInst(ReturnInst &I) { Assert1(!F->doesNotReturn(), "Unusual: Return statement in function with noreturn attribute", &I); + + if (Value *V = I.getReturnValue()) { + Value *Obj = findValue(V, /*OffsetOk=*/true); + Assert1(!isa(Obj), + "Unusual: Returning alloca value", &I); + } } -// TODO: Add a length argument and check that the reference is in bounds +// TODO: Check that the reference is in bounds. +// TODO: Check readnone/readonly function attributes. void Lint::visitMemoryReference(Instruction &I, - Value *Ptr, unsigned Align, const Type *Ty, - unsigned Flags) { - Value *UnderlyingObject = Ptr->getUnderlyingObject(); + Value *Ptr, unsigned Size, unsigned Align, + const Type *Ty, unsigned Flags) { + // If no memory is being referenced, it doesn't matter if the pointer + // is valid. 
+ if (Size == 0) + return; + + Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true); Assert1(!isa(UnderlyingObject), "Undefined behavior: Null pointer dereference", &I); Assert1(!isa(UnderlyingObject), "Undefined behavior: Undef pointer dereference", &I); + Assert1(!isa(UnderlyingObject) || + !cast(UnderlyingObject)->isAllOnesValue(), + "Unusual: All-ones pointer dereference", &I); + Assert1(!isa(UnderlyingObject) || + !cast(UnderlyingObject)->isOne(), + "Unusual: Address one pointer dereference", &I); if (Flags & MemRef::Write) { if (const GlobalVariable *GV = dyn_cast(UnderlyingObject)) @@ -361,13 +428,16 @@ void Lint::visitMemoryReference(Instruction &I, } void Lint::visitLoadInst(LoadInst &I) { - visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), I.getType(), - MemRef::Read); + visitMemoryReference(I, I.getPointerOperand(), + AA->getTypeStoreSize(I.getType()), I.getAlignment(), + I.getType(), MemRef::Read); } void Lint::visitStoreInst(StoreInst &I) { - visitMemoryReference(I, I.getPointerOperand(), I.getAlignment(), - I.getOperand(0)->getType(), MemRef::Write); + visitMemoryReference(I, I.getPointerOperand(), + AA->getTypeStoreSize(I.getOperand(0)->getType()), + I.getAlignment(), + I.getOperand(0)->getType(), MemRef::Write); } void Lint::visitXor(BinaryOperator &I) { @@ -384,21 +454,21 @@ void Lint::visitSub(BinaryOperator &I) { void Lint::visitLShr(BinaryOperator &I) { if (ConstantInt *CI = - dyn_cast(I.getOperand(1)->stripPointerCasts())) + dyn_cast(findValue(I.getOperand(1), /*OffsetOk=*/false))) Assert1(CI->getValue().ult(cast(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } void Lint::visitAShr(BinaryOperator &I) { if (ConstantInt *CI = - dyn_cast(I.getOperand(1)->stripPointerCasts())) + dyn_cast(findValue(I.getOperand(1), /*OffsetOk=*/false))) Assert1(CI->getValue().ult(cast(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } void Lint::visitShl(BinaryOperator &I) { if (ConstantInt *CI = - dyn_cast(I.getOperand(1)->stripPointerCasts())) + dyn_cast(findValue(I.getOperand(1), /*OffsetOk=*/false))) Assert1(CI->getValue().ult(cast(I.getType())->getBitWidth()), "Undefined result: Shift count out of range", &I); } @@ -439,27 +509,31 @@ void Lint::visitAllocaInst(AllocaInst &I) { // This isn't undefined behavior, it's just an obvious pessimization. Assert1(&I.getParent()->getParent()->getEntryBlock() == I.getParent(), "Pessimization: Static alloca outside of entry block", &I); + + // TODO: Check for an unusual size (MSB set?) 
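The three shift visitors above share one predicate: a constant shift count must be strictly less than the bit width of the type, or the result is undefined. The same test in trivial standalone form:

#include <cassert>
#include <cstdint>

// A shift count is only defined when it is strictly less than the
// bit width of the shifted type.
bool shiftCountInRange(uint64_t Count, unsigned BitWidth) {
  return Count < BitWidth;
}

int main() {
  assert(shiftCountInRange(31, 32));
  assert(!shiftCountInRange(32, 32)); // i32 shifted by 32 is undefined
  return 0;
}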
} void Lint::visitVAArgInst(VAArgInst &I) { - visitMemoryReference(I, I.getOperand(0), 0, 0, + visitMemoryReference(I, I.getOperand(0), ~0u, 0, 0, MemRef::Read | MemRef::Write); } void Lint::visitIndirectBrInst(IndirectBrInst &I) { - visitMemoryReference(I, I.getAddress(), 0, 0, MemRef::Branchee); + visitMemoryReference(I, I.getAddress(), ~0u, 0, 0, MemRef::Branchee); } void Lint::visitExtractElementInst(ExtractElementInst &I) { if (ConstantInt *CI = - dyn_cast(I.getIndexOperand()->stripPointerCasts())) + dyn_cast(findValue(I.getIndexOperand(), + /*OffsetOk=*/false))) Assert1(CI->getValue().ult(I.getVectorOperandType()->getNumElements()), "Undefined result: extractelement index out of range", &I); } void Lint::visitInsertElementInst(InsertElementInst &I) { if (ConstantInt *CI = - dyn_cast(I.getOperand(2)->stripPointerCasts())) + dyn_cast(findValue(I.getOperand(2), + /*OffsetOk=*/false))) Assert1(CI->getValue().ult(I.getType()->getNumElements()), "Undefined result: insertelement index out of range", &I); } @@ -472,6 +546,91 @@ void Lint::visitUnreachableInst(UnreachableInst &I) { "side effects", &I); } +/// findValue - Look through bitcasts and simple memory reference patterns +/// to identify an equivalent, but more informative, value. If OffsetOk +/// is true, look through getelementptrs with non-zero offsets too. +/// +/// Most analysis passes don't require this logic, because instcombine +/// will simplify most of these kinds of things away. But it's a goal of +/// this Lint pass to be useful even on non-optimized IR. +Value *Lint::findValue(Value *V, bool OffsetOk) const { + SmallPtrSet Visited; + return findValueImpl(V, OffsetOk, Visited); +} + +/// findValueImpl - Implementation helper for findValue. +Value *Lint::findValueImpl(Value *V, bool OffsetOk, + SmallPtrSet &Visited) const { + // Detect self-referential values. + if (!Visited.insert(V)) + return UndefValue::get(V->getType()); + + // TODO: Look through sext or zext cast, when the result is known to + // be interpreted as signed or unsigned, respectively. + // TODO: Look through eliminable cast pairs. + // TODO: Look through calls with unique return values. + // TODO: Look through vector insert/extract/shuffle. + V = OffsetOk ? V->getUnderlyingObject() : V->stripPointerCasts(); + if (LoadInst *L = dyn_cast(V)) { + BasicBlock::iterator BBI = L; + BasicBlock *BB = L->getParent(); + SmallPtrSet VisitedBlocks; + for (;;) { + if (!VisitedBlocks.insert(BB)) break; + if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), + BB, BBI, 6, AA)) + return findValueImpl(U, OffsetOk, Visited); + if (BBI != BB->begin()) break; + BB = BB->getUniquePredecessor(); + if (!BB) break; + BBI = BB->end(); + } + } else if (PHINode *PN = dyn_cast(V)) { + if (Value *W = PN->hasConstantValue(DT)) + return findValueImpl(W, OffsetOk, Visited); + } else if (CastInst *CI = dyn_cast(V)) { + if (CI->isNoopCast(TD ? TD->getIntPtrType(V->getContext()) : + Type::getInt64Ty(V->getContext()))) + return findValueImpl(CI->getOperand(0), OffsetOk, Visited); + } else if (ExtractValueInst *Ex = dyn_cast(V)) { + if (Value *W = FindInsertedValue(Ex->getAggregateOperand(), + Ex->idx_begin(), + Ex->idx_end())) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } else if (ConstantExpr *CE = dyn_cast(V)) { + // Same as above, but for ConstantExpr instead of Instruction. + if (Instruction::isCast(CE->getOpcode())) { + if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()), + CE->getOperand(0)->getType(), + CE->getType(), + TD ? 
TD->getIntPtrType(V->getContext()) : + Type::getInt64Ty(V->getContext()))) + return findValueImpl(CE->getOperand(0), OffsetOk, Visited); + } else if (CE->getOpcode() == Instruction::ExtractValue) { + const SmallVector &Indices = CE->getIndices(); + if (Value *W = FindInsertedValue(CE->getOperand(0), + Indices.begin(), + Indices.end())) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } + } + + // As a last resort, try SimplifyInstruction or constant folding. + if (Instruction *Inst = dyn_cast(V)) { + if (Value *W = SimplifyInstruction(Inst, TD)) + if (W != Inst) + return findValueImpl(W, OffsetOk, Visited); + } else if (ConstantExpr *CE = dyn_cast(V)) { + if (Value *W = ConstantFoldConstantExpression(CE, TD)) + if (W != V) + return findValueImpl(W, OffsetOk, Visited); + } + + return V; +} + //===----------------------------------------------------------------------===// // Implement the public interfaces to this file... //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp new file mode 100644 index 0000000..2ba1d86 --- /dev/null +++ b/lib/Analysis/Loads.cpp @@ -0,0 +1,235 @@ +//===- Loads.cpp - Local load analysis ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines simple local analyses for load instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Target/TargetData.h" +#include "llvm/GlobalAlias.h" +#include "llvm/GlobalVariable.h" +#include "llvm/IntrinsicInst.h" +using namespace llvm; + +/// AreEquivalentAddressValues - Test if A and B will obviously have the same +/// value. This includes recognizing that %t0 and %t1 will have the same +/// value in code like this: +/// %t0 = getelementptr \@a, 0, 3 +/// store i32 0, i32* %t0 +/// %t1 = getelementptr \@a, 0, 3 +/// %t2 = load i32* %t1 +/// +static bool AreEquivalentAddressValues(const Value *A, const Value *B) { + // Test if the values are trivially equivalent. + if (A == B) return true; + + // Test if the values come from identical arithmetic instructions. + // Use isIdenticalToWhenDefined instead of isIdenticalTo because + // this function is only used when one address use dominates the + // other, which means that they'll always either have the same + // value or one of them will have an undefined value. + if (isa(A) || isa(A) || + isa(A) || isa(A)) + if (const Instruction *BI = dyn_cast(B)) + if (cast(A)->isIdenticalToWhenDefined(BI)) + return true; + + // Otherwise they may not be equivalent. + return false; +} + +/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and +/// bitcasts to get back to the underlying object being addressed, keeping +/// track of the offset in bytes from the GEPs relative to the result. +/// This is closely related to Value::getUnderlyingObject but is located +/// here to avoid making VMCore depend on TargetData. 
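findValueImpl above chases a value through loads, PHIs, casts, and folds, using a visited set so self-referential chains terminate (returning undef). A toy version of that traversal over an invented simplification map, showing only the cycle-detection shape:

#include <cassert>
#include <map>
#include <set>
#include <string>

// Chase a value through known simplification steps; the visited set makes
// self-referential chains terminate, returning "undef" as the pass does.
std::string findValue(const std::string &V,
                      const std::map<std::string, std::string> &Simplifies,
                      std::set<std::string> &Visited) {
  if (!Visited.insert(V).second)
    return "undef"; // cycle detected: give up
  std::map<std::string, std::string>::const_iterator I = Simplifies.find(V);
  if (I == Simplifies.end())
    return V; // nothing more informative is known
  return findValue(I->second, Simplifies, Visited);
}

int main() {
  std::map<std::string, std::string> S;
  S["a"] = "b"; S["b"] = "c"; // a simplifies to b, b to c
  S["x"] = "y"; S["y"] = "x"; // a self-referential pair
  std::set<std::string> V1, V2;
  assert(findValue("a", S, V1) == "c");
  assert(findValue("x", S, V2) == "undef");
  return 0;
}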
+static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, + uint64_t &ByteOffset, + unsigned MaxLookup = 6) { + if (!V->getType()->isPointerTy()) + return V; + for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { + if (GEPOperator *GEP = dyn_cast(V)) { + if (!GEP->hasAllConstantIndices()) + return V; + SmallVector Indices(GEP->op_begin() + 1, GEP->op_end()); + ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), + &Indices[0], Indices.size()); + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast(V)) { + if (GA->mayBeOverridden()) + return V; + V = GA->getAliasee(); + } else { + return V; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } + return V; +} + +/// isSafeToLoadUnconditionally - Return true if we know that executing a load +/// from this value cannot trap. If it is not obviously safe to load from the +/// specified pointer, we do a quick local scan of the basic block containing +/// ScanFrom, to determine if the address is already accessed. +bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, + unsigned Align, const TargetData *TD) { + uint64_t ByteOffset = 0; + Value *Base = V; + if (TD) + Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); + + const Type *BaseType = 0; + unsigned BaseAlign = 0; + if (const AllocaInst *AI = dyn_cast(Base)) { + // An alloca is safe to load from as load as it is suitably aligned. + BaseType = AI->getAllocatedType(); + BaseAlign = AI->getAlignment(); + } else if (const GlobalValue *GV = dyn_cast(Base)) { + // Global variables are safe to load from but their size cannot be + // guaranteed if they are overridden. + if (!isa(GV) && !GV->mayBeOverridden()) { + BaseType = GV->getType()->getElementType(); + BaseAlign = GV->getAlignment(); + } + } + + if (BaseType && BaseType->isSized()) { + if (TD && BaseAlign == 0) + BaseAlign = TD->getPrefTypeAlignment(BaseType); + + if (Align <= BaseAlign) { + if (!TD) + return true; // Loading directly from an alloca or global is OK. + + // Check if the load is within the bounds of the underlying object. + const PointerType *AddrTy = cast(V->getType()); + uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); + if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && + (Align == 0 || (ByteOffset % Align) == 0)) + return true; + } + } + + // Otherwise, be a little bit aggressive by scanning the local block where we + // want to check to see if the pointer is already being loaded or stored + // from/to. If so, the previous load or store would have already trapped, + // so there is no harm doing an extra load (also, CSE will later eliminate + // the load entirely). + BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); + + while (BBI != E) { + --BBI; + + // If we see a free or a call which may write to memory (i.e. which might do + // a free) the pointer could be marked invalid. 
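The in-bounds test in isSafeToLoadUnconditionally above reduces to simple offset arithmetic once the base object's size is known. The same check in standalone form, assuming the offset, load size, and allocation size have already been computed:

#include <cassert>
#include <cstdint>

// A load of LoadSize bytes at ByteOffset into an object of AllocSize bytes
// is safe if it stays in bounds and the offset suits the alignment.
bool loadInBounds(uint64_t ByteOffset, uint64_t LoadSize,
                  uint64_t AllocSize, unsigned Align) {
  return ByteOffset + LoadSize <= AllocSize &&
         (Align == 0 || ByteOffset % Align == 0);
}

int main() {
  assert(loadInBounds(4, 4, 16, 4));   // i32 at offset 4 of a 16-byte object
  assert(!loadInBounds(14, 4, 16, 0)); // 4 bytes at offset 14 run off the end
  assert(!loadInBounds(2, 4, 16, 4));  // misaligned for 4-byte alignment
  return 0;
}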
+ if (isa(BBI) && BBI->mayWriteToMemory() && + !isa(BBI)) + return false; + + if (LoadInst *LI = dyn_cast(BBI)) { + if (AreEquivalentAddressValues(LI->getOperand(0), V)) return true; + } else if (StoreInst *SI = dyn_cast(BBI)) { + if (AreEquivalentAddressValues(SI->getOperand(1), V)) return true; + } + } + return false; +} + +/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the +/// instruction before ScanFrom) checking to see if we have the value at the +/// memory address *Ptr locally available within a small number of instructions. +/// If the value is available, return it. +/// +/// If not, return the iterator for the last validated instruction that the +/// value would be live through. If we scanned the entire block and didn't find +/// something that invalidates *Ptr or provides it, ScanFrom would be left at +/// begin() and this returns null. ScanFrom could also be left +/// +/// MaxInstsToScan specifies the maximum instructions to scan in the block. If +/// it is set to 0, it will scan the whole block. You can also optionally +/// specify an alias analysis implementation, which makes this more precise. +Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, + BasicBlock::iterator &ScanFrom, + unsigned MaxInstsToScan, + AliasAnalysis *AA) { + if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; + + // If we're using alias analysis to disambiguate get the size of *Ptr. + unsigned AccessSize = 0; + if (AA) { + const Type *AccessTy = cast(Ptr->getType())->getElementType(); + AccessSize = AA->getTypeStoreSize(AccessTy); + } + + while (ScanFrom != ScanBB->begin()) { + // We must ignore debug info directives when counting (otherwise they + // would affect codegen). + Instruction *Inst = --ScanFrom; + if (isa(Inst)) + continue; + + // Restore ScanFrom to expected value in case next test succeeds + ScanFrom++; + + // Don't scan huge blocks. + if (MaxInstsToScan-- == 0) return 0; + + --ScanFrom; + // If this is a load of Ptr, the loaded value is available. + if (LoadInst *LI = dyn_cast(Inst)) + if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) + return LI; + + if (StoreInst *SI = dyn_cast(Inst)) { + // If this is a store through Ptr, the value is available! + if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) + return SI->getOperand(0); + + // If Ptr is an alloca and this is a store to a different alloca, ignore + // the store. This is a trivial form of alias analysis that is important + // for reg2mem'd code. + if ((isa(Ptr) || isa(Ptr)) && + (isa(SI->getOperand(1)) || + isa(SI->getOperand(1)))) + continue; + + // If we have alias analysis and it says the store won't modify the loaded + // value, ignore the store. + if (AA && + (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + continue; + + // Otherwise the store that may or may not alias the pointer, bail out. + ++ScanFrom; + return 0; + } + + // If this is some other instruction that may clobber Ptr, bail out. + if (Inst->mayWriteToMemory()) { + // If alias analysis claims that it really won't modify the load, + // ignore it. + if (AA && + (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) + continue; + + // May modify the pointer, bail out. + ++ScanFrom; + return 0; + } + } + + // Got to the start of the block, we didn't find it, but are done for this + // block. 
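FindAvailableLoadedValue above scans backwards, bounded by MaxInstsToScan, until it finds the value, hits a clobber, or reaches the block start. A toy model over an invented instruction struct; the real pass also consults alias analysis and skips debug intrinsics, both omitted here:

#include <cassert>
#include <string>
#include <vector>

// Toy instruction: a load or store of a named address, or an opaque clobber.
struct Inst { enum { Load, Store, Clobber } Kind; std::string Addr; int Val; };

// Scan backwards from ScanFrom; return a known value for Ptr, or -1 if a
// clobber (or the scan limit) intervenes.
int findAvailable(const std::vector<Inst> &BB, size_t ScanFrom,
                  const std::string &Ptr, unsigned MaxInstsToScan) {
  while (ScanFrom != 0) {
    if (MaxInstsToScan-- == 0) return -1;    // don't scan huge blocks
    const Inst &I = BB[--ScanFrom];
    if (I.Kind == Inst::Store && I.Addr == Ptr) return I.Val;
    if (I.Kind == Inst::Load && I.Addr == Ptr) return I.Val;
    if (I.Kind == Inst::Clobber) return -1;  // may write anywhere: bail out
  }
  return -1; // reached the block start without finding the value
}

int main() {
  std::vector<Inst> BB = {{Inst::Store, "p", 7}, {Inst::Load, "q", 3}};
  assert(findAvailable(BB, 2, "p", 10) == 7);  // the store makes 7 available
  BB.insert(BB.begin() + 1, {Inst::Clobber, "", 0});
  assert(findAvailable(BB, 3, "p", 10) == -1); // a clobber blocks the result
  return 0;
}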
+ return 0; +} diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 735e31f..818d0a9 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -266,15 +266,16 @@ unsigned Loop::getSmallConstantTripMultiple() const { bool Loop::isLCSSAForm(DominatorTree &DT) const { // Sort the blocks vector so that we can use binary search to do quick // lookups. - SmallPtrSet LoopBBs(block_begin(), block_end()); + SmallPtrSet LoopBBs(block_begin(), block_end()); for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) { BasicBlock *BB = *BI; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) { - BasicBlock *UserBB = cast(*UI)->getParent(); - if (PHINode *P = dyn_cast(*UI)) + User *U = *UI; + BasicBlock *UserBB = cast(U)->getParent(); + if (PHINode *P = dyn_cast(U)) UserBB = P->getIncomingBlock(UI); // Check the current block, as a fast-path, before checking whether diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 89f9743..1ab18ca 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -101,9 +101,9 @@ static Value *computeArraySize(const CallInst *CI, const TargetData *TD, if (const StructType *ST = dyn_cast(T)) ElementSize = TD->getStructLayout(ST)->getSizeInBytes(); - // If malloc calls' arg can be determined to be a multiple of ElementSize, + // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. - Value *MallocArg = CI->getOperand(1); + Value *MallocArg = CI->getArgOperand(0); Value *Multiple = NULL; if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) @@ -120,7 +120,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) { Value *ArraySize = computeArraySize(CI, TD); if (ArraySize && - ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1)) + ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) return CI; // CI is a non-array malloc or we can't figure out that it is an array malloc. @@ -183,25 +183,25 @@ Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD, // free Call Utility Functions. // -/// isFreeCall - Returns true if the value is a call to the builtin free() -bool llvm::isFreeCall(const Value *I) { +/// isFreeCall - Returns non-null if the value is a call to the builtin free() +const CallInst *llvm::isFreeCall(const Value *I) { const CallInst *CI = dyn_cast(I); if (!CI) - return false; + return 0; Function *Callee = CI->getCalledFunction(); if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "free") - return false; + return 0; // Check free prototype. // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. 
const FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) - return false; + return 0; if (FTy->getNumParams() != 1) - return false; + return 0; if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext())) - return false; + return 0; - return true; + return CI; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 2aa2f17..1f54d74 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -116,8 +116,8 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, } else if (VAArgInst *V = dyn_cast(Inst)) { Pointer = V->getOperand(0); PointerSize = AA->getTypeStoreSize(V->getType()); - } else if (isFreeCall(Inst)) { - Pointer = Inst->getOperand(1); + } else if (const CallInst *CI = isFreeCall(Inst)) { + Pointer = CI->getArgOperand(0); // calls to free() erase the entire structure PointerSize = ~0ULL; } else if (isa(Inst) || isa(Inst)) { @@ -197,9 +197,9 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point. AliasAnalysis::AliasResult R = - AA->alias(II->getOperand(3), ~0U, MemPtr, ~0U); + AA->alias(II->getArgOperand(2), ~0U, MemPtr, ~0U); if (R == AliasAnalysis::MustAlias) { - InvariantTag = II->getOperand(1); + InvariantTag = II->getArgOperand(0); continue; } @@ -210,7 +210,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // pointer, not on query pointers that are indexed off of them. It'd // be nice to handle that at some point. AliasAnalysis::AliasResult R = - AA->alias(II->getOperand(2), ~0U, MemPtr, ~0U); + AA->alias(II->getArgOperand(1), ~0U, MemPtr, ~0U); if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(II); } @@ -365,25 +365,26 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { MemPtr = LI->getPointerOperand(); MemSize = AA->getTypeStoreSize(LI->getType()); } - } else if (isFreeCall(QueryInst)) { - MemPtr = QueryInst->getOperand(1); + } else if (const CallInst *CI = isFreeCall(QueryInst)) { + MemPtr = CI->getArgOperand(0); // calls to free() erase the entire structure, not just a field. MemSize = ~0UL; } else if (isa(QueryInst) || isa(QueryInst)) { int IntrinsicID = 0; // Intrinsic IDs start at 1. - if (IntrinsicInst *II = dyn_cast(QueryInst)) + IntrinsicInst *II = dyn_cast(QueryInst); + if (II) IntrinsicID = II->getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: case Intrinsic::invariant_start: - MemPtr = QueryInst->getOperand(2); - MemSize = cast(QueryInst->getOperand(1))->getZExtValue(); + MemPtr = II->getArgOperand(1); + MemSize = cast(II->getArgOperand(0))->getZExtValue(); break; case Intrinsic::invariant_end: - MemPtr = QueryInst->getOperand(3); - MemSize = cast(QueryInst->getOperand(2))->getZExtValue(); + MemPtr = II->getArgOperand(2); + MemSize = cast(II->getArgOperand(1))->getZExtValue(); break; default: CallSite QueryCS = CallSite::get(QueryInst); @@ -456,7 +457,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { // Okay, we have a cache entry. If we know it is not dirty, just return it // with no computation. 
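isFreeCall above validates the callee's prototype before trusting the name: free() must return void and take exactly one i8* parameter. A standalone sketch of that prototype test over an invented signature model:

#include <cassert>
#include <string>
#include <vector>

// Toy signature model for the prototype test in isFreeCall.
struct FnType { std::string Ret; std::vector<std::string> Params; };

bool looksLikeFree(const std::string &Name, const FnType &FT) {
  return Name == "free" && FT.Ret == "void" &&
         FT.Params.size() == 1 && FT.Params[0] == "i8*";
}

int main() {
  assert(looksLikeFree("free", {"void", {"i8*"}}));
  assert(!looksLikeFree("free", {"i32", {"i8*"}}));         // wrong return type
  assert(!looksLikeFree("free", {"void", {"i8*", "i32"}})); // extra parameter
  return 0;
}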
if (!CacheP.second) { - NumCacheNonLocal++; + ++NumCacheNonLocal; return Cache; } @@ -478,7 +479,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { BasicBlock *QueryBB = QueryCS.getInstruction()->getParent(); for (BasicBlock **PI = PredCache->GetPreds(QueryBB); *PI; ++PI) DirtyBlocks.push_back(*PI); - NumUncacheNonLocal++; + ++NumUncacheNonLocal; } // isReadonlyCall - If this is a read-only call, we can be more aggressive. diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index f0f3a05..7354afa 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -67,10 +67,11 @@ PostDominanceFrontier::calculate(const PostDominatorTree &DT, if (BB) for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); SI != SE; ++SI) { + BasicBlock *P = *SI; // Does Node immediately dominate this predecessor? - DomTreeNode *SINode = DT[*SI]; + DomTreeNode *SINode = DT[P]; if (SINode && SINode->getIDom() != Node) - S.insert(*SI); + S.insert(P); } // At this point, S is DFlocal. Now we union in DFup's of our children... diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 662576e..38dcd25 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -577,8 +577,6 @@ static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::s template<> bool ProfileInfoT::EstimateMissingEdges(const BasicBlock *BB) { - bool hasNoSuccessors = false; - double inWeight = 0; std::set inMissing; std::set ProcessedPreds; @@ -596,10 +594,8 @@ bool ProfileInfoT::EstimateMissingEdges(const BasicBlock *B std::set outMissing; std::set ProcessedSuccs; succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); - if (sbbi == sbbe) { + if (sbbi == sbbe) readEdge(this,getEdge(BB,0),outWeight,outMissing); - hasNoSuccessors = true; - } for ( ; sbbi != sbbe; ++sbbi ) { if (ProcessedSuccs.insert(*sbbi).second) { readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing); diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 6870268..413b3b4 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -822,7 +822,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, // Fold if the operand is constant. if (const SCEVConstant *SC = dyn_cast(Op)) return getConstant( - cast(ConstantExpr::getTrunc(SC->getValue(), Ty))); + cast(ConstantExpr::getTrunc(SC->getValue(), + getEffectiveSCEVType(Ty)))); // trunc(trunc(x)) --> trunc(x) if (const SCEVTruncateExpr *ST = dyn_cast(Op)) @@ -844,9 +845,9 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, return getAddRecExpr(Operands, AddRec->getLoop()); } - // The cast wasn't folded; create an explicit cast node. - // Recompute the insert position, as it may have been invalidated. - if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + // The cast wasn't folded; create an explicit cast node. We can reuse + // the existing insert position since if we get here, we won't have + // made any changes which would invalidate it. SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); UniqueSCEVs.InsertNode(S, IP); @@ -862,12 +863,10 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, Ty = getEffectiveSCEVType(Ty); // Fold if the operand is constant. 
- if (const SCEVConstant *SC = dyn_cast(Op)) { - const Type *IntTy = getEffectiveSCEVType(Ty); - Constant *C = ConstantExpr::getZExt(SC->getValue(), IntTy); - if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty); - return getConstant(cast(C)); - } + if (const SCEVConstant *SC = dyn_cast(Op)) + return getConstant( + cast(ConstantExpr::getZExt(SC->getValue(), + getEffectiveSCEVType(Ty)))); // zext(zext(x)) --> zext(x) if (const SCEVZeroExtendExpr *SZ = dyn_cast(Op)) @@ -997,12 +996,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, Ty = getEffectiveSCEVType(Ty); // Fold if the operand is constant. - if (const SCEVConstant *SC = dyn_cast(Op)) { - const Type *IntTy = getEffectiveSCEVType(Ty); - Constant *C = ConstantExpr::getSExt(SC->getValue(), IntTy); - if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty); - return getConstant(cast(C)); - } + if (const SCEVConstant *SC = dyn_cast(Op)) + return getConstant( + cast(ConstantExpr::getSExt(SC->getValue(), + getEffectiveSCEVType(Ty)))); // sext(sext(x)) --> sext(x) if (const SCEVSignExtendExpr *SS = dyn_cast(Op)) @@ -1208,8 +1205,19 @@ CollectAddOperandsWithScales(DenseMap &M, ScalarEvolution &SE) { bool Interesting = false; - // Iterate over the add operands. - for (unsigned i = 0, e = NumOperands; i != e; ++i) { + // Iterate over the add operands. They are sorted, with constants first. + unsigned i = 0; + while (const SCEVConstant *C = dyn_cast(Ops[i])) { + ++i; + // Pull a buried constant out to the outside. + if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) + Interesting = true; + AccumulatedConstant += Scale * C->getValue()->getValue(); + } + + // Next comes everything else. We're especially interested in multiplies + // here, but they're in the middle, so just visit the rest with one loop. + for (; i != NumOperands; ++i) { const SCEVMulExpr *Mul = dyn_cast(Ops[i]); if (Mul && isa(Mul->getOperand(0))) { APInt NewScale = @@ -1237,11 +1245,6 @@ CollectAddOperandsWithScales(DenseMap &M, Interesting = true; } } - } else if (const SCEVConstant *C = dyn_cast(Ops[i])) { - // Pull a buried constant out to the outside. - if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) - Interesting = true; - AccumulatedConstant += Scale * C->getValue()->getValue(); } else { // An ordinary operand. Update the map. std::pair::iterator, bool> Pair = @@ -1275,9 +1278,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG + const Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); for (unsigned i = 1, e = Ops.size(); i != e; ++i) - assert(getEffectiveSCEVType(Ops[i]->getType()) == - getEffectiveSCEVType(Ops[0]->getType()) && + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && "SCEVAddExpr operand types don't match!"); #endif @@ -1400,8 +1403,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, while (const SCEVAddExpr *Add = dyn_cast(Ops[Idx])) { // If we have an add, expand the add operands onto the end of the operands // list. - Ops.insert(Ops.end(), Add->op_begin(), Add->op_end()); Ops.erase(Ops.begin()+Idx); + Ops.append(Add->op_begin(), Add->op_end()); DeletedAdd = true; } @@ -1549,9 +1552,11 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, AddRec->op_end()); AddRecOps[0] = getAddExpr(LIOps); - // It's tempting to propagate NUW/NSW flags here, but nuw/nsw addition - // is not associative so this isn't necessarily safe. 
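The CollectAddOperandsWithScales rewrite above exploits the sorted operand order: constants come first, so they can be peeled into the accumulated (scaled) constant before the general loop runs. A concrete miniature of that constants-first peel:

#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

// Each operand is (isConstant, value); constants are sorted to the front.
typedef std::pair<bool, int64_t> Op;

// Peel the leading constants into Accumulated, scaled; return the index of
// the first non-constant operand for the general loop to start at.
size_t peelLeadingConstants(const std::vector<Op> &Ops, int64_t Scale,
                            int64_t &Accumulated) {
  size_t i = 0;
  while (i != Ops.size() && Ops[i].first) {
    Accumulated += Scale * Ops[i].second; // pull a buried constant outside
    ++i;
  }
  return i;
}

int main() {
  std::vector<Op> Ops = {{true, 2}, {true, 3}, {false, 40}};
  int64_t Acc = 0;
  size_t First = peelLeadingConstants(Ops, 4, Acc);
  assert(Acc == 20 && First == 2); // 4*(2+3); the rest starts at index 2
  return 0;
}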
- const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop); + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer add and the inner addrec are guaranteed to have no overflow. + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, + HasNUW && AddRec->hasNoUnsignedWrap(), + HasNSW && AddRec->hasNoSignedWrap()); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -1578,7 +1583,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, AddRec->op_end()); for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) { if (i >= NewOps.size()) { - NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i, + NewOps.append(OtherAddRec->op_begin()+i, OtherAddRec->op_end()); break; } @@ -1711,8 +1716,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, while (const SCEVMulExpr *Mul = dyn_cast(Ops[Idx])) { // If we have an mul, expand the mul operands onto the end of the operands // list. - Ops.insert(Ops.end(), Mul->op_begin(), Mul->op_end()); Ops.erase(Ops.begin()+Idx); + Ops.append(Mul->op_begin(), Mul->op_end()); DeletedMul = true; } @@ -1747,23 +1752,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} SmallVector NewOps; NewOps.reserve(AddRec->getNumOperands()); - if (LIOps.size() == 1) { - const SCEV *Scale = LIOps[0]; - for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) - NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); - } else { - for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { - SmallVector MulOps(LIOps.begin(), LIOps.end()); - MulOps.push_back(AddRec->getOperand(i)); - NewOps.push_back(getMulExpr(MulOps)); - } - } + const SCEV *Scale = getMulExpr(LIOps); + for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) + NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i))); - // It's tempting to propagate the NSW flag here, but nsw multiplication - // is not associative so this isn't necessarily safe. + // Build the new addrec. Propagate the NUW and NSW flags if both the + // outer mul and the inner addrec are guaranteed to have no overflow. const SCEV *NewRec = getAddRecExpr(NewOps, AddRec->getLoop(), HasNUW && AddRec->hasNoUnsignedWrap(), - /*HasNSW=*/false); + HasNSW && AddRec->hasNoSignedWrap()); // If all of the other operands were loop invariant, we are done. 
if (Ops.size() == 1) return NewRec; @@ -1942,8 +1939,7 @@ const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast(Step)) if (StepChrec->getLoop() == L) { - Operands.insert(Operands.end(), StepChrec->op_begin(), - StepChrec->op_end()); + Operands.append(StepChrec->op_begin(), StepChrec->op_end()); return getAddRecExpr(Operands, L); } @@ -2106,8 +2102,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { if (Idx < Ops.size()) { bool DeletedSMax = false; while (const SCEVSMaxExpr *SMax = dyn_cast(Ops[Idx])) { - Ops.insert(Ops.end(), SMax->op_begin(), SMax->op_end()); Ops.erase(Ops.begin()+Idx); + Ops.append(SMax->op_begin(), SMax->op_end()); DeletedSMax = true; } @@ -2211,8 +2207,8 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { if (Idx < Ops.size()) { bool DeletedUMax = false; while (const SCEVUMaxExpr *UMax = dyn_cast(Ops[Idx])) { - Ops.insert(Ops.end(), UMax->op_begin(), UMax->op_end()); Ops.erase(Ops.begin()+Idx); + Ops.append(UMax->op_begin(), UMax->op_end()); DeletedUMax = true; } @@ -2278,7 +2274,8 @@ const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast(C)) - C = ConstantFoldConstantExpression(CE, TD); + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2286,7 +2283,8 @@ const SCEV *ScalarEvolution::getSizeOfExpr(const Type *AllocTy) { const SCEV *ScalarEvolution::getAlignOfExpr(const Type *AllocTy) { Constant *C = ConstantExpr::getAlignOf(AllocTy); if (ConstantExpr *CE = dyn_cast(C)) - C = ConstantFoldConstantExpression(CE, TD); + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2302,7 +2300,8 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(const StructType *STy, Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast(C)) - C = ConstantFoldConstantExpression(CE, TD); + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2311,7 +2310,8 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(const Type *CTy, Constant *FieldNo) { Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); if (ConstantExpr *CE = dyn_cast(C)) - C = ConstantFoldConstantExpression(CE, TD); + if (Constant *Folded = ConstantFoldConstantExpression(CE, TD)) + C = Folded; const Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2398,13 +2398,6 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) { return S; } -/// getIntegerSCEV - Given a SCEVable type, create a constant for the -/// specified signed integer value and return a SCEV for the constant. 
-const SCEV *ScalarEvolution::getIntegerSCEV(int64_t Val, const Type *Ty) { - const IntegerType *ITy = cast(getEffectiveSCEVType(Ty)); - return getConstant(ConstantInt::get(ITy, Val)); -} - /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V /// const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { @@ -2772,7 +2765,11 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { /// const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { - bool InBounds = GEP->isInBounds(); + // Don't blindly transfer the inbounds flag from the GEP instruction to the + // Add expression, because the Instruction may be guarded by control flow + // and the no-overflow bits may not be valid for the expression in any + // context. + const Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); Value *Base = GEP->getOperand(0); // Don't attempt to analyze GEPs over unsized objects. @@ -2788,23 +2785,30 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { if (const StructType *STy = dyn_cast(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast(Index)->getZExtValue(); - TotalOffset = getAddExpr(TotalOffset, - getOffsetOfExpr(STy, FieldNo), - /*HasNUW=*/false, /*HasNSW=*/InBounds); + const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); + + // Add the field offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, FieldOffset); } else { // For an array, add the element offset, explicitly scaled. - const SCEV *LocalOffset = getSCEV(Index); + const SCEV *ElementSize = getSizeOfExpr(*GTI); + const SCEV *IndexS = getSCEV(Index); // Getelementptr indices are signed. - LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy); - // Lower "inbounds" GEPs to NSW arithmetic. - LocalOffset = getMulExpr(LocalOffset, getSizeOfExpr(*GTI), - /*HasNUW=*/false, /*HasNSW=*/InBounds); - TotalOffset = getAddExpr(TotalOffset, LocalOffset, - /*HasNUW=*/false, /*HasNSW=*/InBounds); + IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); + + // Multiply the index by the element size to compute the element offset. + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize); + + // Add the element offset to the running total offset. + TotalOffset = getAddExpr(TotalOffset, LocalOffset); } } - return getAddExpr(getSCEV(Base), TotalOffset, - /*HasNUW=*/false, /*HasNSW=*/InBounds); + + // Get the SCEV for the GEP base. + const SCEV *BaseS = getSCEV(Base); + + // Add the total offset from all the GEP indices to the base. + return getAddExpr(BaseS, TotalOffset); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -2963,7 +2967,8 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVConstant *C = dyn_cast(AddRec->getStart())) if (!C->getValue()->isZero()) ConservativeResult = - ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0)); + ConservativeResult.intersectWith( + ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0))); // TODO: non-affine addrec if (AddRec->isAffine()) { @@ -3196,15 +3201,9 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { Operator *U = cast(V); switch (Opcode) { case Instruction::Add: - // Don't transfer the NSW and NUW bits from the Add instruction to the - // Add expression, because the Instruction may be guarded by control - // flow and the no-overflow bits may not be valid for the expression in - // any context. 
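createNodeForGEP above builds the offset symbolically: struct field offsets are added directly, array indices are scaled by the element size, and the total is added to the base. The same arithmetic in concrete form, with invented types:

#include <cassert>
#include <cstdint>
#include <vector>

// One GEP step: either a struct field (known byte offset) or an array
// index scaled by the element size.
struct Step { bool IsField; uint64_t FieldOffset; int64_t Index; uint64_t ElemSize; };

int64_t gepByteOffset(const std::vector<Step> &Steps) {
  int64_t Total = 0;
  for (const Step &S : Steps)
    Total += S.IsField ? (int64_t)S.FieldOffset
                       : S.Index * (int64_t)S.ElemSize;
  return Total;
}

int main() {
  // e.g. getelementptr {i32, [8 x i16]}* %p, field 1, index 3:
  std::vector<Step> Steps = {{true, 4, 0, 0}, {false, 0, 3, 2}};
  assert(gepByteOffset(Steps) == 4 + 3 * 2);
  return 0;
}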
return getAddExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::Mul: - // Don't transfer the NSW and NUW bits from the Mul instruction to the - // Mul expression, as with Add. return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1))); case Instruction::UDiv: @@ -3658,6 +3657,26 @@ void ScalarEvolution::forgetValue(Value *V) { ConstantEvolutionLoopExitValue.erase(PN); } + // If there's a SCEVUnknown tying this value into the SCEV + // space, remove it from the folding set map. The SCEVUnknown + // object and any other SCEV objects which reference it + // (transitively) remain allocated, effectively leaked until + // the underlying BumpPtrAllocator is freed. + // + // This permits SCEV pointers to be used as keys in maps + // such as the ValuesAtScopes map. + FoldingSetNodeID ID; + ID.AddInteger(scUnknown); + ID.AddPointer(I); + void *IP; + if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { + UniqueSCEVs.RemoveNode(S); + + // This isn't necessary, but we might as well remove the + // value from the ValuesAtScopes map too. + ValuesAtScopes.erase(S); + } + PushDefUseChildren(I, Worklist); } } @@ -4139,8 +4158,7 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { // constant or derived from a PHI node themselves. PHINode *PHI = 0; for (unsigned Op = 0, e = I->getNumOperands(); Op != e; ++Op) - if (!(isa(I->getOperand(Op)) || - isa(I->getOperand(Op)))) { + if (!isa(I->getOperand(Op))) { PHINode *P = getConstantEvolvingPHI(I->getOperand(Op), L); if (P == 0) return 0; // Not evolving from PHI if (PHI == 0) @@ -4161,11 +4179,9 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal, const TargetData *TD) { if (isa(V)) return PHIVal; if (Constant *C = dyn_cast(V)) return C; - if (GlobalValue *GV = dyn_cast(V)) return GV; Instruction *I = cast(V); - std::vector Operands; - Operands.resize(I->getNumOperands()); + std::vector Operands(I->getNumOperands()); for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD); @@ -4207,8 +4223,8 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, return RetVal = 0; // Must be a constant. Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - PHINode *PN2 = getConstantEvolvingPHI(BEValue, L); - if (PN2 != PN) + if (getConstantEvolvingPHI(BEValue, L) != PN && + !isa(BEValue)) return RetVal = 0; // Not derived from same PHI. // Execute the loop symbolically to determine the exit value. @@ -4243,8 +4259,11 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, PHINode *PN = getConstantEvolvingPHI(Cond, L); if (PN == 0) return getCouldNotCompute(); - // Since the loop is canonicalized, the PHI node must have two entries. One - // entry must be a constant (coming in from outside of the loop), and the + // If the loop is canonicalized, the PHI will have exactly two entries. + // That's the only form we support here. + if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); + + // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); Constant *StartCST = @@ -4252,8 +4271,9 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L, if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. 
Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - PHINode *PN2 = getConstantEvolvingPHI(BEValue, L); - if (PN2 != PN) return getCouldNotCompute(); // Not derived from same PHI. + if (getConstantEvolvingPHI(BEValue, L) != PN && + !isa(BEValue)) + return getCouldNotCompute(); // Not derived from same PHI. // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of @@ -4341,54 +4361,51 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // the arguments into constants, and if so, try to constant propagate the // result. This is particularly useful for computing loop exit values. if (CanConstantFold(I)) { - std::vector Operands; - Operands.reserve(I->getNumOperands()); + SmallVector Operands; + bool MadeImprovement = false; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { Value *Op = I->getOperand(i); if (Constant *C = dyn_cast(Op)) { Operands.push_back(C); - } else { - // If any of the operands is non-constant and if they are - // non-integer and non-pointer, don't even try to analyze them - // with scev techniques. - if (!isSCEVable(Op->getType())) - return V; - - const SCEV *OpV = getSCEVAtScope(Op, L); - if (const SCEVConstant *SC = dyn_cast(OpV)) { - Constant *C = SC->getValue(); - if (C->getType() != Op->getType()) - C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - Op->getType(), - false), - C, Op->getType()); - Operands.push_back(C); - } else if (const SCEVUnknown *SU = dyn_cast(OpV)) { - if (Constant *C = dyn_cast(SU->getValue())) { - if (C->getType() != Op->getType()) - C = - ConstantExpr::getCast(CastInst::getCastOpcode(C, false, - Op->getType(), - false), - C, Op->getType()); - Operands.push_back(C); - } else - return V; - } else { - return V; - } + continue; } + + // If any of the operands is non-constant and if they are + // non-integer and non-pointer, don't even try to analyze them + // with scev techniques. + if (!isSCEVable(Op->getType())) + return V; + + const SCEV *OrigV = getSCEV(Op); + const SCEV *OpV = getSCEVAtScope(OrigV, L); + MadeImprovement |= OrigV != OpV; + + Constant *C = 0; + if (const SCEVConstant *SC = dyn_cast(OpV)) + C = SC->getValue(); + if (const SCEVUnknown *SU = dyn_cast(OpV)) + C = dyn_cast(SU->getValue()); + if (!C) return V; + if (C->getType() != Op->getType()) + C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, + Op->getType(), + false), + C, Op->getType()); + Operands.push_back(C); } - Constant *C = 0; - if (const CmpInst *CI = dyn_cast(I)) - C = ConstantFoldCompareInstOperands(CI->getPredicate(), - Operands[0], Operands[1], TD); - else - C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), - &Operands[0], Operands.size(), TD); - if (C) + // Check to see if getSCEVAtScope actually made an improvement. + if (MadeImprovement) { + Constant *C = 0; + if (const CmpInst *CI = dyn_cast(I)) + C = ConstantFoldCompareInstOperands(CI->getPredicate(), + Operands[0], Operands[1], TD); + else + C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), + &Operands[0], Operands.size(), TD); + if (!C) return V; return getSCEV(C); + } } } @@ -4438,7 +4455,29 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { // If this is a loop recurrence for a loop that does not contain L, then we // are dealing with the final value computed by the loop. 
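The rewritten computeSCEVAtScope loop above only re-folds when evaluating at scope actually improved at least one operand (the MadeImprovement flag); otherwise it returns the original value untouched. A toy model of that gate, with strings standing in for SCEV expressions:

#include <assert.h>
#include <string>
#include <vector>

// Re-fold only when some operand changed at the given scope.
std::string foldAtScope(const std::string &V,
                        const std::vector<std::string> &Ops,
                        const std::vector<std::string> &OpsAtScope) {
  bool MadeImprovement = false;
  for (size_t i = 0; i != Ops.size(); ++i)
    MadeImprovement |= (Ops[i] != OpsAtScope[i]);
  if (!MadeImprovement)
    return V; // nothing changed: skip the re-fold entirely
  std::string Folded = "fold(";
  for (size_t i = 0; i != OpsAtScope.size(); ++i)
    Folded += (i ? "," : "") + OpsAtScope[i];
  return Folded + ")";
}

int main() {
  assert(foldAtScope("v", {"a", "b"}, {"a", "b"}) == "v");
  assert(foldAtScope("v", {"a", "b"}, {"a", "7"}) == "fold(a,7)");
  return 0;
}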
@@ -4438,7 +4455,29 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
 
   // If this is a loop recurrence for a loop that does not contain L, then we
   // are dealing with the final value computed by the loop.
   if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
-    if (!L || !AddRec->getLoop()->contains(L)) {
+    // First, attempt to evaluate each operand.
+    // Avoid performing the look-up in the common case where the specified
+    // expression has no loop-variant portions.
+    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
+      const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
+      if (OpAtScope == AddRec->getOperand(i))
+        continue;
+
+      // Okay, at least one of these operands is loop variant but might be
+      // foldable.  Build a new instance of the folded commutative expression.
+      SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
+                                          AddRec->op_begin()+i);
+      NewOps.push_back(OpAtScope);
+      for (++i; i != e; ++i)
+        NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));
+
+      AddRec = cast<SCEVAddRecExpr>(getAddRecExpr(NewOps, AddRec->getLoop()));
+      break;
+    }
+
+    // If the scope is outside the addrec's loop, evaluate it by using the
+    // loop exit value of the addrec.
+    if (!AddRec->getLoop()->contains(L)) {
       // To evaluate this recurrence, we need to know how many times the AddRec
       // loop iterates.  Compute this now.
       const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
@@ -4447,6 +4486,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
       // Then, evaluate the AddRec.
       return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
     }
+
     return AddRec;
   }
 
@@ -4696,23 +4736,6 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
   return getCouldNotCompute();
 }
 
-/// getLoopPredecessor - If the given loop's header has exactly one unique
-/// predecessor outside the loop, return it. Otherwise return null.
-/// This is less strict that the loop "preheader" concept, which requires
-/// the predecessor to have only one single successor.
-///
-BasicBlock *ScalarEvolution::getLoopPredecessor(const Loop *L) {
-  BasicBlock *Header = L->getHeader();
-  BasicBlock *Pred = 0;
-  for (pred_iterator PI = pred_begin(Header), E = pred_end(Header);
-       PI != E; ++PI)
-    if (!L->contains(*PI)) {
-      if (Pred && Pred != *PI) return 0; // Multiple predecessors.
-      Pred = *PI;
-    }
-  return Pred;
-}
-
 /// getPredecessorWithUniqueSuccessorForBB - Return a predecessor of BB
 /// (which may not be an immediate predecessor) which has exactly one
 /// successor from which BB is reachable, or null if no such block is
@@ -4730,7 +4753,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
   // If the header has a unique predecessor outside the loop, it must be
   // a block that has exactly one successor that can reach the loop.
   if (Loop *L = LI->getLoopFor(BB))
-    return std::make_pair(getLoopPredecessor(L), L->getHeader());
+    return std::make_pair(L->getLoopPredecessor(), L->getHeader());
 
   return std::pair<BasicBlock *, BasicBlock *>();
 }
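
For reference, the evaluateAtIteration call above computes the value of an addrec {a0,+,a1,+,a2,...} after n backedge traversals as the binomial sum a0*C(n,0) + a1*C(n,1) + a2*C(n,2) + ... . A standalone sketch with plain integers in place of SCEV expressions (names invented):

#include <cstddef>
#include <cstdio>
#include <vector>

// C(n, k), computed exactly: each partial product divides evenly.
static long long binom(long long n, long long k) {
  long long r = 1;
  for (long long i = 1; i <= k; ++i)
    r = r * (n - i + 1) / i;
  return r;
}

static long long evaluateAtIteration(const std::vector<long long> &ops,
                                     long long n) {
  long long result = 0;
  for (std::size_t i = 0; i != ops.size(); ++i)
    result += ops[i] * binom(n, i);  // a_i * C(n, i)
  return result;
}

int main() {
  std::vector<long long> iv;   // {0,+,3}: value(n) = 3n
  iv.push_back(0); iv.push_back(3);
  std::vector<long long> tri;  // {0,+,1,+,1}: value(n) = n(n+1)/2
  tri.push_back(0); tri.push_back(1); tri.push_back(1);
  std::printf("%lld %lld\n", evaluateAtIteration(iv, 10),    // 30
              evaluateAtIteration(tri, 10));                 // 55
  return 0;
}

For the affine case {start,+,step} this collapses to start + step*n, which is what feeding in the backedge-taken count yields as the loop exit value.
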
@@ -5181,7 +5204,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
   // as there are predecessors that can be found that have unique successors
   // leading to the original header.
   for (std::pair<BasicBlock*, BasicBlock*>
-         Pair(getLoopPredecessor(L), L->getHeader());
+         Pair(L->getLoopPredecessor(), L->getHeader());
        Pair.first;
        Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {
 
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 17b254f..58711b8 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -12,7 +12,7 @@
 //
 // This differs from traditional loop dependence analysis in that it tests
 // for dependencies within a single iteration of a loop, rather than
-// dependences between different iterations.
+// dependencies between different iterations.
 //
 // ScalarEvolution has a more complete understanding of pointer arithmetic
 // than BasicAliasAnalysis' collection of ad-hoc analyses.
@@ -106,6 +106,12 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
 AliasAnalysis::AliasResult
 ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
                                     const Value *B, unsigned BSize) {
+  // If either of the memory references is empty, it doesn't matter what the
+  // pointer values are. This allows the code below to ignore this special
+  // case.
+  if (ASize == 0 || BSize == 0)
+    return NoAlias;
+
   // This is ScalarEvolutionAliasAnalysis. Get the SCEVs!
   const SCEV *AS = SE->getSCEV(const_cast<Value *>(A));
   const SCEV *BS = SE->getSCEV(const_cast<Value *>(B));
@@ -118,14 +124,32 @@ ScalarEvolutionAliasAnalysis::alias(const Value *A, unsigned ASize,
   if (SE->getEffectiveSCEVType(AS->getType()) ==
       SE->getEffectiveSCEVType(BS->getType())) {
     unsigned BitWidth = SE->getTypeSizeInBits(AS->getType());
-    APInt AI(BitWidth, ASize);
+    APInt ASizeInt(BitWidth, ASize);
+    APInt BSizeInt(BitWidth, BSize);
+
+    // Compute the difference between the two pointers.
     const SCEV *BA = SE->getMinusSCEV(BS, AS);
-    if (AI.ule(SE->getUnsignedRange(BA).getUnsignedMin())) {
-      APInt BI(BitWidth, BSize);
-      const SCEV *AB = SE->getMinusSCEV(AS, BS);
-      if (BI.ule(SE->getUnsignedRange(AB).getUnsignedMin()))
-        return NoAlias;
-    }
+
+    // Test whether the difference is known to be great enough that memory of
+    // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
+    // are non-zero, which is special-cased above.
+    if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) &&
+        (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax()))
+      return NoAlias;
+
+    // Folding the subtraction while preserving range information can be tricky
+    // (because of INT_MIN, etc.); if the prior test failed, swap AS and BS
+    // and try again to see if things fold better that way.
+
+    // Compute the difference between the two pointers.
+    const SCEV *AB = SE->getMinusSCEV(AS, BS);
+
+    // Test whether the difference is known to be great enough that memory of
+    // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt
+    // are non-zero, which is special-cased above.
+    if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) &&
+        (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax()))
+      return NoAlias;
   }
 
   // If ScalarEvolution can find an underlying object, form a new query.
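
The two-sided test above is easiest to see with concrete numbers. Writing D = B - A in modular arithmetic, the accesses [A, A+ASize) and [B, B+BSize) cannot overlap when ASize <= umin(D) and -BSize >= umax(D): the first bound keeps B from landing inside A's bytes, the second keeps B+BSize from wrapping around the address space back onto A. A self-contained sketch with made-up values, using uint8_t where the real code uses APInt:

#include <cstdint>
#include <cstdio>

int main() {
  const uint8_t ASize = 4, BSize = 4;
  // Suppose analysis proved D = B - A is exactly 12 (so umin = umax = 12).
  const uint8_t Dmin = 12, Dmax = 12;
  bool NoAlias = (ASize <= Dmin) &&
                 ((uint8_t)(-BSize) >= Dmax);  // -4 mod 256 == 252
  std::printf(NoAlias ? "NoAlias\n" : "MayAlias\n");  // prints NoAlias
  return 0;
}
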
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 0012b84..d4a4b26 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -21,6 +21,43 @@ #include "llvm/ADT/STLExtras.h" using namespace llvm; +/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, +/// reusing an existing cast if a suitable one exists, moving an existing +/// cast if a suitable one exists but isn't in the right place, or +/// creating a new one. +Value *SCEVExpander::ReuseOrCreateCast(Value *V, const Type *Ty, + Instruction::CastOps Op, + BasicBlock::iterator IP) { + // Check to see if there is already a cast! + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + User *U = *UI; + if (U->getType() == Ty) + if (CastInst *CI = dyn_cast(U)) + if (CI->getOpcode() == Op) { + // If the cast isn't where we want it, fix it. + if (BasicBlock::iterator(CI) != IP) { + // Create a new cast, and leave the old cast in place in case + // it is being used as an insert point. Clear its operand + // so that it doesn't hold anything live. + Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP); + NewCI->takeName(CI); + CI->replaceAllUsesWith(NewCI); + CI->setOperand(0, UndefValue::get(V->getType())); + rememberInstruction(NewCI); + return NewCI; + } + rememberInstruction(CI); + return CI; + } + } + + // Create a new cast. + Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP); + rememberInstruction(I); + return I; +} + /// InsertNoopCastOfTo - Insert a cast of V to the specified type, /// which must be possible with a noop cast, doing what we can to share /// the casts. @@ -54,71 +91,29 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, const Type *Ty) { return CE->getOperand(0); } + // Fold a cast of a constant. if (Constant *C = dyn_cast(V)) return ConstantExpr::getCast(Op, C, Ty); + // Cast the argument at the beginning of the entry block, after + // any bitcasts of other arguments. if (Argument *A = dyn_cast(V)) { - // Check to see if there is already a cast! - for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) - if ((*UI)->getType() == Ty) - if (CastInst *CI = dyn_cast(cast(*UI))) - if (CI->getOpcode() == Op) { - // If the cast isn't the first instruction of the function, move it. - if (BasicBlock::iterator(CI) != - A->getParent()->getEntryBlock().begin()) { - // Recreate the cast at the beginning of the entry block. - // The old cast is left in place in case it is being used - // as an insert point. - Instruction *NewCI = - CastInst::Create(Op, V, Ty, "", - A->getParent()->getEntryBlock().begin()); - NewCI->takeName(CI); - CI->replaceAllUsesWith(NewCI); - return NewCI; - } - return CI; - } - - Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), - A->getParent()->getEntryBlock().begin()); - rememberInstruction(I); - return I; + BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); + while ((isa(IP) && + isa(cast(IP)->getOperand(0)) && + cast(IP)->getOperand(0) != A) || + isa(IP)) + ++IP; + return ReuseOrCreateCast(A, Ty, Op, IP); } + // Cast the instruction immediately after the instruction. Instruction *I = cast(V); - - // Check to see if there is already a cast. If there is, use it. 
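
The reuse-before-create policy of ReuseOrCreateCast above (scan for an equivalent existing cast, relocate it if it sits in the wrong place, create a new one only as a last resort) is at heart memoization keyed on value, destination type, and opcode. A deliberately tiny toy of the lookup half, with invented names and a map standing in for the use-list scan; the relocation case is omitted:

#include <cstdio>
#include <map>
#include <string>
#include <utility>

static std::map<std::pair<std::string, std::string>, int> ExistingCasts;
static int NextId = 0;

static int reuseOrCreateCast(const std::string &V, const std::string &Ty) {
  std::pair<std::string, std::string> Key(V, Ty);
  std::map<std::pair<std::string, std::string>, int>::iterator It =
      ExistingCasts.find(Key);
  if (It != ExistingCasts.end())
    return It->second;                    // reuse the cast we already made
  return ExistingCasts[Key] = NextId++;   // no match: create a new cast
}

int main() {
  int A = reuseOrCreateCast("v", "i64");  // created, id 0
  int B = reuseOrCreateCast("v", "i64");  // reused, id 0
  int C = reuseOrCreateCast("v", "i8*");  // created, id 1
  std::printf("%d %d %d\n", A, B, C);     // prints: 0 0 1
  return 0;
}
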
- for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) { - if ((*UI)->getType() == Ty) - if (CastInst *CI = dyn_cast(cast(*UI))) - if (CI->getOpcode() == Op) { - BasicBlock::iterator It = I; ++It; - if (isa(I)) - It = cast(I)->getNormalDest()->begin(); - while (isa(It)) ++It; - if (It != BasicBlock::iterator(CI)) { - // Recreate the cast after the user. - // The old cast is left in place in case it is being used - // as an insert point. - Instruction *NewCI = CastInst::Create(Op, V, Ty, "", It); - NewCI->takeName(CI); - CI->replaceAllUsesWith(NewCI); - rememberInstruction(NewCI); - return NewCI; - } - rememberInstruction(CI); - return CI; - } - } BasicBlock::iterator IP = I; ++IP; if (InvokeInst *II = dyn_cast(I)) IP = II->getNormalDest()->begin(); - while (isa(IP)) ++IP; - Instruction *CI = CastInst::Create(Op, V, Ty, V->getName(), IP); - rememberInstruction(CI); - return CI; + while (isa(IP) || isa(IP)) ++IP; + return ReuseOrCreateCast(I, Ty, Op, IP); } /// InsertBinop - Insert the specified binary operator, doing a small amount @@ -295,11 +290,11 @@ static void SimplifyAddOperands(SmallVectorImpl &Ops, // the sum into a single value, so just use that. Ops.clear(); if (const SCEVAddExpr *Add = dyn_cast(Sum)) - Ops.insert(Ops.end(), Add->op_begin(), Add->op_end()); + Ops.append(Add->op_begin(), Add->op_end()); else if (!Sum->isZero()) Ops.push_back(Sum); // Then append the addrecs. - Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end()); + Ops.append(AddRecs.begin(), AddRecs.end()); } /// SplitAddRecs - Flatten a list of add operands, moving addrec start values @@ -322,7 +317,7 @@ static void SplitAddRecs(SmallVectorImpl &Ops, A->getLoop())); if (const SCEVAddExpr *Add = dyn_cast(Start)) { Ops[i] = Zero; - Ops.insert(Ops.end(), Add->op_begin(), Add->op_end()); + Ops.append(Add->op_begin(), Add->op_end()); e += Add->getNumOperands(); } else { Ops[i] = Start; @@ -330,7 +325,7 @@ static void SplitAddRecs(SmallVectorImpl &Ops, } if (!AddRecs.empty()) { // Add the addrecs onto the end of the list. - Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end()); + Ops.append(AddRecs.begin(), AddRecs.end()); // Resort the operand list, moving any constants to the front. SimplifyAddOperands(Ops, Ty, SE); } @@ -1070,7 +1065,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = llvm::next(BasicBlock::iterator(cast(V))); - while (isa(NewInsertPt)) ++NewInsertPt; + while (isa(NewInsertPt) || isa(NewInsertPt)) + ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); restoreInsertPoint(SaveInsertBB, SaveInsertPt); @@ -1107,8 +1103,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { } // {0,+,1} --> Insert a canonical induction variable into the loop! - if (S->isAffine() && - S->getOperand(1) == SE.getConstant(Ty, 1)) { + if (S->isAffine() && S->getOperand(1)->isOne()) { // If there's a canonical IV, just use it. if (CanonicalIV) { assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && @@ -1125,17 +1120,19 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Constant *One = ConstantInt::get(Ty, 1); for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); - HPI != HPE; ++HPI) - if (L->contains(*HPI)) { + HPI != HPE; ++HPI) { + BasicBlock *HP = *HPI; + if (L->contains(HP)) { // Insert a unit add instruction right before the terminator // corresponding to the back-edge. 
Instruction *Add = BinaryOperator::CreateAdd(PN, One, "indvar.next", - (*HPI)->getTerminator()); + HP->getTerminator()); rememberInstruction(Add); - PN->addIncoming(Add, *HPI); + PN->addIncoming(Add, HP); } else { - PN->addIncoming(Constant::getNullValue(Ty), *HPI); + PN->addIncoming(Constant::getNullValue(Ty), HP); } + } } // {0,+,F} --> {0,+,1} * F @@ -1312,7 +1309,9 @@ Value *SCEVExpander::expand(const SCEV *S) { } void SCEVExpander::rememberInstruction(Value *I) { - if (PostIncLoops.empty()) + if (!PostIncLoops.empty()) + InsertedPostIncValues.insert(I); + else InsertedValues.insert(I); // If we just claimed an existing instruction and that instruction had diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index 75c381d..563fd2f 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -105,22 +105,25 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, case NormalizeAutodetect: if (Instruction *OI = dyn_cast(OperandValToReplace)) if (IVUseShouldUsePostIncValue(User, OI, L, &DT)) { - Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); + const SCEV *TransformedStep = + TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), + User, OperandValToReplace, Loops, SE, DT); + Result = SE.getMinusSCEV(Result, TransformedStep); Loops.insert(L); } break; case Normalize: - if (Loops.count(L)) - Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); - break; - case Denormalize: if (Loops.count(L)) { const SCEV *TransformedStep = TransformForPostIncUse(Kind, AR->getStepRecurrence(SE), User, OperandValToReplace, Loops, SE, DT); - Result = SE.getAddExpr(Result, TransformedStep); + Result = SE.getMinusSCEV(Result, TransformedStep); } break; + case Denormalize: + if (Loops.count(L)) + Result = SE.getAddExpr(Result, AR->getStepRecurrence(SE)); + break; } return Result; } diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 7e8ec2e..b4c9884 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -953,7 +953,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (const IntrinsicInst *II = dyn_cast(I)) // sqrt(-0.0) = -0.0, no other negative results are possible. if (II->getIntrinsicID() == Intrinsic::sqrt) - return CannotBeNegativeZero(II->getOperand(1), Depth+1); + return CannotBeNegativeZero(II->getArgOperand(0), Depth+1); if (const CallInst *CI = dyn_cast(I)) if (const Function *F = CI->getCalledFunction()) { @@ -966,7 +966,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (F->getName() == "fabsl") return true; if (F->getName() == "sqrt" || F->getName() == "sqrtf" || F->getName() == "sqrtl") - return CannotBeNegativeZero(CI->getOperand(1), Depth+1); + return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1); } } diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index 21d4f65..7eeeb59 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -366,8 +366,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // Check for errors opening or creating archive file. 
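
Returning to the ScalarEvolutionNormalization hunk above: for an affine addrec {start,+,step}, a post-increment user observes the value after the step has been added, so Normalize subtracts one step and Denormalize adds it back, and the two must be exact inverses. The step is itself transformed recursively because it can be an addrec of an enclosing loop. A made-up arithmetic check of the round trip:

#include <cstdio>

int main() {
  const int start = 10, step = 3, n = 5;
  int postInc = start + step * (n + 1); // what the post-inc user observes
  int normalized = postInc - step;      // Normalize: subtract the step
  int denormalized = normalized + step; // Denormalize: add it back
  std::printf("%d %d %d\n", postInc, normalized, denormalized); // 28 25 28
  return 0;
}
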
if (!ArchiveFile.is_open() || ArchiveFile.bad()) { - if (TmpArchive.exists()) - TmpArchive.eraseFromDisk(); + TmpArchive.eraseFromDisk(); if (ErrMsg) *ErrMsg = "Error opening archive file: " + archPath.str(); return true; @@ -387,8 +386,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { if (writeMember(*I, ArchiveFile, CreateSymbolTable, TruncateNames, Compress, ErrMsg)) { - if (TmpArchive.exists()) - TmpArchive.eraseFromDisk(); + TmpArchive.eraseFromDisk(); ArchiveFile.close(); return true; } @@ -420,8 +418,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, std::ofstream FinalFile(FinalFilePath.c_str(), io_mode); if (!FinalFile.is_open() || FinalFile.bad()) { - if (TmpArchive.exists()) - TmpArchive.eraseFromDisk(); + TmpArchive.eraseFromDisk(); if (ErrMsg) *ErrMsg = "Error opening archive file: " + FinalFilePath.str(); return true; @@ -438,8 +435,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, if (foreignST) { if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) { FinalFile.close(); - if (TmpArchive.exists()) - TmpArchive.eraseFromDisk(); + TmpArchive.eraseFromDisk(); return true; } } diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 9b4370f..f4c0e50 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -492,6 +492,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(private); KEYWORD(linker_private); + KEYWORD(linker_private_weak); KEYWORD(internal); KEYWORD(available_externally); KEYWORD(linkonce); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 226d8d3..6752181 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -196,19 +196,20 @@ bool LLParser::ParseTopLevelEntities() { // optional leading prefixes, the production is: // GlobalVar ::= OptionalLinkage OptionalVisibility OptionalThreadLocal // OptionalAddrSpace ('constant'|'global') ... 
- case lltok::kw_private : // OptionalLinkage - case lltok::kw_linker_private: // OptionalLinkage - case lltok::kw_internal: // OptionalLinkage - case lltok::kw_weak: // OptionalLinkage - case lltok::kw_weak_odr: // OptionalLinkage - case lltok::kw_linkonce: // OptionalLinkage - case lltok::kw_linkonce_odr: // OptionalLinkage - case lltok::kw_appending: // OptionalLinkage - case lltok::kw_dllexport: // OptionalLinkage - case lltok::kw_common: // OptionalLinkage - case lltok::kw_dllimport: // OptionalLinkage - case lltok::kw_extern_weak: // OptionalLinkage - case lltok::kw_external: { // OptionalLinkage + case lltok::kw_private: // OptionalLinkage + case lltok::kw_linker_private: // OptionalLinkage + case lltok::kw_linker_private_weak: // OptionalLinkage + case lltok::kw_internal: // OptionalLinkage + case lltok::kw_weak: // OptionalLinkage + case lltok::kw_weak_odr: // OptionalLinkage + case lltok::kw_linkonce: // OptionalLinkage + case lltok::kw_linkonce_odr: // OptionalLinkage + case lltok::kw_appending: // OptionalLinkage + case lltok::kw_dllexport: // OptionalLinkage + case lltok::kw_common: // OptionalLinkage + case lltok::kw_dllimport: // OptionalLinkage + case lltok::kw_extern_weak: // OptionalLinkage + case lltok::kw_external: { // OptionalLinkage unsigned Linkage, Visibility; if (ParseOptionalLinkage(Linkage) || ParseOptionalVisibility(Visibility) || @@ -629,7 +630,8 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, Linkage != GlobalValue::WeakODRLinkage && Linkage != GlobalValue::InternalLinkage && Linkage != GlobalValue::PrivateLinkage && - Linkage != GlobalValue::LinkerPrivateLinkage) + Linkage != GlobalValue::LinkerPrivateLinkage && + Linkage != GlobalValue::LinkerPrivateWeakLinkage) return Error(LinkageLoc, "invalid linkage type for alias"); Constant *Aliasee; @@ -1013,11 +1015,13 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { /// ::= /*empty*/ /// ::= 'private' /// ::= 'linker_private' +/// ::= 'linker_private_weak' /// ::= 'internal' /// ::= 'weak' /// ::= 'weak_odr' /// ::= 'linkonce' /// ::= 'linkonce_odr' +/// ::= 'available_externally' /// ::= 'appending' /// ::= 'dllexport' /// ::= 'common' @@ -1030,6 +1034,9 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { default: Res=GlobalValue::ExternalLinkage; return false; case lltok::kw_private: Res = GlobalValue::PrivateLinkage; break; case lltok::kw_linker_private: Res = GlobalValue::LinkerPrivateLinkage; break; + case lltok::kw_linker_private_weak: + Res = GlobalValue::LinkerPrivateWeakLinkage; + break; case lltok::kw_internal: Res = GlobalValue::InternalLinkage; break; case lltok::kw_weak: Res = GlobalValue::WeakAnyLinkage; break; case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break; @@ -2704,6 +2711,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { break; case GlobalValue::PrivateLinkage: case GlobalValue::LinkerPrivateLinkage: + case GlobalValue::LinkerPrivateWeakLinkage: case GlobalValue::InternalLinkage: case GlobalValue::AvailableExternallyLinkage: case GlobalValue::LinkOnceAnyLinkage: @@ -3791,8 +3799,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS, } } - if (Size && !Size->getType()->isIntegerTy(32)) - return Error(SizeLoc, "element count must be i32"); + if (Size && !Size->getType()->isIntegerTy()) + return Error(SizeLoc, "element count must have integer type"); if (isAlloca) { Inst = new AllocaInst(Ty, Size, Alignment); @@ -3801,6 +3809,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, 
PerFunctionState &PFS, // Autoupgrade old malloc instruction to malloc call. // FIXME: Remove in LLVM 3.0. + if (Size && !Size->getType()->isIntegerTy(32)) + return Error(SizeLoc, "element count must be i32"); const Type *IntPtrTy = Type::getInt32Ty(Context); Constant *AllocSize = ConstantExpr::getSizeOf(Ty); AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 5eed170..2703134 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -37,9 +37,9 @@ namespace lltok { kw_declare, kw_define, kw_global, kw_constant, - kw_private, kw_linker_private, kw_internal, kw_linkonce, kw_linkonce_odr, - kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common, - kw_available_externally, + kw_private, kw_linker_private, kw_linker_private_weak, kw_internal, + kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending, + kw_dllimport, kw_dllexport, kw_common, kw_available_externally, kw_default, kw_hidden, kw_protected, kw_extern_weak, kw_external, kw_thread_local, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 69adead..527ae49 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -75,6 +75,7 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) { case 11: return GlobalValue::LinkOnceODRLinkage; case 12: return GlobalValue::AvailableExternallyLinkage; case 13: return GlobalValue::LinkerPrivateLinkage; + case 14: return GlobalValue::LinkerPrivateWeakLinkage; } } @@ -252,17 +253,18 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { // at once. while (!Placeholder->use_empty()) { Value::use_iterator UI = Placeholder->use_begin(); + User *U = *UI; // If the using object isn't uniqued, just update the operands. This // handles instructions and initializers for global variables. - if (!isa(*UI) || isa(*UI)) { + if (!isa(U) || isa(U)) { UI.getUse().set(RealVal); continue; } // Otherwise, we have a constant that uses the placeholder. Replace that // constant with a new constant that has *all* placeholder uses updated. - Constant *UserC = cast(*UI); + Constant *UserC = cast(U); for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end(); I != E; ++I) { Value *NewOp; @@ -2178,13 +2180,18 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { InstructionList.push_back(I); break; } - case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, op, align] - if (Record.size() < 3) + case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] + // For backward compatibility, tolerate a lack of an opty, and use i32. + // LLVM 3.0: Remove this. + if (Record.size() < 3 || Record.size() > 4) return Error("Invalid ALLOCA record"); + unsigned OpNum = 0; const PointerType *Ty = - dyn_cast_or_null(getTypeByID(Record[0])); - Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context)); - unsigned Align = Record[2]; + dyn_cast_or_null(getTypeByID(Record[OpNum++])); + const Type *OpTy = Record.size() == 4 ? 
getTypeByID(Record[OpNum++]) : + Type::getInt32Ty(Context); + Value *Size = getFnValueByID(Record[OpNum++], OpTy); + unsigned Align = Record[OpNum++]; if (!Ty || !Size) return Error("Invalid ALLOCA record"); I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1); InstructionList.push_back(I); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 9bda6dc..fa1b2c4 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -313,6 +313,7 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) { case GlobalValue::LinkOnceODRLinkage: return 11; case GlobalValue::AvailableExternallyLinkage: return 12; case GlobalValue::LinkerPrivateLinkage: return 13; + case GlobalValue::LinkerPrivateWeakLinkage: return 14; } } @@ -577,10 +578,9 @@ static void WriteFunctionLocalMetadata(const Function &F, BitstreamWriter &Stream) { bool StartedMetadataBlock = false; SmallVector Record; - const ValueEnumerator::ValueList &Vals = VE.getMDValues(); - + const SmallVector &Vals = VE.getFunctionLocalMDValues(); for (unsigned i = 0, e = Vals.size(); i != e; ++i) - if (const MDNode *N = dyn_cast(Vals[i].first)) + if (const MDNode *N = Vals[i]) if (N->isFunctionLocal() && N->getFunction() == &F) { if (!StartedMetadataBlock) { Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); @@ -588,7 +588,7 @@ static void WriteFunctionLocalMetadata(const Function &F, } WriteMDNode(N, VE, Stream, Record); } - + if (StartedMetadataBlock) Stream.ExitBlock(); } @@ -1114,6 +1114,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::Alloca: Code = bitc::FUNC_CODE_INST_ALLOCA; Vals.push_back(VE.getTypeID(I.getType())); + Vals.push_back(VE.getTypeID(I.getOperand(0)->getType())); Vals.push_back(VE.getValueID(I.getOperand(0))); // size. Vals.push_back(Log2_32(cast(I).getAlignment())+1); break; @@ -1134,26 +1135,25 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, Vals.push_back(cast(I).isVolatile()); break; case Instruction::Call: { - const PointerType *PTy = cast(I.getOperand(0)->getType()); + const CallInst &CI = cast(I); + const PointerType *PTy = cast(CI.getCalledValue()->getType()); const FunctionType *FTy = cast(PTy->getElementType()); Code = bitc::FUNC_CODE_INST_CALL; - const CallInst *CI = cast(&I); - Vals.push_back(VE.getAttributeID(CI->getAttributes())); - Vals.push_back((CI->getCallingConv() << 1) | unsigned(CI->isTailCall())); - PushValueAndType(CI->getOperand(0), InstID, Vals, VE); // Callee + Vals.push_back(VE.getAttributeID(CI.getAttributes())); + Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall())); + PushValueAndType(CI.getCalledValue(), InstID, Vals, VE); // Callee // Emit value #'s for the fixed parameters. for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - Vals.push_back(VE.getValueID(I.getOperand(i+1))); // fixed param. + Vals.push_back(VE.getValueID(CI.getArgOperand(i))); // fixed param. // Emit type/value pairs for varargs params. 
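
A note on the ALLOCA record compatibility handled in the BitcodeReader hunk above: new writers emit a fourth field carrying the type of the element-count operand, while legacy three-field records omit it and the reader substitutes i32. The cursor-style decoding can be sketched standalone; the record values below are invented and only the OpNum pattern mirrors the real reader:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint64_t> Record;           // instty, opty, op, align
  Record.push_back(7); Record.push_back(2);
  Record.push_back(9); Record.push_back(3);
  unsigned OpNum = 0;
  uint64_t InstTy = Record[OpNum++];
  uint64_t OpTy = Record.size() == 4 ? Record[OpNum++]
                                     : 0; // legacy record: default to i32
  uint64_t Op = Record[OpNum++];
  uint64_t Align = Record[OpNum++];
  std::printf("instty=%llu opty=%llu op=%llu align=%llu\n",
              (unsigned long long)InstTy, (unsigned long long)OpTy,
              (unsigned long long)Op, (unsigned long long)Align);
  return 0;
}
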
if (FTy->isVarArg()) { - unsigned NumVarargs = I.getNumOperands()-1-FTy->getNumParams(); - for (unsigned i = I.getNumOperands()-NumVarargs, e = I.getNumOperands(); + for (unsigned i = FTy->getNumParams(), e = CI.getNumArgOperands(); i != e; ++i) - PushValueAndType(I.getOperand(i), InstID, Vals, VE); // varargs + PushValueAndType(CI.getArgOperand(i), InstID, Vals, VE); // varargs } break; } @@ -1662,15 +1662,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) { WriteBitcodeToStream( M, Stream ); - // If writing to stdout, set binary mode. - if (&llvm::outs() == &Out) - sys::Program::ChangeStdoutToBinary(); - // Write the generated bitstream to "Out". Out.write((char*)&Buffer.front(), Buffer.size()); - - // Make sure it hits disk now. - Out.flush(); } /// WriteBitcodeToStream - Write the specified module to the specified output diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index d2baec7..7fa425a 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -72,7 +72,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) { // Enumerate types used by the type symbol table. EnumerateTypeSymbolTable(M->getTypeSymbolTable()); - // Insert constants and metadata that are named at module level into the slot + // Insert constants and metadata that are named at module level into the slot // pool so that the module symbol table can refer to them... EnumerateValueSymbolTable(M->getValueSymbolTable()); EnumerateMDSymbolTable(M->getMDSymbolTable()); @@ -257,6 +257,8 @@ void ValueEnumerator::EnumerateMetadata(const Value *MD) { else EnumerateType(Type::getVoidTy(MD->getContext())); } + if (N->isFunctionLocal() && N->getFunction()) + FunctionLocalMDs.push_back(N); return; } @@ -414,7 +416,8 @@ void ValueEnumerator::incorporateFunction(const Function &F) { FirstInstID = Values.size(); - SmallVector FunctionLocalMDs; + FunctionLocalMDs.clear(); + SmallVector FnLocalMDVector; // Add all of the instructions. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { @@ -423,7 +426,7 @@ void ValueEnumerator::incorporateFunction(const Function &F) { if (MDNode *MD = dyn_cast(*OI)) if (MD->isFunctionLocal() && MD->getFunction()) // Enumerate metadata after the instructions they might refer to. - FunctionLocalMDs.push_back(MD); + FnLocalMDVector.push_back(MD); } if (!I->getType()->isVoidTy()) EnumerateValue(I); @@ -431,8 +434,8 @@ void ValueEnumerator::incorporateFunction(const Function &F) { } // Add all of the function-local metadata. 
- for (unsigned i = 0, e = FunctionLocalMDs.size(); i != e; ++i) - EnumerateOperandType(FunctionLocalMDs[i]); + for (unsigned i = 0, e = FnLocalMDVector.size(); i != e; ++i) + EnumerateOperandType(FnLocalMDVector[i]); } void ValueEnumerator::purgeFunction() { diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 4f8ebf5..2b9b15f 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -15,6 +15,7 @@ #define VALUE_ENUMERATOR_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Attributes.h" #include @@ -26,7 +27,7 @@ class Instruction; class BasicBlock; class Function; class Module; -class MetadataBase; +class MDNode; class NamedMDNode; class AttrListPtr; class TypeSymbolTable; @@ -49,6 +50,7 @@ private: ValueMapType ValueMap; ValueList Values; ValueList MDValues; + SmallVector FunctionLocalMDs; ValueMapType MDValueMap; typedef DenseMap AttributeMapType; @@ -105,6 +107,9 @@ public: const ValueList &getValues() const { return Values; } const ValueList &getMDValues() const { return MDValues; } + const SmallVector &getFunctionLocalMDValues() const { + return FunctionLocalMDs; + } const TypeList &getTypes() const { return Types; } const std::vector &getBasicBlocks() const { return BasicBlocks; diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 4008a6a..a7189ac 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -114,6 +115,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi, TargetSubtarget::RegClassVector& CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)), State(NULL) { @@ -163,25 +165,27 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } - } else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - State->UnionGroups(AliasReg, 0); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; - } + unsigned Reg = *I; + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. 
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + State->UnionGroups(AliasReg, 0); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; } - } + } // Mark live-out callee-saved registers. In a return block this is // all callee-saved registers. In non-return this is any @@ -390,7 +394,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq()) { + if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + TII->isPredicated(MI)) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -443,6 +448,26 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, std::multimap& RegRefs = State->GetRegRefs(); + // If MI's uses have special allocation requirement, don't allow + // any use registers to be changed. Also assume all registers + // used in a call must not be changed (ABI). + // FIXME: The issue with predicated instruction is more complex. We are being + // conservatively here because the kill markers cannot be trusted after + // if-conversion: + // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... + // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->getDesc().isCall() || + MI->getDesc().hasExtraSrcRegAllocReq() || + TII->isPredicated(MI); + // Scan the register uses for this instruction and update // live-ranges, groups and RegRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -459,10 +484,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // for the register. HandleLastUse(Reg, Count, "(last-use)"); - // If MI's uses have special allocation requirement, don't allow - // any use registers to be changed. Also assume all registers - // used in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraSrcRegAllocReq()) { + if (Special) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -604,8 +626,12 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // order. If that register is available, and the corresponding // registers are available for the other group subregisters, then we // can use those registers to rename. + + // FIXME: Using getMinimalPhysRegClass is very conservative. We should + // check every use of the register and find the largest register class + // that can be used in all of them. 
const TargetRegisterClass *SuperRC = - TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other); + TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); @@ -905,6 +931,19 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second.Operand->setReg(NewReg); + // If the SU for the instruction being updated has debug + // information related to the anti-dependency register, make + // sure to update that as well. + const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()]; + if (!SU) continue; + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { + MachineInstr *DI = SU->DbgInstrList[i]; + assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() + && "Non register dbg_value attached to SUnit!"); + if (DI->getOperand(0).getReg() == AntiDepReg) + DI->getOperand(0).setReg(NewReg); + } } // We just went back in time and modified history; the diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 506d43e..91ebb85 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -115,6 +115,7 @@ namespace llvm { class AggressiveAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; /// AllocatableSet - The set of allocatable registers. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 5a0c27b..d9387a8 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -199,7 +199,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: - case GlobalValue::LinkerPrivateLinkage: + case GlobalValue::LinkerPrivateWeakLinkage: if (MAI->getWeakDefDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); @@ -225,6 +225,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { break; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: + case GlobalValue::LinkerPrivateLinkage: break; default: llvm_unreachable("Unknown linkage type!"); @@ -330,7 +331,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { else if (GVKind.isThreadData()) { OutStreamer.SwitchSection(TheSection); - EmitLinkage(GV->getLinkage(), MangSym); EmitAlignment(AlignLog, GV); OutStreamer.EmitLabel(MangSym); @@ -353,7 +353,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer unsigned PtrSize = TD->getPointerSizeInBits()/8; - OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__tlv_bootstrap"), + OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize, 0); OutStreamer.EmitIntValue(0, PtrSize, 0); OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0); @@ -428,20 +428,12 @@ void AsmPrinter::EmitFunctionHeader() { // Emit pre-function debug and/or EH information. 
if (DE) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(EHTimerName, DWARFGroupName); - DE->BeginFunction(MF); - } else { - DE->BeginFunction(MF); - } + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->BeginFunction(MF); } if (DD) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->beginFunction(MF); - } else { - DD->beginFunction(MF); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->beginFunction(MF); } } @@ -458,14 +450,11 @@ void AsmPrinter::EmitFunctionEntryLabel() { } -/// EmitComments - Pretty-print comments for instructions. -static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { - const MachineFunction *MF = MI.getParent()->getParent(); - const TargetMachine &TM = MF->getTarget(); - - DebugLoc DL = MI.getDebugLoc(); +static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF, + raw_ostream &CommentOS) { + const LLVMContext &Ctx = MF->getFunction()->getContext(); if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(MF->getFunction()->getContext())); + DIScope Scope(DL.getScope(Ctx)); // Omit the directory, because it's likely to be long and uninteresting. if (Scope.Verify()) CommentOS << Scope.getFilename(); @@ -474,6 +463,23 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { CommentOS << ':' << DL.getLine(); if (DL.getCol() != 0) CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << "[ "; + EmitDebugLoc(InlinedAtDL, MF, CommentOS); + CommentOS << " ]"; + } + } +} + +/// EmitComments - Pretty-print comments for instructions. +static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetMachine &TM = MF->getTarget(); + + DebugLoc DL = MI.getDebugLoc(); + if (!DL.isUnknown()) { // Print source line info. + EmitDebugLoc(DL, MF, CommentOS); CommentOS << '\n'; } @@ -611,12 +617,8 @@ void AsmPrinter::EmitFunctionBody() { } if (ShouldPrintDebugScopes) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->beginScope(II); - } else { - DD->beginScope(II); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->beginScope(II); } if (isVerbose()) @@ -649,12 +651,8 @@ void AsmPrinter::EmitFunctionBody() { } if (ShouldPrintDebugScopes) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->endScope(II); - } else { - DD->endScope(II); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endScope(II); } } } @@ -692,20 +690,12 @@ void AsmPrinter::EmitFunctionBody() { // Emit post-function debug information. if (DD) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->endFunction(MF); - } else { - DD->endFunction(MF); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endFunction(MF); } if (DE) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(EHTimerName, DWARFGroupName); - DE->EndFunction(); - } else { - DE->EndFunction(); - } + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->EndFunction(); } MMI->EndFunction(); @@ -730,19 +720,15 @@ bool AsmPrinter::doFinalization(Module &M) { // Finalize debug and EH information. 
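
The repeated timer cleanup in this file replaces an if/else that duplicated each body around a NamedRegionTimer with a single path that hands TimePassesIsEnabled to the timer itself. A minimal sketch of that RAII design, assuming nothing beyond standard C++ (names invented):

#include <cstdio>

struct ScopedTimer {
  bool Enabled;
  explicit ScopedTimer(bool E) : Enabled(E) {
    if (Enabled) std::printf("timer start\n");
  }
  ~ScopedTimer() {
    if (Enabled) std::printf("timer stop\n");
  }
};

static void emitFunction(bool TimePasses) {
  ScopedTimer T(TimePasses);  // single code path whether timing or not
  std::printf("doing the work\n");
}

int main() {
  emitFunction(false);  // work only
  emitFunction(true);   // start, work, stop
  return 0;
}
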
if (DE) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(EHTimerName, DWARFGroupName); - DE->EndModule(); - } else { + { + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); DE->EndModule(); } delete DE; DE = 0; } if (DD) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->endModule(); - } else { + { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->endModule(); } delete DD; DD = 0; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index ba6fed2..f6f3bae 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -83,7 +83,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { // Tell SrcMgr about this buffer, it takes ownership of the buffer. SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); - AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI); + AsmParser Parser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI); OwningPtr TAP(TM.getTarget().createAsmParser(Parser)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" @@ -279,7 +279,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // Okay, we finally have a value number. Ask the target to print this // operand! if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { - unsigned OpNo = 1; + unsigned OpNo = 2; bool Error = false; diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index b2c70d5..21396ca 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -201,6 +201,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_data8: Size = 8; break; case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; + case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break; default: llvm_unreachable("DIE Value form not supported yet"); } Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/); @@ -221,6 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_data8: return sizeof(int64_t); case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); break; } return 0; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 890507c..65c1d19 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -44,7 +44,8 @@ using namespace llvm; static cl::opt PrintDbgScope("print-dbgscope", cl::Hidden, cl::desc("Print DbgScope information for each machine instruction")); -static cl::opt DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, +static cl::opt DisableDebugInfoPrinting("disable-debug-info-print", + cl::Hidden, cl::desc("Disable debug info printing")); static cl::opt UnknownLocations("use-unknown-locations", cl::Hidden, @@ -79,15 +80,13 @@ class CompileUnit { /// IndexTyDie - An anonymous type for index type. Owned by CUDie. DIE *IndexTyDie; - /// GVToDieMap - Tracks the mapping of unit level debug informaton + /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton /// variables to debug information entries. 
- /// FIXME : Rename GVToDieMap -> NodeToDieMap - DenseMap GVToDieMap; + DenseMap MDNodeToDieMap; - /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton + /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton /// descriptors to debug information entries using a DIEEntry proxy. - /// FIXME : Rename - DenseMap GVToDIEEntryMap; + DenseMap MDNodeToDIEEntryMap; /// Globals - A map of globally visible named entities for this unit. /// @@ -123,25 +122,25 @@ public: /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. - DIE *getDIE(const MDNode *N) { return GVToDieMap.lookup(N); } + DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } /// insertDIE - Insert DIE into the map. void insertDIE(const MDNode *N, DIE *D) { - GVToDieMap.insert(std::make_pair(N, D)); + MDNodeToDieMap.insert(std::make_pair(N, D)); } /// getDIEEntry - Returns the debug information entry for the speciefied /// debug variable. DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap::iterator I = GVToDIEEntryMap.find(N); - if (I == GVToDIEEntryMap.end()) + DenseMap::iterator I = MDNodeToDIEEntryMap.find(N); + if (I == MDNodeToDIEEntryMap.end()) return NULL; return I->second; } /// insertDIEEntry - Insert debug information entry into the map. void insertDIEEntry(const MDNode *N, DIEEntry *E) { - GVToDIEEntryMap.insert(std::make_pair(N, E)); + MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); } /// addDie - Adds or interns the DIE to the compile unit. @@ -321,12 +320,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; + DwarfDebugLineSectionSym = CurrentLineSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - beginModule(M); - } else { - beginModule(M); + DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); + { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + beginModule(M); } } DwarfDebug::~DwarfDebug() { @@ -378,7 +377,8 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer) { if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); + DIEValue *Value = Integer == 1 ? 
+ DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); Die->addValue(Attribute, Form, Value); } @@ -866,6 +866,10 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { } else if (Context.isNameSpace()) { DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); ContextDIE->addChild(Die); + } else if (Context.isSubprogram()) { + DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context), + /*MakeDecl=*/false); + ContextDIE->addChild(Die); } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) ContextDIE->addChild(Die); else @@ -1055,6 +1059,10 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (DIDescriptor(ContainingType).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, getOrCreateTypeDIE(DIType(ContainingType))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } break; } default: @@ -1065,8 +1073,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type + || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + { // Add size if non-zero (derived types might be zero-sized.) if (Size) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -1329,6 +1338,9 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { // DW_TAG_inlined_subroutine may refer to this DIE. SPCU->insertDIE(SP, SPDie); + // Add to context owner. + addToContextOwner(SPDie, SP.getContext()); + return SPDie; } @@ -1379,6 +1391,7 @@ static bool isSubprogramContext(const MDNode *Context) { DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { CompileUnit *SPCU = getCompileUnit(SPNode); DIE *SPDie = SPCU->getDIE(SPNode); + assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); @@ -1412,6 +1425,14 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { SPCU->addDie(SPDie); } + // Pick up abstract subprogram DIE. 
+ if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { + SPDie = new DIE(dwarf::DW_TAG_subprogram); + addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsSPDIE); + SPCU->addDie(SPDie); + } + addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, @@ -1483,7 +1504,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); - if (StartLabel == FunctionBeginSym || EndLabel == 0) { + if (StartLabel == 0 || EndLabel == 0) { assert (0 && "Unexpected Start and End labels for a inlined scope!"); return 0; } @@ -1605,11 +1626,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // FIXME : Handle getNumOperands != 3 if (DVInsn->getNumOperands() == 3) { if (DVInsn->getOperand(0).isReg()) - updated = addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0)); + updated = + addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isImm()) updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isFPImm()) - updated = addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0)); + updated = + addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0)); } else { MachineLocation Location = Asm->getDebugValueLocation(DVInsn); if (Location.getReg()) { @@ -1682,8 +1705,13 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { if (Scope->getInlinedAt()) ScopeDIE = constructInlinedScopeDIE(Scope); else if (DS.isSubprogram()) { - if (Scope->isAbstractScope()) + ProcessedSPNodes.insert(DS); + if (Scope->isAbstractScope()) { ScopeDIE = getCompileUnit(DS)->getDIE(DS); + // Note down abstract DIE. + if (ScopeDIE) + AbstractSPDies.insert(std::make_pair(DS, ScopeDIE)); + } else ScopeDIE = updateSubprogramScopeDIE(DS); } @@ -1782,11 +1810,11 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This // simplifies debug range entries. - addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_data4, 0); + addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. It is always zero when only one - // compile unit is emitted in one object file. - addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + // compile unit in debug_line section. This offset is calculated + // during endMoudle(). + addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); if (!Dir.empty()) addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); @@ -1996,6 +2024,40 @@ void DwarfDebug::beginModule(Module *M) { /// void DwarfDebug::endModule() { if (!FirstCU) return; + const Module *M = MMI->getModule(); + if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) { + for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) { + if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue; + DISubprogram SP(AllSPs->getOperand(SI)); + if (!SP.Verify()) continue; + + // Collect info for variables that were optimized out. 
+ StringRef FName = SP.getLinkageName(); + if (FName.empty()) + FName = SP.getName(); + NamedMDNode *NMD = + M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName))); + if (!NMD) continue; + unsigned E = NMD->getNumOperands(); + if (!E) continue; + DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL); + for (unsigned I = 0; I != E; ++I) { + DIVariable DV(NMD->getOperand(I)); + if (!DV.Verify()) continue; + Scope->addVariable(new DbgVariable(DV)); + } + + // Construct subprogram DIE and add variables DIEs. + constructSubprogramDIE(SP); + DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP); + const SmallVector &Variables = Scope->getVariables(); + for (unsigned i = 0, N = Variables.size(); i < N; ++i) { + DIE *VariableDIE = constructVariableDIE(Variables[i], Scope); + if (VariableDIE) + ScopeDIE->addChild(VariableDIE); + } + } + } // Attach DW_AT_inline attribute with inlined subprogram DIEs. for (SmallPtrSet::iterator AI = InlinedSubprogramDIEs.begin(), @@ -2037,15 +2099,15 @@ void DwarfDebug::endModule() { // Compute DIE offsets and sizes. computeSizeAndOffsets(); + // Emit source line correspondence into a debug line section. + emitDebugLines(); + // Emit all the DIEs into a debug info section emitDebugInfo(); // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit source line correspondence into a debug line section. - emitDebugLines(); - // Emit info into a debug pubnames section. emitDebugPubNames(); @@ -2150,8 +2212,9 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { } /// collectVariableInfo - Populate DbgScope entries with variables' info. -void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { - SmallPtrSet Processed; +void +DwarfDebug::collectVariableInfo(const MachineFunction *MF, + SmallPtrSet &Processed) { /// collection info from MMI table. collectVariableInfoFromMMITable(MF, Processed); @@ -2180,16 +2243,23 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { if (Processed.count(DV) != 0) continue; + const MachineInstr *PrevMI = MInsn; for (SmallVector::iterator MI = I+1, ME = DbgValues.end(); MI != ME; ++MI) { const MDNode *Var = (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata(); - if (Var == DV && isDbgValueInDefinedReg(*MI)) + if (Var == DV && isDbgValueInDefinedReg(*MI) && + !PrevMI->isIdenticalTo(*MI)) MultipleValues.push_back(*MI); + PrevMI = *MI; } DbgScope *Scope = findDbgScope(MInsn); - if (!Scope && DV.getTag() == dwarf::DW_TAG_arg_variable) + bool CurFnArg = false; + if (DV.getTag() == dwarf::DW_TAG_arg_variable && + DISubprogram(DV.getContext()).describes(MF->getFunction())) + CurFnArg = true; + if (!Scope && CurFnArg) Scope = CurrentFnDbgScope; // If variable scope is not found then skip this variable. 
if (!Scope) @@ -2198,7 +2268,7 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { Processed.insert(DV); DbgVariable *RegVar = new DbgVariable(DV); Scope->addVariable(RegVar); - if (DV.getTag() != dwarf::DW_TAG_arg_variable) + if (!CurFnArg) DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn); if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) { DbgVariableToDbgInstMap[AbsVar] = MInsn; @@ -2217,7 +2287,8 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { const MachineInstr *Begin = NULL; const MachineInstr *End = NULL; for (SmallVector::iterator - MVI = MultipleValues.begin(), MVE = MultipleValues.end(); MVI != MVE; ++MVI) { + MVI = MultipleValues.begin(), MVE = MultipleValues.end(); + MVI != MVE; ++MVI) { if (!Begin) { Begin = *MVI; continue; @@ -2241,8 +2312,11 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { } // Collect info for variables that were optimized out. + const Function *F = MF->getFunction(); + const Module *M = F->getParent(); if (NamedMDNode *NMD = - MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) { + M->getNamedMetadata(Twine("llvm.dbg.lv.", + getRealLinkageName(F->getName())))) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { DIVariable DV(cast_or_null(NMD->getOperand(i))); if (!DV || !Processed.insert(DV)) @@ -2319,7 +2393,8 @@ void DwarfDebug::endScope(const MachineInstr *MI) { } /// getOrCreateDbgScope - Create DbgScope for the scope. -DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt) { +DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, + const MDNode *InlinedAt) { if (!InlinedAt) { DbgScope *WScope = DbgScopeMap.lookup(Scope); if (WScope) @@ -2335,13 +2410,20 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl if (!WScope->getParent()) { StringRef SPName = DISubprogram(Scope).getLinkageName(); - if (SPName == Asm->MF->getFunction()->getName()) + // We used to check only for a linkage name, but that fails + // since we began omitting the linkage name for private + // functions. The new way is to check for the name in metadata, + // but that's not supported in old .ll test cases. Ergo, we + // check both. + if (SPName == Asm->MF->getFunction()->getName() || + DISubprogram(Scope).getFunction() == Asm->MF->getFunction()) CurrentFnDbgScope = WScope; } return WScope; } + getOrCreateAbstractScope(Scope); DbgScope *WScope = DbgScopeMap.lookup(InlinedAt); if (WScope) return WScope; @@ -2355,7 +2437,6 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl Parent->addScope(WScope); ConcreteScopes[InlinedAt] = WScope; - getOrCreateAbstractScope(Scope); return WScope; } @@ -2365,8 +2446,6 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl static bool hasValidLocation(LLVMContext &Ctx, const MachineInstr *MInsn, const MDNode *&Scope, const MDNode *&InlinedAt) { - if (MInsn->isDebugValue()) - return false; DebugLoc DL = MInsn->getDebugLoc(); if (DL.isUnknown()) return false; @@ -2488,7 +2567,8 @@ bool DwarfDebug::extractScopeInformation() { // current instruction scope does not match scope of first instruction // in this range then create a new instruction range. 
DbgRange R(RangeBeginMI, PrevMI); - MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt); + MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, + PrevInlinedAt); MIRanges.push_back(R); } @@ -2565,7 +2645,6 @@ void DwarfDebug::identifyScopeMarkers() { RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "DbgRange does not have first instruction!"); assert(RI->second && "DbgRange does not have second instruction!"); - InsnsBeginScopeSet.insert(RI->first); InsnsEndScopeSet.insert(RI->second); } } @@ -2616,6 +2695,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { recordSourceLine(Line, Col, Scope); + /// ProcessedArgs - Collection of arguments already processed. + SmallPtrSet ProcessedArgs; + DebugLoc PrevLoc; for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) @@ -2624,14 +2706,19 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { const MachineInstr *MI = II; DebugLoc DL = MI->getDebugLoc(); if (MI->isDebugValue()) { - // DBG_VALUE needs a label if the variable is local variable or - // an argument whose location is changing. assert (MI->getNumOperands() > 1 && "Invalid machine instruction!"); DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata()); if (!DV.Verify()) continue; - if (DV.getTag() != dwarf::DW_TAG_arg_variable) + // If DBG_VALUE is for a local variable then it needs a label. + if (DV.getTag() != dwarf::DW_TAG_arg_variable + && isDbgValueInUndefinedReg(MI) == false) InsnNeedsLabel.insert(MI); - else if (!ProcessedArgs.insert(DV)) + // DBG_VALUE for inlined functions argument needs a label. + else if (!DISubprogram(getDISubprogram(DV.getContext())). + describes(MF->getFunction())) + InsnNeedsLabel.insert(MI); + // DBG_VALUE indicating argument location change needs a label. + else if (isDbgValueInUndefinedReg(MI) == false && !ProcessedArgs.insert(DV)) InsnNeedsLabel.insert(MI); } else { // If location is unknown then instruction needs a location only if @@ -2664,7 +2751,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionEndSym); - collectVariableInfo(MF); + SmallPtrSet ProcessedVars; + collectVariableInfo(MF, ProcessedVars); // Get function line info. if (!Lines.empty()) { @@ -2679,9 +2767,31 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Construct abstract scopes. for (SmallVector::iterator AI = AbstractScopesList.begin(), - AE = AbstractScopesList.end(); AI != AE; ++AI) - constructScopeDIE(*AI); - + AE = AbstractScopesList.end(); AI != AE; ++AI) { + DISubprogram SP((*AI)->getScopeNode()); + if (SP.Verify()) { + // Collect info for variables that were optimized out. 
+ StringRef FName = SP.getLinkageName(); + if (FName.empty()) + FName = SP.getName(); + const Module *M = MF->getFunction()->getParent(); + if (NamedMDNode *NMD = + M->getNamedMetadata(Twine("llvm.dbg.lv.", + getRealLinkageName(FName)))) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIVariable DV(cast_or_null(NMD->getOperand(i))); + if (!DV || !ProcessedVars.insert(DV)) + continue; + DbgScope *Scope = AbstractScopes.lookup(DV.getContext()); + if (Scope) + Scope->addVariable(new DbgVariable(DV)); + } + } + } + if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0) + constructScopeDIE(*AI); + } + DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); if (!DisableFramePointerElim(*MF)) @@ -2696,13 +2806,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; InsnNeedsLabel.clear(); - ProcessedArgs.clear(); DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); DbgVariableLabelsMap.clear(); DeleteContainerSeconds(DbgScopeMap); - InsnsBeginScopeSet.clear(); InsnsEndScopeSet.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); @@ -2764,7 +2872,8 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. -MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) { +MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, + const MDNode *S) { StringRef Dir; StringRef Fn; @@ -2790,6 +2899,16 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode Src = GetOrCreateSourceID(Dir, Fn); } +#if 0 + if (!Lines.empty()) { + SrcLineInfo lastSrcLineInfo = Lines.back(); + // Emitting sequential line records with the same line number (but + // different addresses) seems to confuse GDB. Avoid this. + if (lastSrcLineInfo.getLine() == Line) + return NULL; + } +#endif + MCSymbol *Label = MMI->getContext().CreateTempSymbol(); Lines.push_back(SrcLineInfo(Line, Col, Src, Label)); @@ -2898,7 +3017,8 @@ void DwarfDebug::EmitSectionLabels() { if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection()) EmitSectionSym(Asm, MacroInfo); - EmitSectionSym(Asm, TLOF.getDwarfLineSection()); + DwarfDebugLineSectionSym = + EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); EmitSectionSym(Asm, TLOF.getDwarfLocSection()); EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); @@ -2961,6 +3081,11 @@ void DwarfDebug::emitDIE(DIE *Die) { 4); break; } + case dwarf::DW_AT_stmt_list: { + Asm->EmitLabelDifference(CurrentLineSectionSym, + DwarfDebugLineSectionSym, 4); + break; + } case dwarf::DW_AT_location: { if (UseDotDebugLocEntry.count(Die) != 0) { DIELabel *L = cast(Values[i]); @@ -3106,6 +3231,8 @@ void DwarfDebug::emitDebugLines() { Asm->getObjFileLowering().getDwarfLineSection()); // Construct the section header. 
+  CurrentLineSectionSym = Asm->GetTempSymbol("section_line_begin");
+  Asm->OutStreamer.EmitLabel(CurrentLineSectionSym);
   Asm->OutStreamer.AddComment("Length of Source Line Info");
   Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"),
                            Asm->GetTempSymbol("line_begin"), 4);
@@ -3491,8 +3618,9 @@ void DwarfDebug::emitDebugLoc() {
   unsigned char Size = Asm->getTargetData().getPointerSize();
   Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0));
   unsigned index = 1;
-  for (SmallVector<DotDebugLocEntry, 4>::iterator I = DotDebugLocEntries.begin(),
-         E = DotDebugLocEntries.end(); I != E; ++I, ++index) {
+  for (SmallVector<DotDebugLocEntry, 4>::iterator
+         I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+       I != E; ++I, ++index) {
     DotDebugLocEntry Entry = *I;
     if (Entry.isEmpty()) {
       Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 0d6116f..5a281c8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -156,6 +156,9 @@ class DwarfDebug {
   /// not included DbgScopeMap.  AbstractScopes owns its DbgScope*s.
   DenseMap<const MDNode *, DbgScope *> AbstractScopes;
 
+  /// AbstractSPDies - Collection of abstract subprogram DIEs.
+  DenseMap<const MDNode *, DIE *> AbstractSPDies;
+
   /// AbstractScopesList - Tracks abstract scopes constructed while processing
   /// a function. This list is cleared during endFunction().
   SmallVector<DbgScope *, 4> AbstractScopesList;
@@ -210,7 +213,7 @@ class DwarfDebug {
   DenseMap<DIE *, const MDNode *> ContainingTypeMap;
 
   typedef SmallVector<DbgScope *, 2> ScopeVector;
-  SmallPtrSet<const MachineInstr *, 8> InsnsBeginScopeSet;
+
   SmallPtrSet<const MachineInstr *, 8> InsnsEndScopeSet;
 
   /// InlineInfo - Keep track of inlined functions and their location.  This
@@ -219,6 +222,10 @@ class DwarfDebug {
   DenseMap<const MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
   SmallVector<const MDNode *, 4> InlinedSPNodes;
 
+  // ProcessedSPNodes - This is a collection of subprogram MDNodes that
+  // are processed to create DIEs.
+  SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+
   /// LabelsBeforeInsn - Maps instruction with label emitted before
   /// instruction.
   DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn;
@@ -231,9 +238,6 @@ class DwarfDebug {
   /// a debugging information entity.
   SmallPtrSet<const MachineInstr *, 8> InsnNeedsLabel;
 
-  /// ProcessedArgs - Collection of arguments already processed.
-  SmallPtrSet<const MDNode *, 8> ProcessedArgs;
-
   SmallVector<MCSymbol *, 8> DebugRangeSymbols;
 
   /// Previous instruction's location information. This is used to determine
@@ -257,7 +261,10 @@ class DwarfDebug {
   MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym;
   MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym;
   MCSymbol *DwarfDebugLocSectionSym;
+  MCSymbol *DwarfDebugLineSectionSym, *CurrentLineSectionSym;
   MCSymbol *FunctionBeginSym, *FunctionEndSym;
+
+  DIEInteger *DIEIntegerOne;
 private:
 
   /// getSourceDirectoryAndFileIds - Return the directory and file ids that
@@ -593,7 +600,8 @@ private:
   bool extractScopeInformation();
 
   /// collectVariableInfo - Populate DbgScope entries with variables' info.
-  void collectVariableInfo(const MachineFunction *);
+  void collectVariableInfo(const MachineFunction *,
+                           SmallPtrSet<const MDNode *, 16> &ProcessedVars);
 
   /// collectVariableInfoFromMMITable - Collect variable information from
   /// side table maintained by MMI.
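With constructCompileUnit() now emitting DW_AT_stmt_list as a label (resolved in emitDIE() as the difference between the per-unit "section_line_begin" symbol and the "section_line" symbol that EmitSectionLabels() pins at the start of .debug_line), each compile unit records the section-relative offset of its own line table instead of a hard-coded zero. A self-contained sketch of the arithmetic, with made-up offsets:

    #include <cassert>
    #include <cstdint>

    struct Label { uint64_t Offset; }; // position within its section

    // EmitLabelDifference(Hi, Lo, 4) ultimately stores Hi - Lo as 4 bytes.
    static uint32_t labelDifference32(Label Hi, Label Lo) {
      assert(Hi.Offset >= Lo.Offset && "labels out of order");
      return static_cast<uint32_t>(Hi.Offset - Lo.Offset);
    }

    int main() {
      Label SectionLine      = {0x00}; // "section_line": start of .debug_line
      Label SectionLineBegin = {0x44}; // this CU's line table starts here
      // DW_AT_stmt_list (DW_FORM_data4) for this CU resolves to 0x44.
      return labelDifference32(SectionLineBegin, SectionLine) == 0x44 ? 0 : 1;
    }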
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index f92127f..c8a63cf 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -52,13 +52,13 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.')); SymName += "__"; SymName += Id; - + // Capitalize the first letter of the module name. SymName[Letter] = toupper(SymName[Letter]); - + SmallString<128> TmpStr; AP.Mang->getNameWithPrefix(TmpStr, SymName); - + MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr); AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 9dec22e..7f98df0 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -358,23 +358,10 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything -/// after it, replacing it with an unconditional branch to NewDest. This -/// returns true if OldInst's block is modified, false if NewDest is modified. +/// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { - MachineBasicBlock *OldBB = OldInst->getParent(); - - // Remove all the old successors of OldBB from the CFG. - while (!OldBB->succ_empty()) - OldBB->removeSuccessor(OldBB->succ_begin()); - - // Remove all the dead instructions from the end of OldBB. - OldBB->erase(OldInst, OldBB->end()); - - // If OldBB isn't immediately before OldBB, insert a branch to it. - if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest)) - TII->InsertBranch(*OldBB, NewDest, 0, SmallVector()); - OldBB->addSuccessor(NewDest); + TII->ReplaceTailWithBranchTo(OldInst, NewDest); ++NumTailMerge; } @@ -383,6 +370,9 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineBasicBlock::iterator BBI1) { + if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) + return 0; + MachineFunction &MF = *CurMBB.getParent(); // Create the fall-through block. @@ -443,18 +433,20 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector Cond; + DebugLoc dl; // FIXME: this is nowhere if (I != MF->end() && !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { MachineBasicBlock *NextBB = I; if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->ReverseBranchCondition(Cond)) { TII->RemoveBranch(*CurMBB); - TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond); + TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl); return; } } } - TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector()); + TII->InsertBranch(*CurMBB, SuccBB, NULL, + SmallVector(), dl); } bool @@ -625,9 +617,10 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. Create a block that does by splitting one. 
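BranchFolder::ReplaceTailWithBranchTo() is reduced above to a call through TargetInstrInfo, so targets with predication constraints (Thumb2 IT blocks, for instance) can veto or adjust the rewrite. A sketch of what a default hook body might look like, reusing the logic just deleted from BranchFolder; the class name is hypothetical, and the deleted comment's "If OldBB isn't immediately before OldBB" evidently meant "before NewDest", which is corrected here:

    // Assumes the usual LLVM CodeGen headers: MachineFunction.h,
    // MachineBasicBlock.h, TargetInstrInfo.h.
    void SketchTII::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
                                            MachineBasicBlock *NewDest) const {
      MachineBasicBlock *OldBB = Tail->getParent();
      DebugLoc dl = Tail->getDebugLoc(); // capture before erasing Tail

      // Remove all the old successors of OldBB from the CFG.
      while (!OldBB->succ_empty())
        OldBB->removeSuccessor(OldBB->succ_begin());

      // Remove all the dead instructions from the end of OldBB.
      OldBB->erase(Tail, OldBB->end());

      // If OldBB isn't immediately before NewDest, insert a branch to it.
      if (++MachineFunction::iterator(OldBB) !=
          MachineFunction::iterator(NewDest))
        InsertBranch(*OldBB, NewDest, 0, SmallVector<MachineOperand, 6>(), dl);
      OldBB->addSuccessor(NewDest);
    }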
-unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength) { - unsigned commonTailIndex = 0; +bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex) { + commonTailIndex = 0; unsigned TimeEstimate = ~0U; for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { // Use PredBB if possible; that doesn't require a new branch. @@ -655,6 +648,11 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, << maxCommonTailLength); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + if (!newMBB) { + DEBUG(dbgs() << "... failed!"); + return false; + } + SameTails[commonTailIndex].setBlock(newMBB); SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); @@ -662,7 +660,7 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, if (PredBB == MBB) PredBB = newMBB; - return commonTailIndex; + return true; } // See if any of the blocks in MergePotentials (which all have a common single @@ -757,7 +755,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength); + if (!CreateCommonTailOnlyBlock(PredBB, + maxCommonTailLength, commonTailIndex)) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } } MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); @@ -874,10 +876,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { } // Remove the unconditional branch at the end, if any. if (TBB && (Cond.empty() || FBB)) { + DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond); + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); } MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } @@ -976,6 +979,7 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); + DebugLoc dl; // FIXME: this is nowhere ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; @@ -1027,7 +1031,7 @@ ReoptimizeBlock: TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1066,7 +1070,7 @@ ReoptimizeBlock: // the condition is false, remove the uncond second branch. 
if (PriorFBB == MBB) { TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1079,7 +1083,7 @@ ReoptimizeBlock: SmallVector NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond); + TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1116,7 +1120,7 @@ ReoptimizeBlock: << "To make fallthrough to: " << *PriorTBB << "\n"); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); + TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); // Move this block to the end of the function. MBB->moveAfter(--MF.end()); @@ -1145,7 +1149,7 @@ ReoptimizeBlock: SmallVector NewCond(CurCond); if (!TII->ReverseBranchCondition(NewCond)) { TII->RemoveBranch(*MBB); - TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond); + TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1200,7 +1204,7 @@ ReoptimizeBlock: PriorFBB = MBB; } TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1226,7 +1230,7 @@ ReoptimizeBlock: if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); @@ -1246,7 +1250,7 @@ ReoptimizeBlock: } // Add the branch back if the block is more than just an uncond branch. 
-      TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
+      TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl);
     }
   }
 
@@ -1286,7 +1290,7 @@ ReoptimizeBlock:
       if (CurFallsThru) {
         MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
         CurCond.clear();
-        TII->InsertBranch(*MBB, NextBB, 0, CurCond);
+        TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl);
       }
       MBB->moveAfter(PredBB);
       MadeChange = true;
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index b087395..15dfa7f 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -102,8 +102,9 @@ namespace llvm {
                                  MachineBasicBlock *PredBB);
     void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
                               MachineBasicBlock* PredBB);
-    unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
-                                       unsigned maxCommonTailLength);
+    bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
+                                   unsigned maxCommonTailLength,
+                                   unsigned &commonTailIndex);
 
     bool OptimizeBranches(MachineFunction &MF);
     bool OptimizeBlock(MachineBasicBlock *MBB);
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 3e38872..ffeff1e 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,19 +1,20 @@
 add_llvm_library(LLVMCodeGen
-  Analysis.cpp
   AggressiveAntiDepBreaker.cpp
+  Analysis.cpp
   BranchFolding.cpp
   CalcSpillWeights.cpp
+  CallingConvLower.cpp
   CodePlacementOpt.cpp
   CriticalAntiDepBreaker.cpp
   DeadMachineInstructionElim.cpp
   DwarfEHPrepare.cpp
   ELFCodeEmitter.cpp
   ELFWriter.cpp
-  ExactHazardRecognizer.cpp
   GCMetadata.cpp
   GCMetadataPrinter.cpp
   GCStrategy.cpp
   IfConversion.cpp
+  InlineSpiller.cpp
   IntrinsicLowering.cpp
   LLVMTargetMachine.cpp
   LatencyPriorityQueue.cpp
@@ -45,6 +46,7 @@ add_llvm_library(LLVMCodeGen
   OptimizePHIs.cpp
   PHIElimination.cpp
   Passes.cpp
+  PostRAHazardRecognizer.cpp
   PostRASchedulerList.cpp
   PreAllocSplitting.cpp
   ProcessImplicitDefs.cpp
@@ -52,7 +54,6 @@ add_llvm_library(LLVMCodeGen
   PseudoSourceValue.cpp
   RegAllocFast.cpp
   RegAllocLinearScan.cpp
-  RegAllocLocal.cpp
   RegAllocPBQP.cpp
   RegisterCoalescer.cpp
   RegisterScavenging.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index a328d0e..240a7b9 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -116,7 +116,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
       SmallVector<LiveInterval*, 4> spillIs;
       if (lis->isReMaterializable(li, spillIs, isLoad)) {
         // If all of the definitions of the interval are re-materializable,
-        // it is a preferred candidate for spilling. If non of the defs are
+        // it is a preferred candidate for spilling. If none of the defs are
         // loads, then it's potentially very cheap to re-materialize.
         // FIXME: this gets much more complicated once we support non-trivial
         // re-materialization.
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
new file mode 100644
index 0000000..62ad817
--- /dev/null
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -0,0 +1,177 @@
+//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the CCState class, used for lowering and implementing
+// calling conventions.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, + SmallVector &locs, LLVMContext &C) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +// HandleByVal - Allocate a stack slot large enough to pass an argument by +// value. The size and alignment information of the argument is encoded in its +// parameter attribute. +void CCState::HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + unsigned Offset = AllocateStack(Size, Align); + + addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void CCState::MarkAllocated(unsigned Reg) { + UsedRegs[Reg/32] |= 1 << (Reg&31); + + if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) + for (; (Reg = *RegAliases); ++RegAliases) + UsedRegs[Reg/32] |= 1 << (Reg&31); +} + +/// AnalyzeFormalArguments - Analyze an array of argument values, +/// incorporating info about the formals into this state. +void +CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, + CCAssignFn Fn) { + unsigned NumArgs = Ins.size(); + + for (unsigned i = 0; i != NumArgs; ++i) { + EVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Formal argument #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// CheckReturn - Analyze the return values of a function, returning true if +/// the return can be performed without sret-demotion, and false otherwise. +bool CCState::CheckReturn(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + return false; + } + return true; +} + +/// AnalyzeReturn - Analyze the returned values of a return, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Return operand #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, +/// incorporating info about the passed values into this state. 
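The UsedRegs vector built in the CCState constructor above is a packed bit set: one bit per physical register, 32 to a word, which is why it is sized (NumRegs+31)/32 and why MarkAllocated() indexes with Reg/32 and masks with 1 << (Reg&31). A standalone worked example (the register numbers are made up):

    #include <cstdio>
    #include <vector>

    int main() {
      unsigned NumRegs = 73; // hypothetical target register count
      // (73 + 31) / 32 = 3 words cover registers 0..72.
      std::vector<unsigned> UsedRegs((NumRegs + 31) / 32, 0);

      unsigned Reg = 37;
      UsedRegs[Reg / 32] |= 1u << (Reg & 31); // word 1, bit 5

      bool Allocated = (UsedRegs[Reg / 32] >> (Reg & 31)) & 1u;
      std::printf("words=%u reg37=%u\n",
                  (unsigned)UsedRegs.size(), (unsigned)Allocated);
      return 0;
    }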
+void CCState::AnalyzeCallOperands(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + unsigned NumOps = Outs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + EVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void CCState::AnalyzeCallOperands(SmallVectorImpl &ArgVTs, + SmallVectorImpl &Flags, + CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + EVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallResult(const SmallVectorImpl &Ins, + CCAssignFn Fn) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { +#ifndef NDEBUG + dbgs() << "Call result #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. +void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { +#ifndef NDEBUG + dbgs() << "Call result has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); + } +} diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 3ff2a04..e0e315c 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -178,6 +178,8 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, continue; // Move the block. + DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber() + << " to top of loop.\n"); Changed = true; // Move it and all the blocks that can reach it via fallthrough edges @@ -297,6 +299,8 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, continue; // Move the block. 
+ DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber() + << " to be contiguous with loop.\n"); Changed = true; // Process this block and all loop blocks contiguous with it, to keep diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index fd957b1..e3746a9 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -29,6 +30,7 @@ CriticalAntiDepBreaker:: CriticalAntiDepBreaker(MachineFunction& MFi) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)) { @@ -71,25 +73,27 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } - } else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; - } + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; } - } + } // Mark live-out callee-saved registers. In a return block this is // all callee-saved registers. In non-return this is any @@ -164,6 +168,26 @@ static const SDep *CriticalPathStep(const SUnit *SU) { } void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { + // It's not safe to change register allocation for source operands of + // that have special allocation requirements. Also assume all registers + // used in a call must not be changed (ABI). + // FIXME: The issue with predicated instruction is more complex. We are being + // conservatively here because the kill markers cannot be trusted after + // if-conversion: + // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... + // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. 
The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->getDesc().isCall() || + MI->getDesc().hasExtraSrcRegAllocReq() || + TII->isPredicated(MI); + // Scan the register operands for this instruction and update // Classes and RegRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -199,9 +223,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (Classes[Reg] != reinterpret_cast(-1)) RegRefs.insert(std::make_pair(Reg, &MO)); - // It's not safe to change register allocation for source operands of - // that have special allocation requirements. - if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) { + if (MO.isUse() && Special) { if (KeepRegs.insert(Reg)) { for (const unsigned *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) @@ -216,38 +238,43 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Update liveness. // Proceding upwards, registers that are defed but not used in this // instruction are now dead. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI->isRegTiedToUseOperand(i)) continue; - - DefIndices[Reg] = Count; - KillIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.erase(Reg); - Classes[Reg] = 0; - RegRefs.erase(Reg); - // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - unsigned SubregReg = *Subreg; - DefIndices[SubregReg] = Count; - KillIndices[SubregReg] = ~0u; - KeepRegs.erase(SubregReg); - Classes[SubregReg] = 0; - RegRefs.erase(SubregReg); - } - // Conservatively mark super-registers as unusable. - for (const unsigned *Super = TRI->getSuperRegisters(Reg); - *Super; ++Super) { - unsigned SuperReg = *Super; - Classes[SuperReg] = reinterpret_cast(-1); + + if (!TII->isPredicated(MI)) { + // Predicated defs are modeled as read + write, i.e. similar to two + // address updates. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + DefIndices[Reg] = Count; + KillIndices[Reg] = ~0u; + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + KeepRegs.erase(Reg); + Classes[Reg] = 0; + RegRefs.erase(Reg); + // Repeat, for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + unsigned SubregReg = *Subreg; + DefIndices[SubregReg] = Count; + KillIndices[SubregReg] = ~0u; + KeepRegs.erase(SubregReg); + Classes[SubregReg] = 0; + RegRefs.erase(SubregReg); + } + // Conservatively mark super-registers as unusable. + for (const unsigned *Super = TRI->getSuperRegisters(Reg); + *Super; ++Super) { + unsigned SuperReg = *Super; + Classes[SuperReg] = reinterpret_cast(-1); + } } } for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -334,10 +361,15 @@ BreakAntiDependencies(const std::vector& SUnits, // so just duck out immediately if the block is empty. 
if (SUnits.empty()) return 0; + // Keep a map of the MachineInstr*'s back to the SUnit representing them. + // This is used for updating debug information. + DenseMap MISUnitMap; + // Find the node at the bottom of the critical path. const SUnit *Max = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { const SUnit *SU = &SUnits[i]; + MISUnitMap[SU->getInstr()] = SU; if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } @@ -473,7 +505,11 @@ BreakAntiDependencies(const std::vector& SUnits, PrescanInstruction(MI); - if (MI->getDesc().hasExtraDefRegAllocReq()) + // If MI's defs have a special allocation requirement, don't allow + // any def registers to be changed. Also assume all registers + // defined in a call must not be changed (ABI). + if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. AntiDepReg = 0; @@ -485,7 +521,7 @@ BreakAntiDependencies(const std::vector& SUnits, if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - if (MO.isUse() && AntiDepReg == Reg) { + if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { AntiDepReg = 0; break; } @@ -519,8 +555,22 @@ BreakAntiDependencies(const std::vector& SUnits, std::multimap::iterator> Range = RegRefs.equal_range(AntiDepReg); for (std::multimap::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) + Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second->setReg(NewReg); + // If the SU for the instruction being updated has debug information + // related to the anti-dependency register, make sure to update that + // as well. + const SUnit *SU = MISUnitMap[Q->second->getParent()]; + if (!SU) continue; + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { + MachineInstr *DI = SU->DbgInstrList[i]; + assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() + && "Non register dbg_value attached to SUnit!"); + if (DI->getOperand(0).getReg() == AntiDepReg) + DI->getOperand(0).setReg(NewReg); + } + } // We just went back in time and modified history; the // liveness information for the anti-depenence reg is now diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index cc42dd2..5406300 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -22,15 +22,18 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include namespace llvm { +class TargetInstrInfo; +class TargetRegisterInfo; + class CriticalAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; /// AllocatableSet - The set of allocatable registers. 
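The renaming loop added to BreakAntiDependencies() walks RegRefs.equal_range(AntiDepReg) and retargets every recorded operand, now including any DBG_VALUE users attached to the same SUnit, so debug info follows the register rename. A toy model of that multimap-driven rewrite (Operand and the register numbers are stand-ins for MachineOperand and real physical registers):

    #include <cstdio>
    #include <map>
    #include <utility>

    struct Operand { unsigned Reg; };

    int main() {
      // A WAR hazard: ... = use(r0); r0 = def. The later def of r0 can be
      // renamed to r2 so the two instructions may reorder safely.
      Operand Use = {0}, Def = {0}, DbgVal = {0};

      // RegRefs records the operands that must change with the rename:
      // here the def and the dbg_value describing it, not the old use.
      std::multimap<unsigned, Operand*> RegRefs;
      RegRefs.insert(std::make_pair(0u, &Def));
      RegRefs.insert(std::make_pair(0u, &DbgVal));

      unsigned AntiDepReg = 0, NewReg = 2;
      typedef std::multimap<unsigned, Operand*>::iterator It;
      std::pair<It, It> Range = RegRefs.equal_range(AntiDepReg);
      for (It Q = Range.first; Q != Range.second; ++Q)
        Q->second->Reg = NewReg; // the patch's Q->second->setReg(NewReg)

      std::printf("use r%u, def r%u, dbg r%u\n", Use.Reg, Def.Reg, DbgVal.Reg);
      return 0; // prints: use r0, def r2, dbg r2
    }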
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index f6739f4..01b31b4 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/CallSite.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -87,10 +88,13 @@ namespace { /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still /// use the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. - bool CleanupSelectors(); + bool CleanupSelectors(SmallPtrSet &Sels); + + bool HasCatchAllInSelector(IntrinsicInst *); /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. - void FindAllCleanupSelectors(SmallPtrSet &Sels); + void FindAllCleanupSelectors(SmallPtrSet &Sels, + SmallPtrSet &CatchAllSels); /// FindAllURoRInvokes - Find all URoR invokes in the function. void FindAllURoRInvokes(SmallPtrSet &URoRInvokes); @@ -150,7 +154,7 @@ namespace { Changed = true; } - return false; + return Changed; } public: @@ -186,25 +190,32 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) { return new DwarfEHPrepare(tm, fast); } +/// HasCatchAllInSelector - Return true if the intrinsic instruction has a +/// catch-all. +bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) { + if (!EHCatchAllValue) return false; + + unsigned ArgIdx = II->getNumArgOperands() - 1; + GlobalVariable *GV = dyn_cast(II->getArgOperand(ArgIdx)); + return GV == EHCatchAllValue; +} + /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. void DwarfEHPrepare:: -FindAllCleanupSelectors(SmallPtrSet &Sels) { +FindAllCleanupSelectors(SmallPtrSet &Sels, + SmallPtrSet &CatchAllSels) { for (Value::use_iterator I = SelectorIntrinsic->use_begin(), E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *SI = cast(I); - if (!SI || SI->getParent()->getParent() != F) continue; - - unsigned NumOps = SI->getNumOperands(); - if (NumOps > 4) continue; - bool IsCleanUp = (NumOps == 3); + IntrinsicInst *II = cast(I); - if (!IsCleanUp) - if (ConstantInt *CI = dyn_cast(SI->getOperand(3))) - IsCleanUp = (CI->getZExtValue() == 0); + if (II->getParent()->getParent() != F) + continue; - if (IsCleanUp) - Sels.insert(SI); + if (!HasCatchAllInSelector(II)) + Sels.insert(II); + else + CatchAllSels.insert(II); } } @@ -222,7 +233,7 @@ FindAllURoRInvokes(SmallPtrSet &URoRInvokes) { /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use /// the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. -bool DwarfEHPrepare::CleanupSelectors() { +bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet &Sels) { if (!EHCatchAllValue) return false; if (!SelectorIntrinsic) { @@ -232,17 +243,15 @@ bool DwarfEHPrepare::CleanupSelectors() { } bool Changed = false; - for (Value::use_iterator - I = SelectorIntrinsic->use_begin(), - E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *Sel = dyn_cast(I); - if (!Sel || Sel->getParent()->getParent() != F) continue; + for (SmallPtrSet::iterator + I = Sels.begin(), E = Sels.end(); I != E; ++I) { + IntrinsicInst *Sel = *I; // Index of the ".llvm.eh.catch.all.value" variable. 
- unsigned OpIdx = Sel->getNumOperands() - 1; - GlobalVariable *GV = dyn_cast(Sel->getOperand(OpIdx)); + unsigned OpIdx = Sel->getNumArgOperands() - 1; + GlobalVariable *GV = dyn_cast(Sel->getArgOperand(OpIdx)); if (GV != EHCatchAllValue) continue; - Sel->setOperand(OpIdx, EHCatchAllValue->getInitializer()); + Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer()); Changed = true; } @@ -293,8 +302,6 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, /// function. This is a candidate to merge the selector associated with the URoR /// invoke with the one from the URoR's landing pad. bool DwarfEHPrepare::HandleURoRInvokes() { - if (!DT) return CleanupSelectors(); // We require DominatorTree information. - if (!EHCatchAllValue) { EHCatchAllValue = F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value"); @@ -307,14 +314,20 @@ bool DwarfEHPrepare::HandleURoRInvokes() { if (!SelectorIntrinsic) return false; } + SmallPtrSet Sels; + SmallPtrSet CatchAllSels; + FindAllCleanupSelectors(Sels, CatchAllSels); + + if (!DT) + // We require DominatorTree information. + return CleanupSelectors(CatchAllSels); + if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) return CleanupSelectors(); + if (!URoR) return CleanupSelectors(CatchAllSels); } - SmallPtrSet Sels; SmallPtrSet URoRInvokes; - FindAllCleanupSelectors(Sels); FindAllURoRInvokes(URoRInvokes); SmallPtrSet SelsToConvert; @@ -340,7 +353,8 @@ bool DwarfEHPrepare::HandleURoRInvokes() { if (!ExceptionValueIntrinsic) { ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception); - if (!ExceptionValueIntrinsic) return CleanupSelectors(); + if (!ExceptionValueIntrinsic) + return CleanupSelectors(CatchAllSels); } for (Value::use_iterator @@ -360,21 +374,9 @@ bool DwarfEHPrepare::HandleURoRInvokes() { // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we // need to convert it to a 'catch-all'. for (SmallPtrSet::iterator - SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) { - IntrinsicInst *II = *SI; - unsigned NumOps = II->getNumOperands(); - - if (NumOps <= 4) { - bool IsCleanUp = (NumOps == 3); - - if (!IsCleanUp) - if (ConstantInt *CI = dyn_cast(II->getOperand(3))) - IsCleanUp = (CI->getZExtValue() == 0); - - if (IsCleanUp) - SelsToConvert.insert(II); - } - } + SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) + if (!HasCatchAllInSelector(*SI)) + SelsToConvert.insert(*SI); } } } @@ -388,12 +390,22 @@ bool DwarfEHPrepare::HandleURoRInvokes() { SI = SelsToConvert.begin(), SE = SelsToConvert.end(); SI != SE; ++SI) { IntrinsicInst *II = *SI; - SmallVector Args; // Use the exception object pointer and the personality function // from the original selector. - Args.push_back(II->getOperand(1)); // Exception object pointer. - Args.push_back(II->getOperand(2)); // Personality function. + CallSite CS(II); + IntrinsicInst::op_iterator I = CS.arg_begin(); + IntrinsicInst::op_iterator E = CS.arg_end(); + IntrinsicInst::op_iterator B = prior(E); + + // Exclude last argument if it is an integer. + if (isa(B)) E = B; + + // Add exception object pointer (front). + // Add personality function (next). + // Add in any filter IDs (rest). + SmallVector Args(I, E); + Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. 
CallInst *NewSelector = @@ -409,7 +421,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { } } - Changed |= CleanupSelectors(); + Changed |= CleanupSelectors(CatchAllSels); return Changed; } diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 8416d3b..36b0e65 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -90,7 +90,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { for (std::vector::iterator MRI = JTRelocations.begin(), MRE = JTRelocations.end(); MRI != MRE; ++MRI) { MachineRelocation &MR = *MRI; - unsigned MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); + uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); MR.setResultPointer((void*)MBBOffset); MR.setConstantVal(ES->SectionIdx); JTSection.addRelocation(MR); diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp deleted file mode 100644 index af5f289..0000000 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ /dev/null @@ -1,180 +0,0 @@ -//===----- ExactHazardRecognizer.cpp - hazard recognizer -------- ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements a hazard recognizer using the instructions itineraries -// defined for the current target. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "post-RA-sched" -#include "ExactHazardRecognizer.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrItineraries.h" - -using namespace llvm; - -ExactHazardRecognizer:: -ExactHazardRecognizer(const InstrItineraryData &LItinData) : - ScheduleHazardRecognizer(), ItinData(LItinData) -{ - // Determine the maximum depth of any itinerary. This determines the - // depth of the scoreboard. We always make the scoreboard at least 1 - // cycle deep to avoid dealing with the boundary condition. - unsigned ScoreboardDepth = 1; - if (!ItinData.isEmpty()) { - for (unsigned idx = 0; ; ++idx) { - if (ItinData.isEndMarker(idx)) - break; - - const InstrStage *IS = ItinData.beginStage(idx); - const InstrStage *E = ItinData.endStage(idx); - unsigned ItinDepth = 0; - for (; IS != E; ++IS) - ItinDepth += IS->getCycles(); - - ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); - } - } - - ReservedScoreboard.reset(ScoreboardDepth); - RequiredScoreboard.reset(ScoreboardDepth); - - DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " - << ScoreboardDepth << '\n'); -} - -void ExactHazardRecognizer::Reset() { - RequiredScoreboard.reset(); - ReservedScoreboard.reset(); -} - -void ExactHazardRecognizer::ScoreBoard::dump() const { - dbgs() << "Scoreboard:\n"; - - unsigned last = Depth - 1; - while ((last > 0) && ((*this)[last] == 0)) - last--; - - for (unsigned i = 0; i <= last; i++) { - unsigned FUs = (*this)[i]; - dbgs() << "\t"; - for (int j = 31; j >= 0; j--) - dbgs() << ((FUs & (1 << j)) ? 
'1' : '0'); - dbgs() << '\n'; - } -} - -ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU) { - if (ItinData.isEmpty()) - return NoHazard; - - unsigned cycle = 0; - - // Use the itinerary for the underlying instruction to check for - // free FU's in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), - *E = ItinData.endStage(idx); IS != E; ++IS) { - // We must find one of the stage's units free for every cycle the - // stage is occupied. FIXME it would be more accurate to find the - // same unit free in all the cycles. - for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < RequiredScoreboard.getDepth()) && - "Scoreboard depth exceeded!"); - - unsigned freeUnits = IS->getUnits(); - switch (IS->getReservationKind()) { - default: - assert(0 && "Invalid FU reservation"); - case InstrStage::Required: - // Required FUs conflict with both reserved and required ones - freeUnits &= ~ReservedScoreboard[cycle + i]; - // FALLTHROUGH - case InstrStage::Reserved: - // Reserved FUs can conflict only with required ones. - freeUnits &= ~RequiredScoreboard[cycle + i]; - break; - } - - if (!freeUnits) { - DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); - DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); - DEBUG(SU->getInstr()->dump()); - return Hazard; - } - } - - // Advance the cycle to the next stage. - cycle += IS->getNextCycles(); - } - - return NoHazard; -} - -void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { - if (ItinData.isEmpty()) - return; - - unsigned cycle = 0; - - // Use the itinerary for the underlying instruction to reserve FU's - // in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), - *E = ItinData.endStage(idx); IS != E; ++IS) { - // We must reserve one of the stage's units for every cycle the - // stage is occupied. FIXME it would be more accurate to reserve - // the same unit free in all the cycles. - for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < RequiredScoreboard.getDepth()) && - "Scoreboard depth exceeded!"); - - unsigned freeUnits = IS->getUnits(); - switch (IS->getReservationKind()) { - default: - assert(0 && "Invalid FU reservation"); - case InstrStage::Required: - // Required FUs conflict with both reserved and required ones - freeUnits &= ~ReservedScoreboard[cycle + i]; - // FALLTHROUGH - case InstrStage::Reserved: - // Reserved FUs can conflict only with required ones. - freeUnits &= ~RequiredScoreboard[cycle + i]; - break; - } - - // reduce to a single unit - unsigned freeUnit = 0; - do { - freeUnit = freeUnits; - freeUnits = freeUnit & (freeUnit - 1); - } while (freeUnits); - - assert(freeUnit && "No function unit available!"); - if (IS->getReservationKind() == InstrStage::Required) - RequiredScoreboard[cycle + i] |= freeUnit; - else - ReservedScoreboard[cycle + i] |= freeUnit; - } - - // Advance the cycle to the next stage. 
- cycle += IS->getNextCycles(); - } - - DEBUG(ReservedScoreboard.dump()); - DEBUG(RequiredScoreboard.dump()); -} - -void ExactHazardRecognizer::AdvanceCycle() { - ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); - RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); -} diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h deleted file mode 100644 index 91c81a9..0000000 --- a/lib/CodeGen/ExactHazardRecognizer.h +++ /dev/null @@ -1,86 +0,0 @@ -//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ExactHazardRecognizer class, which -// implements hazard-avoidance heuristics for scheduling, based on the -// scheduling itineraries specified for the target. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H -#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H - -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetInstrItineraries.h" - -namespace llvm { - class ExactHazardRecognizer : public ScheduleHazardRecognizer { - // ScoreBoard to track function unit usage. ScoreBoard[0] is a - // mask of the FUs in use in the cycle currently being - // schedule. ScoreBoard[1] is a mask for the next cycle. The - // ScoreBoard is used as a circular buffer with the current cycle - // indicated by Head. - class ScoreBoard { - unsigned *Data; - - // The maximum number of cycles monitored by the Scoreboard. This - // value is determined based on the target itineraries to ensure - // that all hazards can be tracked. - size_t Depth; - // Indices into the Scoreboard that represent the current cycle. - size_t Head; - public: - ScoreBoard():Data(NULL), Depth(0), Head(0) { } - ~ScoreBoard() { - delete[] Data; - } - - size_t getDepth() const { return Depth; } - unsigned& operator[](size_t idx) const { - assert(Depth && "ScoreBoard was not initialized properly!"); - - return Data[(Head + idx) % Depth]; - } - - void reset(size_t d = 1) { - if (Data == NULL) { - Depth = d; - Data = new unsigned[Depth]; - } - - memset(Data, 0, Depth * sizeof(Data[0])); - Head = 0; - } - - void advance() { - Head = (Head + 1) % Depth; - } - - // Print the scoreboard. - void dump() const; - }; - - // Itinerary data for the target. - const InstrItineraryData &ItinData; - - ScoreBoard ReservedScoreboard; - ScoreBoard RequiredScoreboard; - - public: - ExactHazardRecognizer(const InstrItineraryData &ItinData); - - virtual HazardType getHazardType(SUnit *SU); - virtual void Reset(); - virtual void EmitInstruction(SUnit *SU); - virtual void AdvanceCycle(); - }; -} - -#endif diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 790cb21..71506cc 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -271,7 +271,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { case Intrinsic::gcwrite: if (LowerWr) { // Replace a write barrier with a simple store. 
- Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI); + Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); CI->replaceAllUsesWith(St); CI->eraseFromParent(); } @@ -279,7 +279,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { case Intrinsic::gcread: if (LowerRd) { // Replace a read barrier with a simple load. - Value *Ld = new LoadInst(CI->getOperand(2), "", CI); + Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); Ld->takeName(CI); CI->replaceAllUsesWith(Ld); CI->eraseFromParent(); @@ -290,7 +290,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { // Initialize the GC root, but do not delete the intrinsic. The // backend needs the intrinsic to flag the stack slot. Roots.push_back(cast( - CI->getOperand(1)->stripPointerCasts())); + CI->getArgOperand(0)->stripPointerCasts())); } break; default: diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index c61fd17..6b445e0 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -20,6 +20,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -33,20 +34,22 @@ using namespace llvm; static cl::opt IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); static cl::opt IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden); static cl::opt IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden); -static cl::opt DisableSimple("disable-ifcvt-simple", +static cl::opt DisableSimple("disable-ifcvt-simple", cl::init(false), cl::Hidden); -static cl::opt DisableSimpleF("disable-ifcvt-simple-false", +static cl::opt DisableSimpleF("disable-ifcvt-simple-false", cl::init(false), cl::Hidden); -static cl::opt DisableTriangle("disable-ifcvt-triangle", +static cl::opt DisableTriangle("disable-ifcvt-triangle", cl::init(false), cl::Hidden); -static cl::opt DisableTriangleR("disable-ifcvt-triangle-rev", +static cl::opt DisableTriangleR("disable-ifcvt-triangle-rev", cl::init(false), cl::Hidden); -static cl::opt DisableTriangleF("disable-ifcvt-triangle-false", +static cl::opt DisableTriangleF("disable-ifcvt-triangle-false", cl::init(false), cl::Hidden); -static cl::opt DisableTriangleFR("disable-ifcvt-triangle-false-rev", +static cl::opt DisableTriangleFR("disable-ifcvt-triangle-false-rev", cl::init(false), cl::Hidden); -static cl::opt DisableDiamond("disable-ifcvt-diamond", +static cl::opt DisableDiamond("disable-ifcvt-diamond", cl::init(false), cl::Hidden); +static cl::opt IfCvtBranchFold("ifcvt-branch-fold", + cl::init(true), cl::Hidden); STATISTIC(NumSimple, "Number of simple if-conversions performed"); STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed"); @@ -115,7 +118,7 @@ namespace { BB(0), TrueBB(0), FalseBB(0) {} }; - /// IfcvtToken - Record information about pending if-conversions to attemp: + /// IfcvtToken - Record information about pending if-conversions to attempt: /// BBI - Corresponding BBInfo. /// Kind - Type of block. See IfcvtKind. 
/// NeedSubsumption - True if the to-be-predicated BB has already been @@ -146,6 +149,7 @@ namespace { const TargetLowering *TLI; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; bool MadeChange; int FnNum; public: @@ -167,8 +171,7 @@ namespace { std::vector &Tokens); bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl &Cond, bool isTriangle = false, bool RevBranch = false); - bool AnalyzeBlocks(MachineFunction &MF, - std::vector &Tokens); + void AnalyzeBlocks(MachineFunction &MF, std::vector &Tokens); void InvalidatePreds(MachineBasicBlock *BB); void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); @@ -177,14 +180,22 @@ namespace { unsigned NumDups1, unsigned NumDups2); void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, - SmallVectorImpl &Cond); + SmallVectorImpl &Cond, + SmallSet &Redefs); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl &Cond, + SmallSet &Redefs, bool IgnoreBr = false); - void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI); + void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); - bool MeetIfcvtSizeLimit(unsigned Size) const { - return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit(); + bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const { + return Size > 0 && TII->isProfitableToIfCvt(BB, Size); + } + + bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize, + MachineBasicBlock &FBB, unsigned FSize) const { + return TSize > 0 && FSize > 0 && + TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize); } // blockAlwaysFallThrough - Block ends without a terminator. @@ -227,8 +238,15 @@ FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); if (!TII) return false; + // Tail merging tends to expose more if-conversion opportunities. + BranchFolder BF(true); + bool BFChange = BF.OptimizeFunction(MF, TII, + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable()); + DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" << MF.getFunction()->getName() << "\'"); @@ -253,7 +271,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) { // Do an initial analysis for each basic block and find all the potential // candidates to perform if-conversion. - bool Change = AnalyzeBlocks(MF, Tokens); + bool Change = false; + AnalyzeBlocks(MF, Tokens); while (!Tokens.empty()) { IfcvtToken *Token = Tokens.back(); Tokens.pop_back(); @@ -281,7 +300,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; - DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") + DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? + " false" : "") << "): BB#" << BBI.BB->getNumber() << " (" << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber() @@ -289,8 +309,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { RetVal = IfConvertSimple(BBI, Kind); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { - if (isFalse) NumSimpleFalse++; - else NumSimple++; + if (isFalse) ++NumSimpleFalse; + else ++NumSimple; } break; } @@ -316,11 +336,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << (RetVal ?
"succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) { - if (isRev) NumTriangleFRev++; - else NumTriangleFalse++; + if (isRev) ++NumTriangleFRev; + else ++NumTriangleFalse; } else { - if (isRev) NumTriangleRev++; - else NumTriangle++; + if (isRev) ++NumTriangleRev; + else ++NumTriangle; } } break; @@ -332,7 +352,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); - if (RetVal) NumDiamonds++; + if (RetVal) ++NumDiamonds; break; } } @@ -361,13 +381,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { Roots.clear(); BBAnalysis.clear(); - if (MadeChange) { + if (MadeChange && IfCvtBranchFold) { BranchFolder BF(false); BF.OptimizeFunction(MF, TII, MF.getTarget().getRegisterInfo(), getAnalysisIfAvailable()); } + MadeChange |= BFChange; return MadeChange; } @@ -387,9 +408,10 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, /// ReverseBranchCondition - Reverse the condition of the end of the block /// branch. Swap block's 'true' and 'false' successors. bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { + DebugLoc dl; // FIXME: this is nowhere if (!TII->ReverseBranchCondition(BBI.BrCond)) { TII->RemoveBranch(*BBI.BB); - TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond); + TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); std::swap(BBI.TrueBB, BBI.FalseBB); return true; } @@ -420,7 +442,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { if (TrueBBI.BB->pred_size() > 1) { if (TrueBBI.CannotBeCopied || - TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit()) + !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize)) return false; Dups = TrueBBI.NonPredSize; } @@ -431,7 +453,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { /// ValidTriangle - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid triangle shape for ifcvt. /// If 'FalseBranch' is true, it checks if 'true' block's false branch -/// branches to the false branch rather than the other way around. It also +/// branches to the 'false' block rather than the other way around. It also /// returns the number of instructions that the ifcvt would need to duplicate /// if performed in 'Dups'. bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -457,7 +479,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, ++Size; } } - if (Size > TLI->getIfCvtDupBlockSizeLimit()) + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size)) return false; Dups = Size; } @@ -514,7 +536,27 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); MachineBasicBlock::iterator FI = FalseBBI.BB->begin(); - while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) { + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + // Skip dbg_value instructions + while (TI != TIE && TI->isDebugValue()) + ++TI; + while (FI != FIE && FI->isDebugValue()) + ++FI; + while (TI != TIE && FI != FIE) { + // Skip dbg_value instructions. These do not count. 
+ if (TI->isDebugValue()) { + while (TI != TIE && TI->isDebugValue()) + ++TI; + if (TI == TIE) + break; + } + if (FI->isDebugValue()) { + while (FI != FIE && FI->isDebugValue()) + ++FI; + if (FI == FIE) + break; + } if (!TI->isIdenticalTo(FI)) break; ++Dups1; @@ -524,7 +566,27 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, TI = firstNonBranchInst(TrueBBI.BB, TII); FI = firstNonBranchInst(FalseBBI.BB, TII); - while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) { + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + // Skip dbg_value instructions at end of the bb's. + while (TI != TIB && TI->isDebugValue()) + --TI; + while (FI != FIB && FI->isDebugValue()) + --FI; + while (TI != TIB && FI != FIB) { + // Skip dbg_value instructions. These do not count. + if (TI->isDebugValue()) { + while (TI != TIB && TI->isDebugValue()) + --TI; + if (TI == TIB) + break; + } + if (FI->isDebugValue()) { + while (FI != FIB && FI->isDebugValue()) + --FI; + if (FI == FIB) + break; + } if (!TI->isIdenticalTo(FI)) break; ++Dups2; @@ -556,7 +618,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { // No false branch. This BB must end with a conditional branch and a // fallthrough. if (!BBI.FalseBB) - BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); + BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); if (!BBI.FalseBB) { // Malformed bcc? True and false blocks are the same? BBI.IsUnpredicable = true; @@ -569,6 +631,9 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { BBI.ClobbersPred = false; for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); I != E; ++I) { + if (I->isDebugValue()) + continue; + const TargetInstrDesc &TID = I->getDesc(); if (TID.isNotDuplicable()) BBI.CannotBeCopied = true; @@ -702,8 +767,8 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, bool FNeedSub = FalseBBI.Predicate.size() > 0; bool Enqueued = false; if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2), + *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { // Diamond: @@ -720,7 +785,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, } if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -732,23 +797,23 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups)); Enqueued = true; } - + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } if (ValidSimple(TrueBBI, Dups) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB // | \_ // | | // | TBB---> exit - // | + // | // FBB Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups)); Enqueued = true; @@ 
-757,21 +822,21 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (CanRevCond) { // Try the other path... if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } if (ValidSimple(FalseBBI, Dups) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -785,11 +850,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, } /// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion -/// candidates. It returns true if any CFG restructuring is done to expose more -/// if-conversion opportunities. -bool IfConverter::AnalyzeBlocks(MachineFunction &MF, +/// candidates. +void IfConverter::AnalyzeBlocks(MachineFunction &MF, std::vector &Tokens) { - bool Change = false; std::set Visited; for (unsigned i = 0, e = Roots.size(); i != e; ++i) { for (idf_ext_iterator I=idf_ext_begin(Roots[i],Visited), @@ -801,20 +864,23 @@ bool IfConverter::AnalyzeBlocks(MachineFunction &MF, // Sort to favor more complex ifcvt scheme. std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); - - return Change; } /// canFallThroughTo - Returns true either if ToBB is the next block after BB or /// that all the intervening blocks are empty (given BB can fall through to its /// next block). static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { - MachineFunction::iterator I = BB; + MachineFunction::iterator PI = BB; + MachineFunction::iterator I = llvm::next(PI); MachineFunction::iterator TI = ToBB; MachineFunction::iterator E = BB->getParent()->end(); - while (++I != TI) - if (I == E || !I->empty()) + while (I != TI) { + // Check isSuccessor to avoid case where the next block is empty, but + // it's not a successor. + if (I == E || !I->empty() || !PI->isSuccessor(I)) return false; + PI = I++; + } return true; } @@ -836,8 +902,9 @@ void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { /// static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB, const TargetInstrInfo *TII) { + DebugLoc dl; // FIXME: this is nowhere SmallVector NoCond; - TII->InsertBranch(*BB, ToBB, NULL, NoCond); + TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl); } /// RemoveExtraEdges - Remove true / false edges if either / both are no longer @@ -849,6 +916,66 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } +/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are +/// modeled as read + write (sort like two-address instructions). These +/// routines track register liveness and add implicit uses to if-converted +/// instructions to conform to the model. 
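The model this comment describes can be shown in miniature before the patch's own helpers below: a predicated def of a register that is already live is really a conditional update, a read plus a write, so the rewritten instruction needs an implicit use to keep liveness honest. A toy version over a flat std::set, ignoring sub-registers and operand ordering:

#include <set>

// Simplified Redefs bookkeeping: kills drop liveness, fresh defs add it, and
// a def of an already-live register is a conditional update, in which case
// the caller should attach an implicit use of Reg.
static bool updateRedefs(std::set<unsigned> &Redefs, unsigned Reg,
                         bool IsDef, bool IsKill) {
  if (!IsDef) {
    if (IsKill)
      Redefs.erase(Reg);
    return false;
  }
  if (Redefs.count(Reg))
    return true; // Predicated redef of a live register.
  Redefs.insert(Reg);
  return false;
}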
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet &Redefs, + const TargetRegisterInfo *TRI) { + for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), + E = BB->livein_end(); I != E; ++I) { + unsigned Reg = *I; + Redefs.insert(Reg); + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + Redefs.insert(*Subreg); + } +} + +static void UpdatePredRedefs(MachineInstr *MI, SmallSet &Redefs, + const TargetRegisterInfo *TRI, + bool AddImpUse = false) { + SmallVector Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + Defs.push_back(Reg); + else if (MO.isKill()) { + Redefs.erase(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + Redefs.erase(*SR); + } + } + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + if (Redefs.count(Reg)) { + if (AddImpUse) + // Treat predicated update as read + write. + MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/,false/*IsKill*/)); + } else { + Redefs.insert(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + Redefs.insert(*SR); + } + } +} + +static void UpdatePredRedefs(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + SmallSet &Redefs, + const TargetRegisterInfo *TRI) { + while (I != E) { + UpdatePredRedefs(I, Redefs, TRI); + ++I; + } +} + /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. /// bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { @@ -873,13 +1000,19 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { if (TII->ReverseBranchCondition(Cond)) assert(false && "Unable to reverse branch condition!"); + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + SmallSet Redefs; + InitPredRedefs(CvtBBI->BB, Redefs, TRI); + InitPredRedefs(NextBBI->BB, Redefs, TRI); + if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); } else { - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); // Merge converted block into entry block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -922,6 +1055,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; BBInfo *CvtBBI = &TrueBBI; BBInfo *NextBBI = &FalseBBI; + DebugLoc dl; // FIXME: this is nowhere SmallVector Cond(BBI.BrCond.begin(), BBI.BrCond.end()); if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) @@ -957,21 +1091,26 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { } } + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + SmallSet Redefs; + InitPredRedefs(CvtBBI->BB, Redefs, TRI); + InitPredRedefs(NextBBI->BB, Redefs, TRI); + bool HasEarlyExit = CvtBBI->FalseBB != NULL; - bool DupBB = CvtBBI->BB->pred_size() > 1; - if (DupBB) { + if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block.
- CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); // Now merge the entry of the triangle with the true block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); - MergeBlocks(BBI, *CvtBBI); + MergeBlocks(BBI, *CvtBBI, false); } // If 'true' block has a 'false' successor, add an exit branch to it. @@ -980,7 +1119,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) assert(false && "Unable to reverse branch condition!"); - TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond); + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB); } @@ -1009,7 +1148,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { RemoveExtraEdges(BBI); // Update block info. BB can be iteratively if-converted. - if (!IterIfcvt) + if (!IterIfcvt) BBI.IsDone = true; InvalidatePreds(BBI.BB); CvtBBI->IsDone = true; @@ -1044,9 +1183,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, return false; } - // Merge the 'true' and 'false' blocks by copying the instructions - // from the 'false' block to the 'true' block. That is, unless the true - // block would clobber the predicate, in that case, do the opposite. + // Put the predicated instructions from the 'true' block before the + // instructions from the 'false' block, unless the true block would clobber + // the predicate, in which case, do the opposite. BBInfo *BBI1 = &TrueBBI; BBInfo *BBI2 = &FalseBBI; SmallVector RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); @@ -1071,39 +1210,72 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Remove the conditional branch from entry to the blocks. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + SmallSet Redefs; + InitPredRedefs(BBI1->BB, Redefs, TRI); + // Remove the duplicated instructions at the beginnings of both paths. MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); MachineBasicBlock::iterator DI2 = BBI2->BB->begin(); + MachineBasicBlock::iterator DIE1 = BBI1->BB->end(); + MachineBasicBlock::iterator DIE2 = BBI2->BB->end(); + // Skip dbg_value instructions + while (DI1 != DIE1 && DI1->isDebugValue()) + ++DI1; + while (DI2 != DIE2 && DI2->isDebugValue()) + ++DI2; BBI1->NonPredSize -= NumDups1; BBI2->NonPredSize -= NumDups1; + + // Skip past the dups on each side separately since there may be + // differing dbg_value entries. + for (unsigned i = 0; i < NumDups1; ++DI1) { + if (!DI1->isDebugValue()) + ++i; + } while (NumDups1 != 0) { - ++DI1; ++DI2; - --NumDups1; + if (!DI2->isDebugValue()) + --NumDups1; } + + UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI); BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); // Predicate the 'true' block after removing its branch. BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); DI1 = BBI1->BB->end(); - for (unsigned i = 0; i != NumDups2; ++i) + for (unsigned i = 0; i != NumDups2; ) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. 
+ assert (DI1 != BBI1->BB->begin()); --DI1; + // skip dbg_value instructions + if (!DI1->isDebugValue()) + ++i; + } BBI1->BB->erase(DI1, BBI1->BB->end()); - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs); // Predicate the 'false' block. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); while (NumDups2 != 0) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. + assert (DI2 != BBI2->BB->begin()); --DI2; - --NumDups2; + // skip dbg_value instructions + if (!DI2->isDebugValue()) + --NumDups2; } - PredicateBlock(*BBI2, DI2, *Cond2); + PredicateBlock(*BBI2, DI2, *Cond2, Redefs); // Merge the true block into the entry of the diamond. - MergeBlocks(BBI, *BBI1); - MergeBlocks(BBI, *BBI2); + MergeBlocks(BBI, *BBI1, TailBB == 0); + MergeBlocks(BBI, *BBI2, TailBB == 0); // If the if-converted block falls through or unconditionally branches into // the tail block, and the tail block does not have other predecessors, then @@ -1111,16 +1283,32 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // tail, add a unconditional branch to it. if (TailBB) { BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; - if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) { - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + bool CanMergeTail = !TailBBI.HasFallThrough; + // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; + // check if there are any other predecessors besides those. + unsigned NumPreds = TailBB->pred_size(); + if (NumPreds > 1) + CanMergeTail = false; + else if (NumPreds == 1 && CanMergeTail) { + MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); + if (*PI != BBI1->BB && *PI != BBI2->BB) + CanMergeTail = false; + } + if (CanMergeTail) { MergeBlocks(BBI, TailBBI); TailBBI.IsDone = true; } else { + BBI.BB->addSuccessor(TailBB); InsertUncondBranch(BBI.BB, TailBB, TII); BBI.HasFallThrough = false; } } + // RemoveExtraEdges won't work if the block has an unanalyzable branch, + // which can happen here if TailBB is unanalyzable and is merged, so + // explicitly remove BBI1 and BBI2 as successors. + BBI.BB->removeSuccessor(BBI1->BB); + BBI.BB->removeSuccessor(BBI2->BB); RemoveExtraEdges(BBI); // Update block info. @@ -1135,9 +1323,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, /// specified end with the specified condition. void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, - SmallVectorImpl &Cond) { + SmallVectorImpl &Cond, + SmallSet &Redefs) { for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { - if (TII->isPredicated(I)) + if (I->isDebugValue() || TII->isPredicated(I)) continue; if (!TII->PredicateInstruction(I, Cond)) { #ifndef NDEBUG @@ -1145,6 +1334,10 @@ void IfConverter::PredicateBlock(BBInfo &BBI, #endif llvm_unreachable(0); } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(I, Redefs, TRI, true); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1152,48 +1345,55 @@ void IfConverter::PredicateBlock(BBInfo &BBI, BBI.IsAnalyzed = false; BBI.NonPredSize = 0; - NumIfConvBBs++; + ++NumIfConvBBs; } /// CopyAndPredicateBlock - Copy and predicate instructions from source BB to /// the destination block. Skip end of block branches if IgnoreBr is true. 
void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl &Cond, + SmallSet &Redefs, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), E = FromBBI.BB->end(); I != E; ++I) { const TargetInstrDesc &TID = I->getDesc(); - bool isPredicated = TII->isPredicated(I); // Do not copy the end of the block branches. - if (IgnoreBr && !isPredicated && TID.isBranch()) + if (IgnoreBr && TID.isBranch()) break; MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - if (!isPredicated) + if (!TII->isPredicated(I) && !MI->isDebugValue()) { if (!TII->PredicateInstruction(MI, Cond)) { #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; #endif llvm_unreachable(0); } + } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(MI, Redefs, TRI, true); } - std::vector Succs(FromBBI.BB->succ_begin(), - FromBBI.BB->succ_end()); - MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); - MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + if (!IgnoreBr) { + std::vector Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; - for (unsigned i = 0, e = Succs.size(); i != e; ++i) { - MachineBasicBlock *Succ = Succs[i]; - // Fallthrough edge can't be transferred. - if (Succ == FallThrough) - continue; - ToBBI.BB->addSuccessor(Succ); + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + ToBBI.BB->addSuccessor(Succ); + } } std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), @@ -1203,25 +1403,18 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.IsAnalyzed = false; - NumDupBBs++; + ++NumDupBBs; } /// MergeBlocks - Move all instructions from FromBB to the end of ToBB. -/// -void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) { +/// This will leave FromBB as an empty block, so remove all of its +/// successor edges except for the fall-through edge. If AddEdges is true, +/// i.e., when FromBBI's branch is being moved, add those successor edges to +/// ToBBI. +void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { ToBBI.BB->splice(ToBBI.BB->end(), FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); - // Redirect all branches to FromBB to ToBB. - std::vector Preds(FromBBI.BB->pred_begin(), - FromBBI.BB->pred_end()); - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - MachineBasicBlock *Pred = Preds[i]; - if (Pred == ToBBI.BB) - continue; - Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB); - } - std::vector Succs(FromBBI.BB->succ_begin(), FromBBI.BB->succ_end()); MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); @@ -1233,7 +1426,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) { if (Succ == FallThrough) continue; FromBBI.BB->removeSuccessor(Succ); - ToBBI.BB->addSuccessor(Succ); + if (AddEdges) + ToBBI.BB->addSuccessor(Succ); } // Now FromBBI always falls through to the next block! 
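The reworked MergeBlocks above leans on MachineBasicBlock::splice, which behaves like std::list::splice: nodes are relinked in constant time, nothing is copied, and the source range is left empty; the new AddEdges flag only decides whether the successor edges move along too. The container-level behavior, runnable as-is:

#include <cassert>
#include <list>

int main() {
  std::list<int> To, From;
  To.push_back(1);   To.push_back(2);
  From.push_back(3); From.push_back(4);

  // Relink [From.begin(), From.end()) onto the end of To: no copies, no
  // allocation, and From is left empty, exactly like MergeBlocks' splice.
  To.splice(To.end(), From, From.begin(), From.end());

  assert(To.size() == 4);
  assert(From.empty());
  return 0;
}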
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp new file mode 100644 index 0000000..12adcaa --- /dev/null +++ b/lib/CodeGen/InlineSpiller.cpp @@ -0,0 +1,408 @@ +//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The inline spiller modifies the machine function directly instead of +// inserting spills and restores in VirtRegMap. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "spiller" +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class InlineSpiller : public Spiller { + MachineFunction &mf_; + LiveIntervals &lis_; + VirtRegMap &vrm_; + MachineFrameInfo &mfi_; + MachineRegisterInfo &mri_; + const TargetInstrInfo &tii_; + const TargetRegisterInfo &tri_; + const BitVector reserved_; + + // Variables that are valid during spill(), but used by multiple methods. + LiveInterval *li_; + std::vector *newIntervals_; + const TargetRegisterClass *rc_; + int stackSlot_; + const SmallVectorImpl *spillIs_; + + // Values of the current interval that can potentially remat. + SmallPtrSet reMattable_; + + // Values in reMattable_ that failed to remat at some point. + SmallPtrSet usedValues_; + + ~InlineSpiller() {} + +public: + InlineSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) + : mf_(*mf), lis_(*lis), vrm_(*vrm), + mfi_(*mf->getFrameInfo()), + mri_(mf->getRegInfo()), + tii_(*mf->getTarget().getInstrInfo()), + tri_(*mf->getTarget().getRegisterInfo()), + reserved_(tri_.getReservedRegs(mf_)) {} + + void spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &spillIs, + SlotIndex *earliestIndex); + +private: + bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, + SlotIndex UseIdx); + bool reMaterializeFor(MachineBasicBlock::iterator MI); + void reMaterializeAll(); + + bool foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl &Ops); + void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); + void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI); +}; +} + +namespace llvm { +Spiller *createInlineSpiller(MachineFunction *mf, + LiveIntervals *lis, + const MachineLoopInfo *mli, + VirtRegMap *vrm) { + return new InlineSpiller(mf, lis, vrm); +} +} + +/// allUsesAvailableAt - Return true if all registers used by OrigMI at +/// OrigIdx are also available with the same value at UseIdx. +bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI, + SlotIndex OrigIdx, + SlotIndex UseIdx) { + OrigIdx = OrigIdx.getUseIndex(); + UseIdx = UseIdx.getUseIndex(); + for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OrigMI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg) + continue; + // Reserved registers are OK. 
+ if (MO.isUndef() || !lis_.hasInterval(MO.getReg())) + continue; + // We don't want to move any defs. + if (MO.isDef()) + return false; + // We cannot depend on virtual registers in spillIs_. They will be spilled. + for (unsigned si = 0, se = spillIs_->size(); si != se; ++si) + if ((*spillIs_)[si]->reg == MO.getReg()) + return false; + + LiveInterval &LI = lis_.getInterval(MO.getReg()); + const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx); + if (!OVNI) + continue; + if (OVNI != LI.getVNInfoAt(UseIdx)) + return false; + } + return true; +} + +/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of +/// reloading it. +bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { + SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex(); + VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx); + if (!OrigVNI) { + DEBUG(dbgs() << "\tadding flags: "); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) + MO.setIsUndef(); + } + DEBUG(dbgs() << UseIdx << '\t' << *MI); + return true; + } + if (!reMattable_.count(OrigVNI)) { + DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": " + << UseIdx << '\t' << *MI); + return false; + } + MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def); + if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) { + usedValues_.insert(OrigVNI); + DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); + return false; + } + + // If the instruction also writes li_->reg, it had better not require the same + // register for uses and defs. + bool Reads, Writes; + SmallVector Ops; + tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops); + if (Writes) { + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(Ops[i]); + if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) { + usedValues_.insert(OrigVNI); + DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); + return false; + } + } + } + + // Allocate a new register for the remat. + unsigned NewVReg = mri_.createVirtualRegister(rc_); + vrm_.grow(); + LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg); + NewLI.markNotSpillable(); + newIntervals_->push_back(&NewLI); + + // Finally we can rematerialize OrigMI before MI. + MachineBasicBlock &MBB = *MI->getParent(); + tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_); + MachineBasicBlock::iterator RematMI = MI; + SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex(); + DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI); + + // Replace operands + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(Ops[i]); + if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) { + MO.setReg(NewVReg); + MO.setIsKill(); + } + } + DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); + + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); + DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + return true; +} + +/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible, +/// and trim the live ranges after. +void InlineSpiller::reMaterializeAll() { + // Do a quick scan of the interval values to find if any are remattable.
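The invariant allUsesAvailableAt enforces above is that a def may only be rematerialized at a use point if every register it reads still holds the same value there, with value numbers (VNInfo in the real code) making "same value" precise. A toy model of that check, assuming a complete register-to-value-number table at both program points (all names here are hypothetical):

#include <map>
#include <vector>

// VN[reg][point] is the value number reg holds at a program point. The def
// may move from OrigIdx to UseIdx only if every input register holds the
// same value at both points. Assumes the table covers both points.
typedef std::map<unsigned, std::map<int, int> > ValueTable;

static bool sameValuesAt(const ValueTable &VN,
                         const std::vector<unsigned> &UsedRegs,
                         int OrigIdx, int UseIdx) {
  for (unsigned i = 0, e = UsedRegs.size(); i != e; ++i) {
    const std::map<int, int> &M = VN.find(UsedRegs[i])->second;
    if (M.find(OrigIdx)->second != M.find(UseIdx)->second)
      return false;
  }
  return true;
}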
+ reMattable_.clear(); + usedValues_.clear(); + for (LiveInterval::const_vni_iterator I = li_->vni_begin(), + E = li_->vni_end(); I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused() || !VNI->isDefAccurate()) + continue; + MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); + if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI)) + continue; + reMattable_.insert(VNI); + } + + // Often, no defs are remattable. + if (reMattable_.empty()) + return; + + // Try to remat before all uses of li_->reg. + bool anyRemat = false; + for (MachineRegisterInfo::use_nodbg_iterator + RI = mri_.use_nodbg_begin(li_->reg); + MachineInstr *MI = RI.skipInstruction();) + anyRemat |= reMaterializeFor(MI); + + if (!anyRemat) + return; + + // Remove any values that were completely rematted. + bool anyRemoved = false; + for (SmallPtrSet::iterator I = reMattable_.begin(), + E = reMattable_.end(); I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->hasPHIKill() || usedValues_.count(VNI)) + continue; + MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); + DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI); + lis_.RemoveMachineInstrFromMaps(DefMI); + vrm_.RemoveMachineInstrFromMaps(DefMI); + DefMI->eraseFromParent(); + li_->removeValNo(VNI); + anyRemoved = true; + } + + if (!anyRemoved) + return; + + // Removing values may cause debug uses where li_ is not live. + for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg); + MachineInstr *MI = RI.skipInstruction();) { + if (!MI->isDebugValue()) + continue; + // Try to preserve the debug value if li_ is live immediately after it. + MachineBasicBlock::iterator NextMI = MI; + ++NextMI; + if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { + SlotIndex NearIdx = lis_.getInstructionIndex(NextMI); + if (li_->liveAt(NearIdx)) + continue; + } + DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI); + MI->eraseFromParent(); + } +} + +/// foldMemoryOperand - Try folding stack slot references in Ops into MI. +/// Return true on success, and MI will be erased. +bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl &Ops) { + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied + // operands. + SmallVector FoldOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + unsigned Idx = Ops[i]; + MachineOperand &MO = MI->getOperand(Idx); + if (MO.isImplicit()) + continue; + // FIXME: Teach targets to deal with subregs. + if (MO.getSubReg()) + return false; + // Tied use operands should not be passed to foldMemoryOperand. + if (!MI->isRegTiedToDefOperand(Idx)) + FoldOps.push_back(Idx); + } + + MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); + if (!FoldMI) + return false; + lis_.ReplaceMachineInstrInMaps(MI, FoldMI); + vrm_.addSpillSlotUse(stackSlot_, FoldMI); + MI->eraseFromParent(); + DEBUG(dbgs() << "\tfolded: " << *FoldMI); + return true; +} + +/// insertReload - Insert a reload of NewLI.reg before MI. +void InlineSpiller::insertReload(LiveInterval &NewLI, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_); + --MI; // Point to load instruction. 
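insertReload and insertSpill, whose bodies surround this point, share one iterator pattern: emit the new instruction next to MI through the target hooks, then step the iterator back so it names the freshly created instruction for entry into the slot-index maps. The same dance on a plain std::list:

#include <cassert>
#include <cstring>
#include <list>

int main() {
  std::list<const char*> Block;
  Block.push_back("middle");
  std::list<const char*>::iterator MI = Block.begin();

  // Reload: insert before MI; the returned iterator names the new element,
  // playing the role of the --MI step above.
  std::list<const char*>::iterator Load = Block.insert(MI, "reload");

  // Spill: insert after MI by inserting before the following position.
  std::list<const char*>::iterator After = MI;
  ++After;
  std::list<const char*>::iterator Store = Block.insert(After, "spill");

  assert(Block.size() == 3);
  assert(std::strcmp(*Load, "reload") == 0);
  assert(std::strcmp(*Store, "spill") == 0);
  return 0;
}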
+ SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + vrm_.addSpillSlotUse(stackSlot_, MI); + DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); +} + +/// insertSpill - Insert a spill of NewLI.reg after MI. +void InlineSpiller::insertSpill(LiveInterval &NewLI, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_); + --MI; // Point to store instruction. + SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + vrm_.addSpillSlotUse(stackSlot_, MI); + DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); +} + +void InlineSpiller::spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &spillIs, + SlotIndex *earliestIndex) { + DEBUG(dbgs() << "Inline spilling " << *li << "\n"); + assert(li->isSpillable() && "Attempting to spill already spilled value."); + assert(!li->isStackSlot() && "Trying to spill a stack slot."); + + li_ = li; + newIntervals_ = &newIntervals; + rc_ = mri_.getRegClass(li->reg); + spillIs_ = &spillIs; + + reMaterializeAll(); + + // Remat may handle everything. + if (li_->empty()) + return; + + stackSlot_ = vrm_.assignVirt2StackSlot(li->reg); + + // Iterate over instructions using register. + for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg); + MachineInstr *MI = RI.skipInstruction();) { + + // Debug values are not allowed to affect codegen. + if (MI->isDebugValue()) { + // Modify DBG_VALUE now that the value is in a spill slot. + uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_, + Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + } else { + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + MI->eraseFromParent(); + } + continue; + } + + // Analyze instruction. + bool Reads, Writes; + SmallVector Ops; + tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops); + + // Attempt to fold memory ops. + if (foldMemoryOperand(MI, Ops)) + continue; + + // Allocate interval around instruction. + // FIXME: Infer regclass from instruction alone. + unsigned NewVReg = mri_.createVirtualRegister(rc_); + vrm_.grow(); + LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg); + NewLI.markNotSpillable(); + + if (Reads) + insertReload(NewLI, MI); + + // Rewrite instruction operands. + bool hasLiveDef = false; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(Ops[i]); + MO.setReg(NewVReg); + if (MO.isUse()) { + if (!MI->isRegTiedToDefOperand(Ops[i])) + MO.setIsKill(); + } else { + if (!MO.isDead()) + hasLiveDef = true; + } + } + + // FIXME: Use a second vreg if instruction has no tied ops. 
+ if (Writes && hasLiveDef) + insertSpill(NewLI, MI); + + DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + newIntervals.push_back(&NewLI); + } +} diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 63bb5f2..03ae214 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -16,6 +16,7 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" @@ -314,21 +315,22 @@ static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, const char *Dname, const char *LDname) { - switch (CI->getOperand(1)->getType()->getTypeID()) { + CallSite CS(CI); + switch (CI->getArgOperand(0)->getType()->getTypeID()) { default: llvm_unreachable("Invalid type in intrinsic"); case Type::FloatTyID: - ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(), + ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(), Type::getFloatTy(CI->getContext())); break; case Type::DoubleTyID: - ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(), + ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(), Type::getDoubleTy(CI->getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: - ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(), - CI->getOperand(1)->getType()); + ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(), + CI->getArgOperand(0)->getType()); break; } } @@ -340,6 +342,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { const Function *Callee = CI->getCalledFunction(); assert(Callee && "Cannot lower an indirect call!"); + CallSite CS(CI); switch (Callee->getIntrinsicID()) { case Intrinsic::not_intrinsic: report_fatal_error("Cannot lower a call to a non-intrinsic function '"+ @@ -353,7 +356,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { // by the lowerinvoke pass. In both cases, the right thing to do is to // convert the call to an explicit setjmp or longjmp call. 
case Intrinsic::setjmp: { - Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(), + Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(), Type::getInt32Ty(Context)); if (!CI->getType()->isVoidTy()) CI->replaceAllUsesWith(V); @@ -365,32 +368,32 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; case Intrinsic::longjmp: { - ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(), + ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(), Type::getVoidTy(Context)); break; } case Intrinsic::siglongjmp: { // Insert the call to abort - ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), + ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), Type::getVoidTy(Context)); break; } case Intrinsic::ctpop: - CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI)); break; case Intrinsic::bswap: - CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI)); break; case Intrinsic::ctlz: - CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI)); break; case Intrinsic::cttz: { // cttz(x) -> ctpop(~X & (X-1)) - Value *Src = CI->getOperand(1); + Value *Src = CI->getArgOperand(0); Value *NotSrc = Builder.CreateNot(Src); NotSrc->setName(Src->getName() + ".not"); Value *SrcM1 = ConstantInt::get(Src->getType(), 1); @@ -451,37 +454,37 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::memcpy: { const IntegerType *IntPtr = TD.getIntPtrType(Context); - Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getOperand(1); - Ops[1] = CI->getOperand(2); + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); Ops[2] = Size; - ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType()); + ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); break; } case Intrinsic::memmove: { const IntegerType *IntPtr = TD.getIntPtrType(Context); - Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getOperand(1); - Ops[1] = CI->getOperand(2); + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); Ops[2] = Size; - ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType()); + ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); break; } case Intrinsic::memset: { const IntegerType *IntPtr = TD.getIntPtrType(Context); - Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getOperand(1); + Ops[0] = CI->getArgOperand(0); // Extend the amount to i32. 
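The cttz lowering above uses a classic identity: for x != 0, the mask ~x & (x - 1) keeps exactly one set bit for every zero below the lowest set bit of x, so a population count of the mask is the trailing-zero count. Verifiable in a few lines of plain C++:

#include <cassert>

// cttz(x) == ctpop(~x & (x - 1)) for x != 0.
static unsigned ctpop(unsigned v) {
  unsigned n = 0;
  for (; v; v &= v - 1) // clears the lowest set bit each iteration
    ++n;
  return n;
}

static unsigned cttz(unsigned x) { return ctpop(~x & (x - 1)); }

int main() {
  assert(cttz(1) == 0);
  assert(cttz(8) == 3);
  assert(cttz(40) == 3); // 40 == 0b101000
  return 0;
}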
- Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context), + Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context), /* isSigned */ false); Ops[2] = Size; - ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType()); + ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); break; } case Intrinsic::sqrt: { diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index b584704..bf3137e 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -329,12 +329,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) PM.add(createOptimizePHIsPass()); - // Delete dead machine instructions regardless of optimization level. - PM.add(createDeadMachineInstructionElimPass()); - printAndVerify(PM, "After codegen DCE pass", - /* allowDoubleDefs= */ true); - if (OptLevel != CodeGenOpt::None) { + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + PM.add(createDeadMachineInstructionElimPass()); + printAndVerify(PM, "After codegen DCE pass", + /* allowDoubleDefs= */ true); + PM.add(createOptimizeExtsPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); @@ -358,7 +361,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, /* allowDoubleDefs= */ true); // Perform register allocation. - PM.add(createRegisterAllocator()); + PM.add(createRegisterAllocator(OptLevel)); printAndVerify(PM, "After Register Allocation"); // Perform stack slot coloring and post-ra machine LICM. diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 03b4eab..b9527fa 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -118,7 +118,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { SUnit *LatencyPriorityQueue::pop() { if (empty()) return NULL; std::vector::iterator Best = Queue.begin(); - for (std::vector::iterator I = next(Queue.begin()), + for (std::vector::iterator I = llvm::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 025ad05..21a9b7d 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -68,6 +68,37 @@ bool LiveInterval::liveBeforeAndAt(SlotIndex I) const { return r->end == I; } +/// killedAt - Return true if a live range ends at index. Note that the kill +/// point is not contained in the half-open live range. It is usually the +/// getDefIndex() slot following its last use. +bool LiveInterval::killedAt(SlotIndex I) const { + Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I); + + // Now r points to the first interval with start >= I, or ranges.end(). + if (r == ranges.begin()) + return false; + + --r; + // Now r points to the last interval with end <= I. + // r->end is the kill point. + return r->end == I; +} + +/// killedInRange - Return true if the interval has kills in [Start,End). +bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { + Ranges::const_iterator r = + std::lower_bound(ranges.begin(), ranges.end(), End); + + // Now r points to the first interval with start >= End, or ranges.end(). 
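LatencyPriorityQueue::pop above (touched here only to qualify next() as llvm::next) is a linear best-element scan driven by a picker functor, followed by an erase. Its shape on a bare std::vector; PreferLarger is a made-up picker for illustration:

#include <cassert>
#include <vector>

// Picker(a, b) returns true when b should be preferred over a.
static int popBest(std::vector<int> &Queue, bool (*Picker)(int, int)) {
  assert(!Queue.empty() && "pop from empty queue");
  std::vector<int>::iterator Best = Queue.begin();
  for (std::vector<int>::iterator I = Queue.begin() + 1, E = Queue.end();
       I != E; ++I)
    if (Picker(*Best, *I))
      Best = I;
  int V = *Best;
  Queue.erase(Best);
  return V;
}

static bool PreferLarger(int a, int b) { return b > a; }

int main() {
  std::vector<int> Q;
  Q.push_back(3); Q.push_back(9); Q.push_back(5);
  assert(popBest(Q, PreferLarger) == 9);
  assert(Q.size() == 2);
  return 0;
}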
+ if (r == ranges.begin()) + return false; + + --r; + // Now r points to the last interval with end <= End. + // r->end is the kill point. + return r->end >= Start && r->end < End; +} + // overlaps - Return true if the intersection of the two live intervals is // not empty. // @@ -149,7 +180,6 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { assert(I != ranges.end() && "Not a valid interval!"); VNInfo *ValNo = I->valno; - SlotIndex OldEnd = I->end; // Search for the first interval that we can't merge with. Ranges::iterator MergeTo = next(I); @@ -163,9 +193,6 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { // Erase any dead ranges. ranges.erase(next(I), MergeTo); - // Update kill info. - ValNo->removeKills(OldEnd, I->end.getPrevSlot()); - // If the newly formed range now touches the range after it and if they have // the same value number, merge the two ranges into one range. Ranges::iterator Next = next(I); @@ -245,9 +272,6 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { // endpoint as well. if (End > it->end) extendIntervalEndTo(it, End); - else if (End < it->end) - // Overlapping intervals, there might have been a kill here. - it->valno->removeKill(End); return it; } } else { @@ -288,7 +312,6 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { - ValNo->removeKills(Start, End); if (RemoveDeadValNo) { // Check if val# is dead. bool isDead = true; @@ -296,7 +319,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, if (II != I && II->valno == ValNo) { isDead = false; break; - } + } if (isDead) { // Now that ValNo is dead, remove it. If it is the largest value // number, just nuke it (and any other deleted values neighboring it), @@ -320,7 +343,6 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, // Otherwise if the span we are removing is at the end of the LiveRange, // adjust the other way. if (I->end == End) { - ValNo->removeKills(Start, End); I->end = Start; return; } @@ -529,6 +551,7 @@ void LiveInterval::MergeValueInAsValue( SmallVector ReplacedValNos; iterator IP = begin(); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { + assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo"); if (I->valno != RHSValNo) continue; SlotIndex Start = I->start, End = I->end; @@ -823,10 +846,12 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { else { OS << " = "; for (LiveInterval::Ranges::const_iterator I = ranges.begin(), - E = ranges.end(); I != E; ++I) - OS << *I; + E = ranges.end(); I != E; ++I) { + OS << *I; + assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo"); + } } - + // Print value number info. 
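killedAt and killedInRange above share one lookup: binary-search for the first live range starting at or after the query point, step back once, and test that range's end, which is sound because the ranges are sorted and non-overlapping. A self-contained version over half-open [start, end) pairs:

#include <algorithm>
#include <cassert>
#include <vector>

struct Range { int start, end; }; // half-open: [start, end)

static bool startsBefore(const Range &R, int I) { return R.start < I; }

// Mirror of LiveInterval::killedAt: the value is killed at I exactly when
// some live range ends at I.
static bool killedAt(const std::vector<Range> &Ranges, int I) {
  std::vector<Range>::const_iterator r =
      std::lower_bound(Ranges.begin(), Ranges.end(), I, startsBefore);
  // r is the first range with start >= I; a range ending at I, if any,
  // must be the one just before it.
  if (r == Ranges.begin())
    return false;
  --r;
  return r->end == I;
}

int main() {
  std::vector<Range> Ranges;
  Range A = {0, 4}, B = {10, 20};
  Ranges.push_back(A); Ranges.push_back(B);
  assert(killedAt(Ranges, 4));   // first range's kill point
  assert(!killedAt(Ranges, 12)); // live through 12, not killed there
  assert(!killedAt(Ranges, 0));
  return 0;
}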
if (getNumValNums()) { OS << " "; @@ -843,21 +868,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { OS << "?"; else OS << vni->def; - unsigned ee = vni->kills.size(); - if (ee || vni->hasPHIKill()) { - OS << "-("; - for (unsigned j = 0; j != ee; ++j) { - OS << vni->kills[j]; - if (j != ee-1) - OS << " "; - } - if (vni->hasPHIKill()) { - if (ee) - OS << " "; - OS << "phi"; - } - OS << ")"; - } } } } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index a6d38ad..194d03d 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -50,9 +50,6 @@ using namespace llvm; static cl::opt DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); -static cl::opt EnableFastSpilling("fast-spill", - cl::init(false), cl::Hidden); - STATISTIC(numIntervals , "Number of original intervals"); STATISTIC(numFolds , "Number of loads/stores folded into instructions"); STATISTIC(numSplits , "Number of intervals split"); @@ -90,8 +87,8 @@ void LiveIntervals::releaseMemory() { r2iMap_.clear(); - // Release VNInfo memroy regions after all VNInfo objects are dtor'd. - VNInfoAllocator.DestroyAll(); + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. + VNInfoAllocator.Reset(); while (!CloneMIs.empty()) { MachineInstr *MI = CloneMIs.back(); CloneMIs.pop_back(); @@ -195,6 +192,10 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) if (SrcReg == li.reg || DstReg == li.reg) continue; + if (MI.isCopy()) + if (MI.getOperand(0).getReg() == li.reg || + MI.getOperand(1).getReg() == li.reg) + continue; // Check for operands using reg for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { @@ -218,10 +219,7 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, return false; } -/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except -/// it checks for sub-register reference and it can check use as well. 
-bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li, - unsigned Reg, bool CheckUse, +bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, SmallPtrSet &JoinedCopies) { for (LiveInterval::Ranges::const_iterator I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { @@ -239,12 +237,11 @@ bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li, MachineOperand& MO = MI->getOperand(i); if (!MO.isReg()) continue; - if (MO.isUse() && !CheckUse) - continue; unsigned PhysReg = MO.getReg(); - if (PhysReg == 0 || TargetRegisterInfo::isVirtualRegister(PhysReg)) + if (PhysReg == 0 || PhysReg == Reg || + TargetRegisterInfo::isVirtualRegister(PhysReg)) continue; - if (tri_->isSubRegister(Reg, PhysReg)) + if (tri_->regsOverlap(Reg, PhysReg)) return true; } } @@ -272,7 +269,7 @@ bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { if (MO.getReg() == Reg && MO.isDef()) { assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() && MI.getOperand(MOIdx).getSubReg() && - MO.getSubReg()); + (MO.getSubReg() || MO.isImplicit())); return true; } } @@ -328,9 +325,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() || - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (mi->isCopyLike() || + tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) { CopyMI = mi; + } VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); @@ -356,7 +354,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, LiveRange LR(defIndex, killIdx, ValNo); interval.addRange(LR); DEBUG(dbgs() << " +" << LR << "\n"); - ValNo->addKill(killIdx); return; } } @@ -376,7 +373,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // valno in the killing blocks. assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); DEBUG(dbgs() << " phi-join"); - ValNo->addKill(indexes_->getTerminatorGap(mbb)); ValNo->setHasPHIKill(true); } else { // Iterate over all of the blocks that the variable is completely @@ -407,7 +403,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, } LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); - ValNo->addKill(killIdx); DEBUG(dbgs() << " +" << LR); } @@ -434,11 +429,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // are actually two values in the live interval. Because of this we // need to take the LiveRegion that defines this register and split it // into two values. - // Two-address vregs should always only be redefined once. This means - // that at this point, there should be exactly one value number in it. - assert((PartReDef || interval.containsOneValue()) && - "Unexpected 2-addr liveint!"); - SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex(); SlotIndex RedefIndex = MIIdx.getDefIndex(); if (MO.isEarlyClobber()) RedefIndex = MIIdx.getUseIndex(); @@ -446,8 +436,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex.getUseIndex()); VNInfo *OldValNo = OldLR->valno; + SlotIndex DefIndex = OldValNo->def.getDefIndex(); - // Delete the initial value, which should be short and continuous, + // Delete the previous value, which should be short and continuous, // because the 2-addr copy must be in the same MBB as the redef. 
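A pattern worth naming in this file's hunks: the explicit isExtractSubreg() || isInsertSubreg() || isSubregToReg() enumerations collapse into the newer MachineInstr::isCopyLike(), usually still paired with TargetInstrInfo::isMoveInstr. Written out as a hypothetical in-tree helper (both calls are taken verbatim from the hunks above; this compiles against the tree and is not a patch addition):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Hypothetical helper: the copy-detection idiom the hunks above now repeat
// when deciding whether a value was defined by a copy.
static bool definedByCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
  return MI->isCopyLike() ||
         TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg);
}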
       interval.removeRange(DefIndex, RedefIndex);
@@ -464,15 +455,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
 
       // A re-def may be a copy. e.g. %reg1030:6 = VMOVD %reg1026, ...
       unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
-      if (PartReDef &&
-          tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
+      if (PartReDef && (mi->isCopyLike() ||
+          tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)))
         OldValNo->setCopy(&*mi);
 
       // Add the new live interval which replaces the range for the input copy.
       LiveRange LR(DefIndex, RedefIndex, ValNo);
       DEBUG(dbgs() << " replace range with " << LR);
       interval.addRange(LR);
-      ValNo->addKill(RedefIndex);
 
       // If this redefinition is dead, we need to add a dummy unit live
       // range covering the def slot.
@@ -496,7 +486,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
     VNInfo *ValNo;
     MachineInstr *CopyMI = NULL;
     unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
-    if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg()||
+    if (mi->isCopyLike() ||
         tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
       CopyMI = mi;
     ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
@@ -504,7 +494,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       SlotIndex killIndex = getMBBEndIdx(mbb);
       LiveRange LR(defIndex, killIndex, ValNo);
       interval.addRange(LR);
-      ValNo->addKill(indexes_->getTerminatorGap(mbb));
       ValNo->setHasPHIKill(true);
       DEBUG(dbgs() << " phi-join +" << LR);
     } else {
@@ -600,7 +589,6 @@ exit:
     ValNo->setHasRedefByEC(true);
   LiveRange LR(start, end, ValNo);
   interval.addRange(LR);
-  LR.valno->addKill(end);
   DEBUG(dbgs() << " +" << LR << '\n');
 }
@@ -615,7 +603,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
   else if (allocatableRegs_[MO.getReg()]) {
     MachineInstr *CopyMI = NULL;
     unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
-    if (MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg() ||
+    if (MI->isCopyLike() ||
         tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
       CopyMI = MI;
     handlePhysicalRegisterDef(MBB, MI, MIIdx, MO,
@@ -701,7 +689,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
 
   LiveRange LR(start, end, vni);
   interval.addRange(LR);
-  LR.valno->addKill(end);
   DEBUG(dbgs() << " +" << LR << '\n');
 }
@@ -787,37 +774,6 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
   return NewLI;
 }
 
-/// getVNInfoSourceReg - Helper function that parses the specified VNInfo
-/// copy field and returns the source register that defines it.
-unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
-  if (!VNI->getCopy())
-    return 0;
-
-  if (VNI->getCopy()->isExtractSubreg()) {
-    // If it's extracting out of a physical register, return the sub-register.
-    unsigned Reg = VNI->getCopy()->getOperand(1).getReg();
-    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
-      unsigned SrcSubReg = VNI->getCopy()->getOperand(2).getImm();
-      unsigned DstSubReg = VNI->getCopy()->getOperand(0).getSubReg();
-      if (SrcSubReg == DstSubReg)
-        // %reg1034:3 = EXTRACT_SUBREG %EDX, 3
-        // reg1034 can still be coalesced to EDX.
-        return Reg;
-      assert(DstSubReg == 0);
-      Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm());
-    }
-    return Reg;
-  } else if (VNI->getCopy()->isInsertSubreg() ||
-             VNI->getCopy()->isSubregToReg())
-    return VNI->getCopy()->getOperand(2).getReg();
-
-  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
-  if (tii_->isMoveInstr(*VNI->getCopy(), SrcReg, DstReg, SrcSubReg, DstSubReg))
-    return SrcReg;
-  llvm_unreachable("Unrecognized copy instruction!");
-  return 0;
-}
-
 //===----------------------------------------------------------------------===//
 // Register allocator hooks.
 //
@@ -991,22 +947,22 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
   if (DefMI && (MRInfo & VirtRegMap::isMod))
     return false;
 
-  MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(*mf_, MI, FoldOps, Slot)
-                           : tii_->foldMemoryOperand(*mf_, MI, FoldOps, DefMI);
+  MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot)
+                           : tii_->foldMemoryOperand(MI, FoldOps, DefMI);
   if (fmi) {
     // Remember this instruction uses the spill slot.
     if (isSS) vrm.addSpillSlotUse(Slot, fmi);
 
     // Attempt to fold the memory reference into the instruction. If
     // we can do this, we don't need to insert spill code.
-    MachineBasicBlock &MBB = *MI->getParent();
     if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot))
       vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo);
     vrm.transferSpillPts(MI, fmi);
     vrm.transferRestorePts(MI, fmi);
     vrm.transferEmergencySpills(MI, fmi);
     ReplaceMachineInstrInMaps(MI, fmi);
-    MI = MBB.insert(MBB.erase(MI), fmi);
+    MI->eraseFromParent();
+    MI = fmi;
     ++numFolds;
     return true;
   }
@@ -1098,7 +1054,6 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
     if (!mop.isReg())
       continue;
     unsigned Reg = mop.getReg();
-    unsigned RegI = Reg;
     if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
       continue;
     if (Reg != li.reg)
@@ -1140,26 +1095,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
       //
       // Keep track of whether we replace a use and/or def so that we can
       // create the spill interval with the appropriate range.
-
-      HasUse = mop.isUse();
-      HasDef = mop.isDef();
       SmallVector<unsigned, 2> Ops;
-      Ops.push_back(i);
-      for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
-        const MachineOperand &MOj = MI->getOperand(j);
-        if (!MOj.isReg())
-          continue;
-        unsigned RegJ = MOj.getReg();
-        if (RegJ == 0 || TargetRegisterInfo::isPhysicalRegister(RegJ))
-          continue;
-        if (RegJ == RegI) {
-          Ops.push_back(j);
-          if (!MOj.isUndef()) {
-            HasUse |= MOj.isUse();
-            HasDef |= MOj.isDef();
-          }
-        }
-      }
+      tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops);
 
       // Create a new virtual register for the spill interval.
       // Create the new register now so we can map the fold instruction
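The hunk above collapses a hand-rolled operand scan into a single call to readsWritesVirtualRegister(), which answers "is the register read?" and "is it written?" in one pass while collecting the operand indices. A minimal standalone sketch of that shape (illustrative types only, not LLVM's API):

    #include <iostream>
    #include <tuple>
    #include <utility>
    #include <vector>

    // Hypothetical stand-in for a machine operand: a register plus flags.
    struct Operand { unsigned Reg; bool IsUse; bool IsDef; };

    // One scan over the operand list computes both answers and, optionally,
    // records which operand slots mention the register.
    std::pair<bool, bool> readsWrites(const std::vector<Operand> &Ops,
                                      unsigned Reg,
                                      std::vector<unsigned> *Indices = nullptr) {
      bool Use = false, Def = false;
      for (unsigned i = 0; i != Ops.size(); ++i) {
        if (Ops[i].Reg != Reg)
          continue;
        if (Indices)
          Indices->push_back(i);
        Use |= Ops[i].IsUse;
        Def |= Ops[i].IsDef;
      }
      return std::make_pair(Use, Def);
    }

    int main() {
      std::vector<Operand> Ops = {{7, false, true}, {5, true, false},
                                  {7, true, false}};
      bool HasUse, HasDef;
      std::tie(HasUse, HasDef) = readsWrites(Ops, 7);
      std::cout << HasUse << HasDef << "\n"; // "11": reg 7 is read and written
      return 0;
    }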
@@ -1294,16 +1231,7 @@
 bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li,
                                          const VNInfo *VNI,
                                          MachineBasicBlock *MBB,
                                          SlotIndex Idx) const {
-  SlotIndex End = getMBBEndIdx(MBB);
-  for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) {
-    if (VNI->kills[j].isPHI())
-      continue;
-
-    SlotIndex KillIdx = VNI->kills[j];
-    if (KillIdx > Idx && KillIdx <= End)
-      return true;
-  }
-  return false;
+  return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB));
 }
 
 /// RewriteInfo - Keep track of machine instrs that will be rewritten
@@ -1312,10 +1240,7 @@ namespace {
   struct RewriteInfo {
     SlotIndex Index;
     MachineInstr *MI;
-    bool HasUse;
-    bool HasDef;
-    RewriteInfo(SlotIndex i, MachineInstr *mi, bool u, bool d)
-      : Index(i), MI(mi), HasUse(u), HasDef(d) {}
+    RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {}
   };
 
   struct RewriteInfoCompare {
@@ -1394,7 +1319,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
       // easily see a situation where both registers are reloaded before
       // the INSERT_SUBREG and both target registers that would overlap.
       continue;
-    RewriteMIs.push_back(RewriteInfo(index, MI, O.isUse(), O.isDef()));
+    RewriteMIs.push_back(RewriteInfo(index, MI));
   }
   std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare());
 
@@ -1404,18 +1329,11 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
     RewriteInfo &rwi = RewriteMIs[i];
     ++i;
     SlotIndex index = rwi.Index;
-    bool MIHasUse = rwi.HasUse;
-    bool MIHasDef = rwi.HasDef;
     MachineInstr *MI = rwi.MI;
     // If MI def and/or use the same register multiple times, then there
     // are multiple entries.
-    unsigned NumUses = MIHasUse;
     while (i != e && RewriteMIs[i].MI == MI) {
       assert(RewriteMIs[i].Index == index);
-      bool isUse = RewriteMIs[i].HasUse;
-      if (isUse) ++NumUses;
-      MIHasUse |= isUse;
-      MIHasDef |= RewriteMIs[i].HasDef;
       ++i;
     }
     MachineBasicBlock *MBB = MI->getParent();
@@ -1440,7 +1358,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
         //     = use
         // It's better to start a new interval to avoid artifically
         // extend the new interval.
-        if (MIHasDef && !MIHasUse) {
+        if (MI->readsWritesVirtualRegister(li.reg) ==
+            std::make_pair(false,true)) {
           MBBVRegsMap.erase(MBB->getNumber());
           ThisVReg = 0;
         }
@@ -1652,103 +1571,9 @@ LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
 }
 
 std::vector<LiveInterval*> LiveIntervals::
-addIntervalsForSpillsFast(const LiveInterval &li,
-                          const MachineLoopInfo *loopInfo,
-                          VirtRegMap &vrm) {
-  unsigned slot = vrm.assignVirt2StackSlot(li.reg);
-
-  std::vector<LiveInterval*> added;
-
-  assert(li.isSpillable() && "attempt to spill already spilled interval!");
-
-  DEBUG({
-      dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
-      li.dump();
-      dbgs() << '\n';
-    });
-
-  const TargetRegisterClass* rc = mri_->getRegClass(li.reg);
-
-  MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg);
-  while (RI != mri_->reg_end()) {
-    MachineInstr* MI = &*RI;
-
-    SmallVector<unsigned, 2> Indices;
-    bool HasUse = false;
-    bool HasDef = false;
-
-    for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
-      MachineOperand& mop = MI->getOperand(i);
-      if (!mop.isReg() || mop.getReg() != li.reg) continue;
-
-      HasUse |= MI->getOperand(i).isUse();
-      HasDef |= MI->getOperand(i).isDef();
-
-      Indices.push_back(i);
-    }
-
-    if (!tryFoldMemoryOperand(MI, vrm, NULL, getInstructionIndex(MI),
-                              Indices, true, slot, li.reg)) {
-      unsigned NewVReg = mri_->createVirtualRegister(rc);
-      vrm.grow();
-      vrm.assignVirt2StackSlot(NewVReg, slot);
-
-      // create a new register for this spill
-      LiveInterval &nI = getOrCreateInterval(NewVReg);
-      nI.markNotSpillable();
-
-      // Rewrite register operands to use the new vreg.
-      for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(),
-             E = Indices.end(); I != E; ++I) {
-        MI->getOperand(*I).setReg(NewVReg);
-
-        if (MI->getOperand(*I).isUse())
-          MI->getOperand(*I).setIsKill(true);
-      }
-
-      // Fill in the new live interval.
-      SlotIndex index = getInstructionIndex(MI);
-      if (HasUse) {
-        LiveRange LR(index.getLoadIndex(), index.getUseIndex(),
-                     nI.getNextValue(SlotIndex(), 0, false,
-                                     getVNInfoAllocator()));
-        DEBUG(dbgs() << " +" << LR);
-        nI.addRange(LR);
-        vrm.addRestorePoint(NewVReg, MI);
-      }
-      if (HasDef) {
-        LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
-                     nI.getNextValue(SlotIndex(), 0, false,
-                                     getVNInfoAllocator()));
-        DEBUG(dbgs() << " +" << LR);
-        nI.addRange(LR);
-        vrm.addSpillPoint(NewVReg, true, MI);
-      }
-
-      added.push_back(&nI);
-
-      DEBUG({
-          dbgs() << "\t\t\t\tadded new interval: ";
-          nI.dump();
-          dbgs() << '\n';
-        });
-    }
-
-
-    RI = mri_->reg_begin(li.reg);
-  }
-
-  return added;
-}
-
-std::vector<LiveInterval*> LiveIntervals::
 addIntervalsForSpills(const LiveInterval &li,
                       SmallVectorImpl<LiveInterval*> &SpillIs,
                       const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
-
-  if (EnableFastSpilling)
-    return addIntervalsForSpillsFast(li, loopInfo, vrm);
-
   assert(li.isSpillable() && "attempt to spill already spilled interval!");
 
   DEBUG({
@@ -2184,7 +2009,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
                   SlotIndex(getInstructionIndex(startInst).getDefIndex()),
                   startInst, true, getVNInfoAllocator());
   VN->setHasPHIKill(true);
-  VN->kills.push_back(indexes_->getTerminatorGap(startInst->getParent()));
   LiveRange LR(
      SlotIndex(getInstructionIndex(startInst).getDefIndex()),
      getMBBEndIdx(startInst->getParent()), VN);
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index 798b9b9..709e2c6 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -35,8 +35,8 @@ void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
 }
 
 void LiveStacks::releaseMemory() {
-  // Release VNInfo memroy regions after all VNInfo objects are dtor'd.
-  VNInfoAllocator.DestroyAll();
+  // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd.
+  VNInfoAllocator.Reset();
   S2IMap.clear();
   S2RCMap.clear();
 }
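Both files above switch their VNInfo allocator from DestroyAll() to Reset(): the arena's memory is recycled without running per-object destructors, which is only legal because VNInfo is trivially destructible. A minimal standalone sketch of that invariant (toy arena, invented names, not LLVM's BumpPtrAllocator):

    #include <cassert>
    #include <cstddef>
    #include <new>
    #include <type_traits>
    #include <vector>

    // Toy bump allocator: like BumpPtrAllocator::Reset(), reset() reclaims
    // everything at once and never runs destructors.
    class BumpArena {
      std::vector<char> Slab;
      size_t Next = 0;
    public:
      explicit BumpArena(size_t Bytes) : Slab(Bytes) {}
      template <typename T> T *allocate() {
        static_assert(std::is_trivially_destructible<T>::value,
                      "reset() skips destructors, so T must not need one");
        size_t Offset = (Next + alignof(T) - 1) / alignof(T) * alignof(T);
        assert(Offset + sizeof(T) <= Slab.size() && "slab exhausted");
        Next = Offset + sizeof(T);
        return new (&Slab[Offset]) T();
      }
      void reset() { Next = 0; } // memory reused, nothing destroyed
    };

    struct ValueNumber { unsigned Id; }; // stand-in for VNInfo

    int main() {
      BumpArena Arena(4096);
      ValueNumber *VN = Arena.allocate<ValueNumber>();
      VN->Id = 42;
      Arena.reset(); // cheap: no per-object teardown
      return 0;
    }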
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 079684e..41b891d 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -286,7 +286,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) {
   MachineInstr *LastDef = PhysRegDef[Reg];
   MachineInstr *LastUse = PhysRegUse[Reg];
   if (!LastDef && !LastUse)
-    return false;
+    return 0;
 
   MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef;
   unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef];
@@ -609,7 +609,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
 
     // Finally, if the last instruction in the block is a return, make sure to
     // mark it as using all of the live-out values in the function.
-    if (!MBB->empty() && MBB->back().getDesc().isReturn()) {
+    // Things marked both call and return are tail calls; do not do this for
+    // them.  The tail callee need not take the same registers as input
+    // that it produces as output, and there are dependencies for its input
+    // registers elsewhere.
+    if (!MBB->empty() && MBB->back().getDesc().isReturn()
+        && !MBB->back().getDesc().isCall()) {
       MachineInstr *Ret = &MBB->back();
 
       for (MachineRegisterInfo::liveout_iterator
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index b0348a5..dfd4eae 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -53,15 +53,15 @@ namespace {
     bool runOnMachineFunction(MachineFunction&);
 
   private:
-    bool LowerExtract(MachineInstr *MI);
-    bool LowerInsert(MachineInstr *MI);
     bool LowerSubregToReg(MachineInstr *MI);
+    bool LowerCopy(MachineInstr *MI);
     void TransferDeadFlag(MachineInstr *MI, unsigned DstReg,
                           const TargetRegisterInfo *TRI);
     void TransferKillFlag(MachineInstr *MI, unsigned SrcReg,
                           const TargetRegisterInfo *TRI,
                           bool AddIfNotFound = false);
+    void TransferImplicitDefs(MachineInstr *MI);
   };
 
   char LowerSubregsInstructionPass::ID = 0;
@@ -83,7 +83,7 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI,
     if (MII->addRegisterDead(DstReg, TRI))
       break;
     assert(MII != MI->getParent()->begin() &&
-           "copyRegToReg output doesn't reference destination register!");
+           "copyPhysReg output doesn't reference destination register!");
   }
 }
@@ -100,64 +100,24 @@ LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI,
     if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound))
       break;
     assert(MII != MI->getParent()->begin() &&
-           "copyRegToReg output doesn't reference source register!");
+           "copyPhysReg output doesn't reference source register!");
   }
 }
 
-bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) {
-  MachineBasicBlock *MBB = MI->getParent();
-
-  assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
-         MI->getOperand(1).isReg() && MI->getOperand(1).isUse() &&
-         MI->getOperand(2).isImm() && "Malformed extract_subreg");
-
-  unsigned DstReg   = MI->getOperand(0).getReg();
-  unsigned SuperReg = MI->getOperand(1).getReg();
-  unsigned SubIdx   = MI->getOperand(2).getImm();
-  unsigned SrcReg   = TRI->getSubReg(SuperReg, SubIdx);
-
-  assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) &&
-         "Extract supperg source must be a physical register");
-  assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
-         "Extract destination must be in a physical register");
-  assert(SrcReg && "invalid subregister index for register");
-
-  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
-
-  if (SrcReg == DstReg) {
-    // No need to insert an identity copy instruction.
-    if (MI->getOperand(1).isKill()) {
-      // We must make sure the super-register gets killed. Replace the
-      // instruction with KILL.
-      MI->setDesc(TII->get(TargetOpcode::KILL));
-      MI->RemoveOperand(2);     // SubIdx
-      DEBUG(dbgs() << "subreg: replace by: " << *MI);
-      return true;
-    }
-
-    DEBUG(dbgs() << "subreg: eliminated!");
-  } else {
-    // Insert copy
-    const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg);
-    const TargetRegisterClass *TRCD = TRI->getPhysicalRegisterRegClass(SrcReg);
-    bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS,
-                                     MI->getDebugLoc());
-    (void)Emitted;
-    assert(Emitted && "Subreg and Dst must be of compatible register class");
-    // Transfer the kill/dead flags, if needed.
-    if (MI->getOperand(0).isDead())
-      TransferDeadFlag(MI, DstReg, TRI);
-    if (MI->getOperand(1).isKill())
-      TransferKillFlag(MI, SuperReg, TRI, true);
-    DEBUG({
-        MachineBasicBlock::iterator dMI = MI;
-        dbgs() << "subreg: " << *(--dMI);
-      });
+/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered
+/// replacement instructions immediately precede it.  Copy any implicit-def
+/// operands from MI to the replacement instruction.
+void
+LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) {
+  MachineBasicBlock::iterator CopyMI = MI;
+  --CopyMI;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg() || !MO.isImplicit() || MO.isUse())
+      continue;
+    CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true));
   }
-
-  DEBUG(dbgs() << '\n');
-  MBB->erase(MI);
-  return true;
 }
 
 bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
@@ -166,10 +126,10 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
          MI->getOperand(1).isImm() &&
          (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
           MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
-
+
   unsigned DstReg  = MI->getOperand(0).getReg();
   unsigned InsReg  = MI->getOperand(2).getReg();
-  unsigned InsSIdx = MI->getOperand(2).getSubReg();
+  assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
   unsigned SubIdx  = MI->getOperand(3).getImm();
 
   assert(SubIdx != 0 && "Invalid index for insert_subreg");
@@ -182,27 +142,25 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
 
   DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
 
-  if (DstSubReg == InsReg && InsSIdx == 0) {
+  if (DstSubReg == InsReg) {
     // No need to insert an identify copy instruction.
     // Watch out for case like this:
-    // %RAX = ...
-    // %RAX = SUBREG_TO_REG 0, %EAX:3, 3
-    // The first def is defining RAX, not EAX so the top bits were not
-    // zero extended.
+    // %RAX = SUBREG_TO_REG 0, %EAX, 3
+    // We must leave %RAX live.
+    if (DstReg != InsReg) {
+      MI->setDesc(TII->get(TargetOpcode::KILL));
+      MI->RemoveOperand(3);     // SubIdx
+      MI->RemoveOperand(1);     // Imm
+      DEBUG(dbgs() << "subreg: replace by: " << *MI);
+      return true;
+    }
     DEBUG(dbgs() << "subreg: eliminated!");
   } else {
-    // Insert sub-register copy
-    const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
-    const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
-    bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
-                                     MI->getDebugLoc());
-    (void)Emitted;
-    assert(Emitted && "Subreg and Dst must be of compatible register class");
+    TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg,
+                     MI->getOperand(2).isKill());
     // Transfer the kill/dead flags, if needed.
     if (MI->getOperand(0).isDead())
       TransferDeadFlag(MI, DstSubReg, TRI);
-    if (MI->getOperand(2).isKill())
-      TransferKillFlag(MI, InsReg, TRI);
     DEBUG({
         MachineBasicBlock::iterator dMI = MI;
         dbgs() << "subreg: " << *(--dMI);
@@ -214,87 +172,39 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) {
   return true;
 }
 
-bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) {
-  MachineBasicBlock *MBB = MI->getParent();
-  assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) &&
-         (MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) &&
-         (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
-          MI->getOperand(3).isImm() && "Invalid insert_subreg");
-
-  unsigned DstReg = MI->getOperand(0).getReg();
-#ifndef NDEBUG
-  unsigned SrcReg = MI->getOperand(1).getReg();
-#endif
-  unsigned InsReg = MI->getOperand(2).getReg();
-  unsigned SubIdx = MI->getOperand(3).getImm();
+bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) {
+  MachineOperand &DstMO = MI->getOperand(0);
+  MachineOperand &SrcMO = MI->getOperand(1);
 
-  assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?");
-  assert(SubIdx != 0 && "Invalid index for insert_subreg");
-  unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
-  assert(DstSubReg && "invalid subregister index for register");
-  assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
-         "Insert superreg source must be in a physical register");
-  assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
-         "Inserted value must be in a physical register");
-
-  DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
-
-  if (DstSubReg == InsReg) {
-    // No need to insert an identity copy instruction. If the SrcReg was
-    // <undef>, we need to make sure it is alive by inserting a KILL
-    if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) {
-      MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                                        TII->get(TargetOpcode::KILL), DstReg);
-      if (MI->getOperand(2).isUndef())
-        MIB.addReg(InsReg, RegState::Undef);
-      else
-        MIB.addReg(InsReg, RegState::Kill);
-    } else {
-      DEBUG(dbgs() << "subreg: eliminated!\n");
-      MBB->erase(MI);
+  if (SrcMO.getReg() == DstMO.getReg()) {
+    DEBUG(dbgs() << "identity copy: " << *MI);
+    // No need to insert an identity copy instruction, but replace with a KILL
+    // if liveness is changed.
+    if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) {
+      // We must make sure the super-register gets killed. Replace the
+      // instruction with KILL.
+      MI->setDesc(TII->get(TargetOpcode::KILL));
+      DEBUG(dbgs() << "replaced by:   " << *MI);
       return true;
     }
-  } else {
-    // Insert sub-register copy
-    const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg);
-    const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg);
-    if (MI->getOperand(2).isUndef())
-      // If the source register being inserted is undef, then this becomes a
-      // KILL.
-      BuildMI(*MBB, MI, MI->getDebugLoc(),
-              TII->get(TargetOpcode::KILL), DstSubReg);
-    else {
-      bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1,
-                                       MI->getDebugLoc());
-      (void)Emitted;
-      assert(Emitted && "Subreg and Dst must be of compatible register class");
-    }
-    MachineBasicBlock::iterator CopyMI = MI;
-    --CopyMI;
-
-    // INSERT_SUBREG is a two-address instruction so it implicitly kills SrcReg.
-    if (!MI->getOperand(1).isUndef())
-      CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true));
-
-    // Transfer the kill/dead flags, if needed.
-    if (MI->getOperand(0).isDead()) {
-      TransferDeadFlag(MI, DstSubReg, TRI);
-    } else {
-      // Make sure the full DstReg is live after this replacement.
-      CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true));
-    }
-
-    // Make sure the inserted register gets killed
-    if (MI->getOperand(2).isKill() && !MI->getOperand(2).isUndef())
-      TransferKillFlag(MI, InsReg, TRI);
+    // Vanilla identity copy.
+    MI->eraseFromParent();
+    return true;
   }
 
-  DEBUG({
-      MachineBasicBlock::iterator dMI = MI;
-      dbgs() << "subreg: " << *(--dMI) << "\n";
-    });
+  DEBUG(dbgs() << "real copy:   " << *MI);
+  TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(),
+                   DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill());
 
-  MBB->erase(MI);
+  if (DstMO.isDead())
+    TransferDeadFlag(MI, DstMO.getReg(), TRI);
+  if (MI->getNumOperands() > 2)
+    TransferImplicitDefs(MI);
+  DEBUG({
+    MachineBasicBlock::iterator dMI = MI;
+    dbgs() << "replaced by: " << *(--dMI);
+  });
+  MI->eraseFromParent();
   return true;
 }
@@ -317,12 +227,13 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
          mi != me;) {
       MachineBasicBlock::iterator nmi = llvm::next(mi);
       MachineInstr *MI = mi;
-      if (MI->isExtractSubreg()) {
-        MadeChange |= LowerExtract(MI);
-      } else if (MI->isInsertSubreg()) {
-        MadeChange |= LowerInsert(MI);
-      } else if (MI->isSubregToReg()) {
+      assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear");
+      assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
+             "EXTRACT_SUBREG should no longer appear");
+      if (MI->isSubregToReg()) {
         MadeChange |= LowerSubregToReg(MI);
+      } else if (MI->isCopy()) {
+        MadeChange |= LowerCopy(MI);
       }
       mi = nmi;
     }
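The new LowerCopy() above has three outcomes for a COPY pseudo: erase a plain identity copy, downgrade an identity copy that carries liveness information to a KILL, or expand a real copy via the target's copyPhysReg hook. A compilable toy model of just that decision (invented types, not the pass itself):

    #include <iostream>
    #include <string>

    struct Copy { unsigned Dst, Src; bool DstDead, SrcUndef, HasImplicitOps; };

    std::string lower(const Copy &C) {
      if (C.Src == C.Dst) {
        // Identity copy: usually deletable, but if it carries liveness
        // information (dead def, undef source, extra implicit operands)
        // it must survive as a KILL marker.
        if (C.DstDead || C.SrcUndef || C.HasImplicitOps)
          return "replace with KILL";
        return "erase";
      }
      return "expand to target copy (copyPhysReg)";
    }

    int main() {
      std::cout << lower({1, 1, false, false, false}) << "\n"; // erase
      std::cout << lower({1, 1, true,  false, false}) << "\n"; // KILL
      std::cout << lower({1, 2, false, false, false}) << "\n"; // expand
      return 0;
    }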
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index eaaa1f8..a27ee47 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -13,7 +13,10 @@
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/BasicBlock.h"
+#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -136,6 +139,13 @@ void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
   Parent->getParent()->DeleteMachineInstr(MI);
 }
 
+MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
+  iterator I = begin();
+  while (I != end() && I->isPHI())
+    ++I;
+  return I;
+}
+
 MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
   iterator I = end();
   while (I != begin() && (--I)->getDesc().isTerminator())
@@ -245,6 +255,7 @@ void MachineBasicBlock::updateTerminator() {
   MachineBasicBlock *TBB = 0, *FBB = 0;
   SmallVector<MachineOperand, 4> Cond;
+  DebugLoc dl;  // FIXME: this is nowhere
   bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
   (void) B;
   assert(!B && "UpdateTerminators requires analyzable predecessors!");
@@ -259,7 +270,7 @@ void MachineBasicBlock::updateTerminator() {
       // its layout successor, insert a branch.
       TBB = *succ_begin();
       if (!isLayoutSuccessor(TBB))
-        TII->InsertBranch(*this, TBB, 0, Cond);
+        TII->InsertBranch(*this, TBB, 0, Cond, dl);
     }
   } else {
     if (FBB) {
@@ -270,10 +281,10 @@ void MachineBasicBlock::updateTerminator() {
         if (TII->ReverseBranchCondition(Cond))
           return;
         TII->RemoveBranch(*this);
-        TII->InsertBranch(*this, FBB, 0, Cond);
+        TII->InsertBranch(*this, FBB, 0, Cond, dl);
       } else if (isLayoutSuccessor(FBB)) {
         TII->RemoveBranch(*this);
-        TII->InsertBranch(*this, TBB, 0, Cond);
+        TII->InsertBranch(*this, TBB, 0, Cond, dl);
       }
     } else {
       // The block has a fallthrough conditional branch.
@@ -284,14 +295,14 @@ void MachineBasicBlock::updateTerminator() {
         if (TII->ReverseBranchCondition(Cond)) {
           // We can't reverse the condition, add an unconditional branch.
           Cond.clear();
-          TII->InsertBranch(*this, MBBA, 0, Cond);
+          TII->InsertBranch(*this, MBBA, 0, Cond, dl);
           return;
         }
         TII->RemoveBranch(*this);
-        TII->InsertBranch(*this, MBBA, 0, Cond);
+        TII->InsertBranch(*this, MBBA, 0, Cond, dl);
       } else if (!isLayoutSuccessor(MBBA)) {
         TII->RemoveBranch(*this);
-        TII->InsertBranch(*this, TBB, MBBA, Cond);
+        TII->InsertBranch(*this, TBB, MBBA, Cond, dl);
       }
     }
   }
@@ -331,12 +342,32 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
   if (this == fromMBB)
     return;
 
-  for (MachineBasicBlock::succ_iterator I = fromMBB->succ_begin(),
-       E = fromMBB->succ_end(); I != E; ++I)
-    addSuccessor(*I);
+  while (!fromMBB->succ_empty()) {
+    MachineBasicBlock *Succ = *fromMBB->succ_begin();
+    addSuccessor(Succ);
+    fromMBB->removeSuccessor(Succ);
+  }
+}
+
+void
+MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
+  if (this == fromMBB)
+    return;
 
-  while (!fromMBB->succ_empty())
-    fromMBB->removeSuccessor(fromMBB->succ_begin());
+  while (!fromMBB->succ_empty()) {
+    MachineBasicBlock *Succ = *fromMBB->succ_begin();
+    addSuccessor(Succ);
+    fromMBB->removeSuccessor(Succ);
+
+    // Fix up any PHI nodes in the successor.
+    for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end();
+         MI != ME && MI->isPHI(); ++MI)
+      for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
+        MachineOperand &MO = MI->getOperand(i);
+        if (MO.getMBB() == fromMBB)
+          MO.setMBB(this);
+      }
+  }
 }
 
 bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
@@ -395,6 +426,82 @@ bool MachineBasicBlock::canFallThrough() {
   return FBB == 0;
 }
 
+MachineBasicBlock *
+MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
+  MachineFunction *MF = getParent();
+  DebugLoc dl;  // FIXME: this is nowhere
+
+  // We may need to update this's terminator, but we can't do that if
+  // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
+  const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
+    return NULL;
+
+  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+  MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
+  DEBUG(dbgs() << "PHIElimination splitting critical edge:"
        " BB#" << getNumber()
+        << " -- BB#" << NMBB->getNumber()
+        << " -- BB#" << Succ->getNumber() << '\n');
+
+  ReplaceUsesOfBlockWith(Succ, NMBB);
+  updateTerminator();
+
+  // Insert unconditional "jump Succ" instruction in NMBB if necessary.
+  NMBB->addSuccessor(Succ);
+  if (!NMBB->isLayoutSuccessor(Succ)) {
+    Cond.clear();
+    MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+  }
+
+  // Fix PHI nodes in Succ so they refer to NMBB instead of this
+  for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+       i != e && i->isPHI(); ++i)
+    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+      if (i->getOperand(ni+1).getMBB() == this)
+        i->getOperand(ni+1).setMBB(NMBB);
+
+  if (LiveVariables *LV =
+        P->getAnalysisIfAvailable<LiveVariables>())
+    LV->addNewBlock(NMBB, this, Succ);
+
+  if (MachineDominatorTree *MDT =
+        P->getAnalysisIfAvailable<MachineDominatorTree>())
+    MDT->addNewBlock(NMBB, this);
+
+  if (MachineLoopInfo *MLI =
+        P->getAnalysisIfAvailable<MachineLoopInfo>())
+    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+      // If one or the other blocks were not in a loop, the new block is not
+      // either, and thus LI doesn't need to be updated.
+      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+        if (TIL == DestLoop) {
+          // Both in the same loop, the NMBB joins loop.
+          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else if (TIL->contains(DestLoop)) {
+          // Edge from an outer loop to an inner loop.  Add to the outer loop.
+          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else if (DestLoop->contains(TIL)) {
+          // Edge from an inner loop to an outer loop.  Add to the outer loop.
+          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else {
+          // Edge from two loops with no containment relation.  Because these
+          // are natural loops, we know that the destination block must be the
+          // header of its loop (adding a branch into a loop elsewhere would
+          // create an irreducible loop).
+          assert(DestLoop->getHeader() == Succ &&
+                 "Should not create irreducible loops!");
+          if (MachineLoop *P = DestLoop->getParentLoop())
+            P->addBasicBlockToLoop(NMBB, MLI->getBase());
+        }
+      }
+    }
+
+  return NMBB;
+}
+
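Beyond creating the new block, SplitCriticalEdge() must retarget every PHI record in the successor that named the old predecessor. A self-contained sketch of just that bookkeeping, on a toy CFG (all names here are invented, not LLVM's):

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    struct Block {
      std::string Name;
      std::vector<Block*> Succs;
      std::map<Block*, int> PhiIncoming; // predecessor -> incoming value
    };

    // Splice NMBB onto the A->B edge and fix B's PHI records.
    Block *splitEdge(Block &A, Block &B) {
      static Block NMBB; // one split only; enough for the sketch
      NMBB.Name = A.Name + "." + B.Name + ".split";
      for (Block *&S : A.Succs)
        if (S == &B)
          S = &NMBB;                    // A now branches to NMBB
      NMBB.Succs.push_back(&B);         // NMBB falls through to B
      auto It = B.PhiIncoming.find(&A); // retarget PHIs in B: A -> NMBB
      if (It != B.PhiIncoming.end()) {
        B.PhiIncoming[&NMBB] = It->second;
        B.PhiIncoming.erase(It);
      }
      return &NMBB;
    }

    int main() {
      Block A{"A"}, B{"B"};
      A.Succs.push_back(&B);
      B.PhiIncoming[&A] = 5;
      std::cout << splitEdge(A, B)->Name << "\n"; // A.B.split
      return 0;
    }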
 /// removeFromParent - This method unlinks 'this' from the containing function,
 /// and returns it, but does not delete it.
 MachineBasicBlock *MachineBasicBlock::removeFromParent() {
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 6f4f7a8..833cc00 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -30,9 +30,7 @@ using namespace llvm;
 
 STATISTIC(NumCoalesces, "Number of copies coalesced");
 STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
-
-static cl::opt<bool> CSEPhysDef("machine-cse-phys-defs",
-                                cl::init(false), cl::Hidden);
+STATISTIC(NumPhysCSEs,  "Number of physreg defining common subexpr eliminated");
 
 namespace {
   class MachineCSE : public MachineFunctionPass {
@@ -128,6 +126,28 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
       ++NumCoalesces;
       Changed = true;
     }
+
+    if (!DefMI->isCopy())
+      continue;
+    SrcReg = DefMI->getOperand(1).getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+      continue;
+    if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
+      continue;
+    const TargetRegisterClass *SRC   = MRI->getRegClass(SrcReg);
+    const TargetRegisterClass *RC    = MRI->getRegClass(Reg);
+    const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
+    if (!NewRC)
+      continue;
+    DEBUG(dbgs() << "Coalescing: " << *DefMI);
+    DEBUG(dbgs() << "***     to: " << *MI);
+    MO.setReg(SrcReg);
+    MRI->clearKillFlags(SrcReg);
+    if (NewRC != SRC)
+      MRI->setRegClass(SrcReg, NewRC);
+    DefMI->eraseFromParent();
+    ++NumCoalesces;
+    Changed = true;
   }
 
   return Changed;
@@ -172,7 +192,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
 
 /// hasLivePhysRegDefUse - Return true if the specified instruction read / write
 /// physical registers (except for dead defs of physical registers). It also
-/// returns the physical register def by reference if it's the only one.
+/// returns the physical register def by reference if it's the only one and the
+/// instruction does not use a physical register.
 bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       unsigned &PhysDef) const {
@@ -186,9 +207,11 @@ bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
       continue;
     if (TargetRegisterInfo::isVirtualRegister(Reg))
       continue;
-    if (MO.isUse())
+    if (MO.isUse()) {
       // Can't touch anything to read a physical register.
+      PhysDef = 0;
       return true;
+    }
     if (MO.isDead())
       // If the def is dead, it's ok.
      continue;
@@ -240,8 +263,8 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
 
 static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) {
   unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
-  return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) ||
-    MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg();
+  return MI->isCopyLike() ||
+    TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
 }
 
 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
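The coalescing block added to PerformTrivialCoalescing() rewrites uses of a copy's destination to its source and deletes the now-dead copy (when the register classes are compatible). A compilable toy version of the rewrite-then-delete step (invented instruction encoding, not the pass's real data structures):

    #include <iostream>
    #include <vector>

    struct Inst { unsigned Def, Use; bool IsCopy; };

    // "Dst = COPY Src": redirect every use of Dst to Src, then drop the copy.
    void coalesce(std::vector<Inst> &Code, unsigned Dst, unsigned Src) {
      for (Inst &I : Code)
        if (I.Use == Dst)
          I.Use = Src;                  // rewrite the use
      for (auto It = Code.begin(); It != Code.end(); ++It)
        if (It->IsCopy && It->Def == Dst) {
          Code.erase(It);               // the copy is now dead
          break;
        }
    }

    int main() {
      // %2 = COPY %1 ; %3 = add %2  ==>  %3 = add %1
      std::vector<Inst> Code = {{2, 1, true}, {3, 2, false}};
      coalesce(Code, 2, 1);
      std::cout << Code.size() << " " << Code[0].Use << "\n"; // "1 1"
      return 0;
    }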
@@ -356,6 +379,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
     if (!isCSECandidate(MI))
       continue;
 
+    bool DefPhys = false;
     bool FoundCSE = VNT.count(MI);
     if (!FoundCSE) {
       // Look for trivial copy coalescing opportunities.
@@ -376,11 +400,13 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
       // ... Unless the CS is local and it also defines the physical register
       // which is not clobbered in between.
-      if (PhysDef && CSEPhysDef) {
+      if (PhysDef) {
         unsigned CSVN = VNT.lookup(MI);
         MachineInstr *CSMI = Exps[CSVN];
-        if (PhysRegDefReaches(CSMI, MI, PhysDef))
+        if (PhysRegDefReaches(CSMI, MI, PhysDef)) {
           FoundCSE = true;
+          DefPhys = true;
+        }
       }
     }
@@ -426,6 +452,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
       }
       MI->eraseFromParent();
       ++NumCSEs;
+      if (DefPhys)
+        ++NumPhysCSEs;
     } else {
       DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
       VNT.insert(MI, CurrVN++);
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 4088739..b5f8fbb 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -46,7 +46,6 @@ MachineDominatorTree::MachineDominatorTree()
 }
 
 MachineDominatorTree::~MachineDominatorTree() {
-  DT->releaseMemory();
   delete DT;
 }
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index a38c881..666120f 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -378,7 +378,7 @@ void MachineFunction::viewCFG() const
 #ifndef NDEBUG
   ViewGraph(this, "mf" + getFunction()->getNameStr());
 #else
-  errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+  errs() << "MachineFunction::viewCFG is only available in debug builds on "
          << "systems with Graphviz or gv!\n";
 #endif // NDEBUG
 }
@@ -388,7 +388,7 @@ void MachineFunction::viewCFGOnly() const
 #ifndef NDEBUG
   ViewGraph(this, "mf" + getFunction()->getNameStr(), true);
 #else
-  errs() << "SelectionDAG::viewGraph is only available in debug builds on "
+  errs() << "MachineFunction::viewCFGOnly is only available in debug builds on "
         << "systems with Graphviz or gv!\n";
 #endif // NDEBUG
 }
@@ -438,10 +438,16 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
 /// index with a negative value.
 ///
 int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
-                                        bool Immutable, bool isSS) {
+                                        bool Immutable) {
   assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
-  Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable,
-                                              isSS));
+  // The alignment of the frame index can be determined from its offset from
+  // the incoming frame position.  If the frame object is at offset 32 and
+  // the stack is guaranteed to be 16-byte aligned, then we know that the
+  // object is 16-byte aligned.
+  unsigned StackAlign = TFI.getStackAlignment();
+  unsigned Align = MinAlign(SPOffset, StackAlign);
+  Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
+                                              /*isSS*/false));
   return -++NumFixedObjects;
 }
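The MinAlign computation above derives the alignment provably held by an object at a given offset from an aligned boundary: it is the largest power of two dividing both the offset and the stack alignment. A standalone sketch with the worked example from the comment (function name invented):

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Largest power of two dividing both values: isolate the lowest set bit
    // of (Offset | StackAlign).
    uint64_t minAlign(uint64_t Offset, uint64_t StackAlign) {
      uint64_t Both = Offset | StackAlign;
      return Both & (~Both + 1);
    }

    int main() {
      // With a 16-byte aligned stack: an object at offset 32 is 16-byte
      // aligned, one at offset 4 only 4-byte aligned.
      assert(minAlign(32, 16) == 16);
      assert(minAlign(4, 16) == 4);
      std::cout << minAlign(32, 16) << " " << minAlign(4, 16) << "\n";
      return 0;
    }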
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index e54cd5c..6b2e985 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -111,6 +111,26 @@ void MachineOperand::setReg(unsigned Reg) {
   Contents.Reg.RegNo = Reg;
 }
 
+void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+                                  const TargetRegisterInfo &TRI) {
+  assert(TargetRegisterInfo::isVirtualRegister(Reg));
+  if (SubIdx && getSubReg())
+    SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
+  setReg(Reg);
+  if (SubIdx)
+    setSubReg(SubIdx);
+}
+
+void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+  if (getSubReg()) {
+    Reg = TRI.getSubReg(Reg, getSubReg());
+    assert(Reg && "Invalid SubReg for physical register");
+    setSubReg(0);
+  }
+  setReg(Reg);
+}
+
 /// ChangeToImmediate - Replace this operand with a new immediate operand of
 /// the specified value.  If an operand is known to be an immediate already,
 /// the setImm method should be used.
@@ -861,14 +881,14 @@ int MachineInstr::findFirstPredOperandIdx() const {
 bool MachineInstr::
 isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
   if (isInlineAsm()) {
-    assert(DefOpIdx >= 2);
+    assert(DefOpIdx >= 3);
     const MachineOperand &MO = getOperand(DefOpIdx);
     if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
       return false;
     // Determine the actual operand index that corresponds to this index.
     unsigned DefNo = 0;
     unsigned DefPart = 0;
-    for (unsigned i = 1, e = getNumOperands(); i < e; ) {
+    for (unsigned i = 2, e = getNumOperands(); i < e; ) {
       const MachineOperand &FMO = getOperand(i);
       // After the normal asm operands there may be additional imp-def regs.
       if (!FMO.isImm())
@@ -883,7 +903,7 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
       }
       ++DefNo;
     }
-    for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
+    for (unsigned i = 2, e = getNumOperands(); i != e; ++i) {
       const MachineOperand &FMO = getOperand(i);
       if (!FMO.isImm())
         continue;
@@ -926,7 +946,7 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
 
     // Find the flag operand corresponding to UseOpIdx
     unsigned FlagIdx, NumOps=0;
-    for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+    for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
       const MachineOperand &UFMO = getOperand(FlagIdx);
       // After the normal asm operands there may be additional imp-def regs.
       if (!UFMO.isImm())
@@ -944,9 +964,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
       if (!DefOpIdx)
         return true;
 
-      unsigned DefIdx = 1;
-      // Remember to adjust the index. First operand is asm string, then there
-      // is a flag for each.
+      unsigned DefIdx = 2;
+      // Remember to adjust the index. First operand is asm string, second is
+      // the AlignStack bit, then there is a flag for each.
       while (DefNo) {
         const MachineOperand &FMO = getOperand(DefIdx);
         assert(FMO.isImm());
@@ -1017,6 +1037,29 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) {
   }
 }
 
+void MachineInstr::substituteRegister(unsigned FromReg,
+                                      unsigned ToReg,
+                                      unsigned SubIdx,
+                                      const TargetRegisterInfo &RegInfo) {
+  if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+    if (SubIdx)
+      ToReg = RegInfo.getSubReg(ToReg, SubIdx);
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = getOperand(i);
+      if (!MO.isReg() || MO.getReg() != FromReg)
+        continue;
+      MO.substPhysReg(ToReg, RegInfo);
+    }
+  } else {
+    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+      MachineOperand &MO = getOperand(i);
+      if (!MO.isReg() || MO.getReg() != FromReg)
+        continue;
+      MO.substVirtReg(ToReg, SubIdx, RegInfo);
+    }
+  }
+}
+
 /// isSafeToMove - Return true if it is safe to move this instruction. If
 /// SawStore is set to true, it means that there is a store (or call) between
 /// the instruction's location and its intended destination.
@@ -1168,6 +1211,28 @@ void MachineInstr::dump() const {
   dbgs() << "  " << *this;
 }
 
+static void printDebugLoc(DebugLoc DL, const MachineFunction *MF,
+                          raw_ostream &CommentOS) {
+  const LLVMContext &Ctx = MF->getFunction()->getContext();
+  if (!DL.isUnknown()) {          // Print source line info.
+    DIScope Scope(DL.getScope(Ctx));
+    // Omit the directory, because it's likely to be long and uninteresting.
+    if (Scope.Verify())
+      CommentOS << Scope.getFilename();
+    else
+      CommentOS << "<unknown>";
+    CommentOS << ':' << DL.getLine();
+    if (DL.getCol() != 0)
+      CommentOS << ':' << DL.getCol();
+    DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx));
+    if (!InlinedAtDL.isUnknown()) {
+      CommentOS << " @[ ";
+      printDebugLoc(InlinedAtDL, MF, CommentOS);
+      CommentOS << " ]";
+    }
+  }
+}
+
 void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
   // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
   const MachineFunction *MF = 0;
@@ -1240,6 +1305,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
         OS << "!\"" << MDS->getString() << '\"';
       else
         MO.print(OS, TM);
+    } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
+      OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
     } else
       MO.print(OS, TM);
   }
@@ -1265,19 +1332,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
   if (!debugLoc.isUnknown() && MF) {
     if (!HaveSemi) OS << ";";
-
-    // TODO: print InlinedAtLoc information
-
-    DIScope Scope(debugLoc.getScope(MF->getFunction()->getContext()));
     OS << " dbg:";
-    // Omit the directory, since it's usually long and uninteresting.
-    if (Scope.Verify())
-      OS << Scope.getFilename();
-    else
-      OS << "<unknown>";
-    OS << ':' << debugLoc.getLine();
-    if (debugLoc.getCol() != 0)
-      OS << ':' << debugLoc.getCol();
+    printDebugLoc(debugLoc, MF, OS);
   }
 
   OS << "\n";
@@ -1418,6 +1474,25 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg,
                                            true  /*IsImp*/));
 }
 
+void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl<unsigned> &UsedRegs,
+                                         const TargetRegisterInfo &TRI) {
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = getOperand(i);
+    if (!MO.isReg() || !MO.isDef()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0) continue;
+    bool Dead = true;
+    for (SmallVectorImpl<unsigned>::const_iterator I = UsedRegs.begin(),
+         E = UsedRegs.end(); I != E; ++I)
+      if (TRI.regsOverlap(*I, Reg)) {
+        Dead = false;
+        break;
+      }
+    // If there are no uses, including partial uses, the def is dead.
+    if (Dead) MO.setIsDead();
+  }
+}
+
 unsigned
 MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
   unsigned Hash = MI->getOpcode() * 37;
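The printDebugLoc() helper added above recurses through the inlined-at chain so an instruction's location renders as "file:line @[ caller-location ]". A self-contained sketch of the same recursion over a toy location type (not LLVM's DebugLoc):

    #include <iostream>
    #include <string>

    struct Loc {
      std::string File;
      unsigned Line;
      const Loc *InlinedAt; // null when not inlined
    };

    void printLoc(const Loc &L, std::ostream &OS) {
      OS << L.File << ':' << L.Line;
      if (L.InlinedAt) {
        OS << " @[ ";
        printLoc(*L.InlinedAt, OS); // recurse up the inlining chain
        OS << " ]";
      }
    }

    int main() {
      Loc Caller  = {"main.c", 10, nullptr};
      Loc Inlined = {"util.h", 3, &Caller};
      printLoc(Inlined, std::cout); // util.h:3 @[ main.c:10 ]
      std::cout << "\n";
      return 0;
    }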
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 6120617..956d21c 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -62,6 +62,7 @@ namespace {
 
     // State that is updated as we process loops
     bool         Changed;          // True if a loop is changed.
+    bool         FirstInLoop;      // True if it's the first LICM in the loop.
     MachineLoop *CurLoop;          // The current loop we are working on.
     MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
@@ -82,7 +83,6 @@ namespace {
 
     const char *getPassName() const { return "Machine Instruction LICM"; }
 
-    // FIXME: Loop preheaders?
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
      AU.addRequired<MachineLoopInfo>();
@@ -127,8 +127,8 @@ namespace {
     void AddToLiveIns(unsigned Reg);
 
     /// IsLICMCandidate - Returns true if the instruction may be a suitable
-    /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
-    /// not safe to hoist it.
+    /// candidate for LICM. e.g. If the instruction is a call, then it's
+    /// obviously not safe to hoist it.
     bool IsLICMCandidate(MachineInstr &I);
 
     /// IsLoopInvariantInst - Returns true if the instruction is loop
@@ -181,6 +181,10 @@ namespace {
     /// current loop preheader that may become duplicates of instructions that
     /// are hoisted out of the loop.
     void InitCSEMap(MachineBasicBlock *BB);
+
+    /// getCurPreheader - Get the preheader for the current loop, splitting
+    /// a critical edge if needed.
+    MachineBasicBlock *getCurPreheader();
   };
 } // end anonymous namespace
@@ -192,12 +196,17 @@ FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
   return new MachineLICM(PreRegAlloc);
 }
 
-/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most
-/// loop that has a preheader.
-static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) {
+/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
+/// loop that has a unique predecessor.
+static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
+  // Check whether this loop even has a unique predecessor.
+  if (!CurLoop->getLoopPredecessor())
+    return false;
+  // Ok, now check to see if any of its outer loops do.
   for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
-    if (L->getLoopPreheader())
+    if (L->getLoopPredecessor())
       return false;
+  // None of them did, so this is the outermost with a unique predecessor.
   return true;
 }
 
@@ -207,7 +216,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
   else
     DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n");
 
-  Changed = false;
+  Changed = FirstInLoop = false;
   TM = &MF.getTarget();
   TII = TM->getInstrInfo();
   TRI = TM->getRegisterInfo();
@@ -220,23 +229,17 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
   DT  = &getAnalysis<MachineDominatorTree>();
   AA  = &getAnalysis<AliasAnalysis>();
 
-  for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){
-    CurLoop = *I;
+  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
+  while (!Worklist.empty()) {
+    CurLoop = Worklist.pop_back_val();
+    CurPreheader = 0;
 
     // If this is done before regalloc, only visit outer-most preheader-sporting
     // loops.
-    if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop))
-      continue;
-
-    // Determine the block to which to hoist instructions. If we can't find a
-    // suitable loop preheader, we can't do any hoisting.
-    //
-    // FIXME: We are only hoisting if the basic block coming into this loop
-    // has only one successor. This isn't the case in general because we haven't
-    // broken critical edges or added preheaders.
-    CurPreheader = CurLoop->getLoopPreheader();
-    if (!CurPreheader)
+    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
+      Worklist.append(CurLoop->begin(), CurLoop->end());
       continue;
+    }
 
     if (!PreRegAlloc)
       HoistRegionPostRA();
@@ -244,6 +247,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
       // CSEMap is initialized for loop header when the first instruction is
       // being hoisted.
       MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
+      FirstInLoop = true;
       HoistRegion(N);
       CSEMap.clear();
     }
@@ -436,13 +440,16 @@ void MachineLICM::AddToLiveIns(unsigned Reg) {
 /// operands that is safe to hoist, this instruction is called to do the
 /// dirty work.
 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
+  MachineBasicBlock *Preheader = getCurPreheader();
+  if (!Preheader) return;
+
   // Now move the instructions to the predecessor, inserting it before any
   // terminator instructions.
   DEBUG({
       dbgs() << "Hoisting " << *MI;
-      if (CurPreheader->getBasicBlock())
+      if (Preheader->getBasicBlock())
        dbgs() << " to MachineBasicBlock "
-               << CurPreheader->getName();
+               << Preheader->getName();
      if (MI->getParent()->getBasicBlock())
        dbgs() << " from MachineBasicBlock "
               << MI->getParent()->getName();
@@ -451,7 +458,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
 
   // Splice the instruction to the preheader.
   MachineBasicBlock *MBB = MI->getParent();
-  CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI);
+  Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
 
   // Add register to livein list to all the BBs in the current loop since a
   // loop invariant must be kept live throughout the whole loop. This is
@@ -490,26 +497,16 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
 /// not safe to hoist it.
 bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
+  // It is not profitable to hoist implicitdefs.  FIXME: Why not?  what if they
+  // are an argument to some other otherwise-hoistable instruction?
   if (I.isImplicitDef())
     return false;
-
-  const TargetInstrDesc &TID = I.getDesc();
 
-  // Ignore stuff that we obviously can't hoist.
-  if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
-      TID.hasUnmodeledSideEffects())
+  // Check if it's safe to move the instruction.
+  bool DontMoveAcrossStore = true;
+  if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
     return false;
-
-  if (TID.mayLoad()) {
-    // Okay, this instruction does a load. As a refinement, we allow the target
-    // to decide whether the loaded value is actually a constant. If so, we can
-    // actually use it as a load.
-    if (!I.isInvariantLoad(AA))
-      // FIXME: we should be able to hoist loads with no other side effects if
-      // there are no other instructions which can change memory in this loop.
-      // This is a trivial form of alias analysis.
-      return false;
-  }
+
   return true;
 }
@@ -754,6 +751,9 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
 /// that are safe to hoist, this instruction is called to do the dirty work.
 ///
 void MachineLICM::Hoist(MachineInstr *MI) {
+  MachineBasicBlock *Preheader = getCurPreheader();
+  if (!Preheader) return;
+
   // First check whether we should hoist this instruction.
   if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
     // If not, try unfolding a hoistable load.
@@ -765,9 +765,9 @@ void MachineLICM::Hoist(MachineInstr *MI) {
   // terminator instructions.
   DEBUG({
       dbgs() << "Hoisting " << *MI;
-      if (CurPreheader->getBasicBlock())
+      if (Preheader->getBasicBlock())
         dbgs() << " to MachineBasicBlock "
-               << CurPreheader->getName();
+               << Preheader->getName();
       if (MI->getParent()->getBasicBlock())
         dbgs() << " from MachineBasicBlock "
                << MI->getParent()->getName();
@@ -776,7 +776,10 @@ void MachineLICM::Hoist(MachineInstr *MI) {
 
   // If this is the first instruction being hoisted to the preheader,
   // initialize the CSE map with potential common expressions.
-  InitCSEMap(CurPreheader);
+  if (FirstInLoop) {
+    InitCSEMap(Preheader);
+    FirstInLoop = false;
+  }
 
   // Look for opportunity to CSE the hoisted instruction.
   unsigned Opcode = MI->getOpcode();
@@ -784,7 +787,7 @@ void MachineLICM::Hoist(MachineInstr *MI) {
     CI = CSEMap.find(Opcode);
   if (!EliminateCSE(MI, CI)) {
     // Otherwise, splice the instruction to the preheader.
-    CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI);
+    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
 
     // Clear the kill flags of any register this instruction defines,
     // since they may need to be live throughout the entire loop
@@ -808,3 +811,30 @@ void MachineLICM::Hoist(MachineInstr *MI) {
   ++NumHoisted;
   Changed = true;
 }
+
+MachineBasicBlock *MachineLICM::getCurPreheader() {
+  // Determine the block to which to hoist instructions. If we can't find a
+  // suitable loop predecessor, we can't do any hoisting.
+
+  // If we've tried to get a preheader and failed, don't try again.
+  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
+    return 0;
+
+  if (!CurPreheader) {
+    CurPreheader = CurLoop->getLoopPreheader();
+    if (!CurPreheader) {
+      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
+      if (!Pred) {
+        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+        return 0;
+      }
+
+      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
+      if (!CurPreheader) {
+        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
+        return 0;
+      }
+    }
+  }
+  return CurPreheader;
+}
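getCurPreheader() above caches a lazily computed preheader with three pointer states: null ("not computed"), a real block, and an all-ones sentinel ("tried and failed, don't retry"). A standalone sketch of that caching pattern (toy types, invented names):

    #include <iostream>

    struct Block { int Id; };

    struct PreheaderCache {
      Block *Cached = nullptr;
      static Block *sentinel() { return reinterpret_cast<Block *>(-1); }

      Block *get(Block *(*Compute)()) {
        if (Cached == sentinel())
          return nullptr;               // known failure, cached
        if (!Cached) {
          Cached = Compute();
          if (!Cached) {
            Cached = sentinel();        // remember the failure
            return nullptr;
          }
        }
        return Cached;
      }
    };

    static Block *alwaysFails() { return nullptr; }

    int main() {
      PreheaderCache C;
      std::cout << (C.get(alwaysFails) == nullptr) << "\n";         // 1
      std::cout << (C.Cached == PreheaderCache::sentinel()) << "\n"; // 1
      return 0;
    }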
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 70bf7e5..5d852f2 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
 MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
   VRegInfo.reserve(256);
   RegAllocHints.reserve(256);
-  RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1.
+  RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()];
   UsedPhysRegs.resize(TRI.getNumRegs());
 
   // Create the physreg use/def lists.
@@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() {
          "PhysRegUseDefLists has entries after all instructions are deleted");
 #endif
   delete [] PhysRegUseDefLists;
+  delete [] RegClass2VRegMap;
 }
 
 /// setRegClass - Set the register class of the specified virtual register.
@@ -52,7 +53,7 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
   // Remove from old register class's vregs list. This may be slow but
   // fortunately this operation is rarely needed.
   std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
-  std::vector<unsigned>::iterator I=std::find(VRegs.begin(), VRegs.end(), VR);
+  std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR);
   VRegs.erase(I);
 
   // Add to new register class's vregs list.
@@ -174,115 +175,36 @@ unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
   return 0;
 }
 
-static cl::opt<bool>
-SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
-                  cl::desc("Schedule copies of livein registers"),
-                  cl::init(false));
-
-/// EmitLiveInCopy - Emit a copy for a live in physical register. If the
-/// physical register has only a single copy use, then coalesced the copy
-/// if possible.
-static void EmitLiveInCopy(MachineBasicBlock *MBB,
-                           MachineBasicBlock::iterator &InsertPos,
-                           unsigned VirtReg, unsigned PhysReg,
-                           const TargetRegisterClass *RC,
-                           DenseMap<MachineInstr*, unsigned> &CopyRegMap,
-                           const MachineRegisterInfo &MRI,
-                           const TargetRegisterInfo &TRI,
-                           const TargetInstrInfo &TII) {
-  unsigned NumUses = 0;
-  MachineInstr *UseMI = NULL;
-  for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg),
-         UE = MRI.use_end(); UI != UE; ++UI) {
-    UseMI = &*UI;
-    if (++NumUses > 1)
-      break;
-  }
-
-  // If the number of uses is not one, or the use is not a move instruction,
-  // don't coalesce. Also, only coalesce away a virtual register to virtual
-  // register copy.
-  bool Coalesced = false;
-  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
-  if (NumUses == 1 &&
-      TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
-      TargetRegisterInfo::isVirtualRegister(DstReg)) {
-    VirtReg = DstReg;
-    Coalesced = true;
-  }
-
-  // Now find an ideal location to insert the copy.
-  MachineBasicBlock::iterator Pos = InsertPos;
-  while (Pos != MBB->begin()) {
-    MachineInstr *PrevMI = prior(Pos);
-    DenseMap<MachineInstr*, unsigned>::iterator RI = CopyRegMap.find(PrevMI);
-    // copyRegToReg might emit multiple instructions to do a copy.
-    unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second;
-    if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg))
-      // This is what the BB looks like right now:
-      // r1024 = mov r0
-      // ...
-      // r1    = mov r1024
-      //
-      // We want to insert "r1025 = mov r1". Inserting this copy below the
-      // move to r1024 makes it impossible for that move to be coalesced.
-      //
-      // r1025 = mov r1
-      // r1024 = mov r0
-      // ...
-      // r1    = mov 1024
-      // r2    = mov 1025
-      break; // Woot! Found a good location.
-    --Pos;
-  }
-
-  bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC,
-                                  DebugLoc());
-  assert(Emitted && "Unable to issue a live-in copy instruction!\n");
-  (void) Emitted;
-
-  CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg));
-  if (Coalesced) {
-    if (&*InsertPos == UseMI) ++InsertPos;
-    MBB->erase(UseMI);
-  }
-}
-
 /// EmitLiveInCopies - Emit copies to initialize livein virtual registers
 /// into the given entry block.
 void MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
                                            const TargetRegisterInfo &TRI,
                                            const TargetInstrInfo &TII) {
-  if (SchedLiveInCopies) {
-    // Emit the copies at a heuristically-determined location in the block.
-    DenseMap<MachineInstr*, unsigned> CopyRegMap;
-    MachineBasicBlock::iterator InsertPos = EntryMBB->begin();
-    for (MachineRegisterInfo::livein_iterator LI = livein_begin(),
-           E = livein_end(); LI != E; ++LI)
-      if (LI->second) {
-        const TargetRegisterClass *RC = getRegClass(LI->second);
-        EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first,
-                       RC, CopyRegMap, *this, TRI, TII);
+  // Emit the copies into the top of the block.
+  for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+    if (LiveIns[i].second) {
+      if (use_empty(LiveIns[i].second)) {
+        // The livein has no uses. Drop it.
+        //
+        // It would be preferable to have isel avoid creating live-in
+        // records for unused arguments in the first place, but it's
+        // complicated by the debug info code for arguments.
+        LiveIns.erase(LiveIns.begin() + i);
+        --i; --e;
+      } else {
+        // Emit a copy.
+        BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+                TII.get(TargetOpcode::COPY), LiveIns[i].second)
+          .addReg(LiveIns[i].first);
+
+        // Add the register to the entry block live-in set.
+        EntryMBB->addLiveIn(LiveIns[i].first);
       }
-  } else {
-    // Emit the copies into the top of the block.
-    for (MachineRegisterInfo::livein_iterator LI = livein_begin(),
-           E = livein_end(); LI != E; ++LI)
-      if (LI->second) {
-        const TargetRegisterClass *RC = getRegClass(LI->second);
-        bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(),
-                                        LI->second, LI->first, RC, RC,
-                                        DebugLoc());
-        assert(Emitted && "Unable to issue a live-in copy instruction!\n");
-        (void) Emitted;
-      }
-  }
-
-  // Add function live-ins to entry block live-in set.
-  for (MachineRegisterInfo::livein_iterator I = livein_begin(),
-       E = livein_end(); I != E; ++I)
-    EntryMBB->addLiveIn(I->first);
+    } else {
+      // Add the register to the entry block live-in set.
+      EntryMBB->addLiveIn(LiveIns[i].first);
+    }
 }
 
 void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) {
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 1610e6c..61334fc 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This pass moves instructions into successor blocks, when possible, so that
+// This pass moves instructions into successor blocks when possible, so that
 // they aren't executed on paths where their results aren't needed.
 //
 // This pass is not intended to be a replacement or a complete alternative
@@ -45,9 +45,9 @@ namespace {
   public:
     static char ID; // Pass identification
     MachineSinking() : MachineFunctionPass(&ID) {}
-    
+
     virtual bool runOnMachineFunction(MachineFunction &MF);
-    
+
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesCFG();
       MachineFunctionPass::getAnalysisUsage(AU);
@@ -63,7 +63,7 @@ namespace {
     bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const;
   };
 } // end anonymous namespace
-  
+
 char MachineSinking::ID = 0;
 static RegisterPass<MachineSinking>
 X("machine-sink", "Machine code sinking");
@@ -72,7 +72,7 @@ FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
 
 /// AllUsesDominatedByBlock - Return true if all uses of the specified register
 /// occur in blocks dominated by the specified block.
-bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, +bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); @@ -80,27 +80,30 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // This may leave a referencing dbg_value in the original block, before // the definition of the vreg. Dwarf generator handles this although the // user might not get the right info at runtime. - for (MachineRegisterInfo::use_nodbg_iterator I = - RegInfo->use_nodbg_begin(Reg), - E = RegInfo->use_nodbg_end(); I != E; ++I) { + for (MachineRegisterInfo::use_nodbg_iterator + I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end(); + I != E; ++I) { // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); + if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); } + // Check that it dominates. if (!DT->dominates(MBB, UseBlock)) return false; } + return true; } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** Machine Sinking ********\n"); - + const TargetMachine &TM = MF.getTarget(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); @@ -111,19 +114,19 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { AllocatableSet = TRI->getAllocatableSet(MF); bool EverMadeChange = false; - + while (1) { bool MadeChange = false; // Process all basic blocks. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) MadeChange |= ProcessBlock(*I); - + // If this iteration over the code changed anything, keep iterating. if (!MadeChange) break; EverMadeChange = true; - } + } return EverMadeChange; } @@ -132,8 +135,8 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (MBB.succ_size() <= 1 || MBB.empty()) return false; // Don't bother sinking code out of unreachable blocks. In addition to being - // unprofitable, it can also lead to infinite looping, because in an unreachable - // loop there may be nowhere to stop. + // unprofitable, it can also lead to infinite looping, because in an + // unreachable loop there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; @@ -144,7 +147,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { bool ProcessedBegin, SawStore = false; do { MachineInstr *MI = I; // The instruction to sink. - + // Predecrement I (if it's not begin) so that it isn't invalidated by // sinking. ProcessedBegin = I == MBB.begin(); @@ -156,10 +159,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; - + // If we just processed the first instruction in the block, we're done. } while (!ProcessedBegin); - + return MadeChange; } @@ -169,7 +172,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Check if it's safe to move the instruction. if (!MI->isSafeToMove(TII, AA, SawStore)) return false; - + // FIXME: This should include support for sinking instructions within the // block they are currently in to shorten the live ranges. 
We often get // instructions sunk into the top of a large block, but it would be better to @@ -177,22 +180,22 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y // and z and only shrink the live range of x. - + // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. MachineBasicBlock *ParentBlock = MI->getParent(); - + // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = 0; - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. - + unsigned Reg = MO.getReg(); if (Reg == 0) continue; - + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register @@ -200,13 +203,16 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // it could get allocated to something with a def during allocation. if (!RegInfo->def_empty(Reg)) return false; + if (AllocatableSet.test(Reg)) return false; + // Check for a def among the register's aliases too. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (!RegInfo->def_empty(AliasReg)) return false; + if (AllocatableSet.test(AliasReg)) return false; } @@ -221,28 +227,31 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If it's not safe to move defs of the register class, then abort. if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg))) return false; - + // FIXME: This picks a successor to sink into based on having one // successor that dominates all the uses. However, there are cases where // sinking can happen but where the sink point isn't a successor. For // example: + // // x = computation // if () {} else {} // use x - // the instruction could be sunk over the whole diamond for the + // + // the instruction could be sunk over the whole diamond for the // if/then/else (or loop, etc), allowing it to be sunk into other blocks // after that. - + // Virtual register defs can only be sunk if all their uses are in blocks // dominated by one of the successors. if (SuccToSinkTo) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo)) + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo)) return false; + continue; } - + // Otherwise, we should look at all the successors and decide which one // we should sink to. for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), @@ -252,13 +261,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { break; } } - + // If we couldn't find a block to sink to, ignore this instruction. if (SuccToSinkTo == 0) return false; } } - + // If there are no outputs, it must have side-effects. if (SuccToSinkTo == 0) return false; @@ -267,15 +276,26 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // landing pad is implicitly defined. if (SuccToSinkTo->isLandingPad()) return false; - + // It is not possible to sink an instruction into its own block. This can // happen with loops. 
if (MI->getParent() == SuccToSinkTo) return false; - - DEBUG(dbgs() << "Sink instr " << *MI); - DEBUG(dbgs() << "to block " << *SuccToSinkTo); - + + // If the instruction to move defines a dead physical register which is live + // when leaving the basic block, don't move it because it could turn into a + // "zombie" define of that preg. E.g., EFLAGS. () + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (SuccToSinkTo->isLiveIn(Reg)) + return false; + } + + DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo); + // If the block has multiple predecessors, this would introduce computation on // a path where it doesn't already exist. We could split the critical edge, // but for now we just punt. @@ -305,18 +325,18 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Otherwise we are OK with sinking along a critical edge. DEBUG(dbgs() << "Sinking along critical edge.\n"); } - - // Determine where to insert into. Skip phi nodes. + + // Determine where to insert into. Skip phi nodes. MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) ++InsertPos; - + // Move the instruction. SuccToSinkTo->splice(InsertPos, ParentBlock, MI, ++MachineBasicBlock::iterator(MI)); - // Conservatively, clear any kill flags, since it's possible that - // they are no longer correct. + // Conservatively, clear any kill flags, since it's possible that they are no + // longer correct. MI->clearKillInfo(); return true; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 8baf01c..2297c90 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -390,7 +390,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB exits via unconditional fall-through but its successor " "differs from its CFG successor!", MBB); } - if (!MBB->empty() && MBB->back().getDesc().isBarrier()) { + if (!MBB->empty() && MBB->back().getDesc().isBarrier() && + !TII->isPredicated(&MBB->back())) { report("MBB exits via unconditional fall-through but ends with a " "barrier instruction!", MBB); } diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp index 41fc204..dcdc243 100644 --- a/lib/CodeGen/OptimizeExts.cpp +++ b/lib/CodeGen/OptimizeExts.cpp @@ -118,6 +118,26 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; } + // It's an error to translate this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 + // + // into this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1027 = COPY %reg1025:4 + // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 + // + // The problem here is that SUBREG_TO_REG is there to assert that an + // implicit zext occurs. It doesn't insert a zext instruction. If we allow + // the COPY here, it will give us the value after the <sext>, + // not the original value of %reg1024 before <sext>. + if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) + continue; + MachineBasicBlock *UseMBB = UseMI->getParent(); if (UseMBB == MBB) { // Local uses that come after the extension.
@@ -165,8 +185,8 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; unsigned NewVR = MRI->createVirtualRegister(RC); BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::EXTRACT_SUBREG), NewVR) - .addReg(DstReg).addImm(SubIdx); + TII->get(TargetOpcode::COPY), NewVR) + .addReg(DstReg, 0, SubIdx); UseMO->setReg(NewVR); ++NumReuse; Changed = true; diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 2717d4d..1613fe2 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -107,6 +107,11 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, SrcSubIdx == 0 && DstSubIdx == 0 && TargetRegisterInfo::isVirtualRegister(MvSrcReg)) SrcMI = MRI->getVRegDef(MvSrcReg); + else if (SrcMI && SrcMI->isCopy() && + !SrcMI->getOperand(0).getSubReg() && + !SrcMI->getOperand(1).getSubReg() && + TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) + SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); if (!SrcMI) return false; diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h index bd18b52..02938df 100644 --- a/lib/CodeGen/PBQP/HeuristicSolver.h +++ b/lib/CodeGen/PBQP/HeuristicSolver.h @@ -406,7 +406,7 @@ namespace PBQP { // Create node data objects. for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); - nItr != nEnd; ++nItr) { + nItr != nEnd; ++nItr) { nodeDataList.push_back(NodeData()); g.setNodeData(nItr, &nodeDataList.back()); } diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h index 30d34d9..4c1ce11 100644 --- a/lib/CodeGen/PBQP/Heuristics/Briggs.h +++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -18,7 +18,6 @@ #ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H #define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H -#include "llvm/Support/Compiler.h" #include "../HeuristicSolver.h" #include "../HeuristicBase.h" @@ -267,8 +266,8 @@ namespace PBQP { if (!nd.isHeuristic) return; - EdgeData &ed ATTRIBUTE_UNUSED = getHeuristicEdgeData(eItr); - + EdgeData &ed = getHeuristicEdgeData(eItr); + (void)ed; assert(ed.isUpToDate && "Edge data is not up to date."); // Update node. diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index edbc13f..ea6b094 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -34,7 +34,6 @@ using namespace llvm; STATISTIC(NumAtomic, "Number of atomic phis lowered"); -STATISTIC(NumSplits, "Number of critical edges split on demand"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; @@ -184,7 +183,6 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Create a new register for the incoming PHI arguments. MachineFunction &MF = *MBB.getParent(); - const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); unsigned IncomingReg = 0; bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI? @@ -208,10 +206,12 @@ void llvm::PHIElimination::LowerAtomicPHINode( ++NumReused; DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); } else { + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } - TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC, - MPhi->getDebugLoc()); + BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), DestReg) + .addReg(IncomingReg); } // Update live variable information if there is any. 
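// The copyRegToReg -> COPY migration in the hunks above recurs throughout
// this patch. A minimal sketch of the new idiom (assuming a block MBB, an
// insertion point InsertPos, a DebugLoc DL, and registers DstReg/SrcReg
// already in scope):
//
//   BuildMI(MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), DstReg)
//     .addReg(SrcReg);
//
// Unlike the removed copyRegToReg hook, COPY is a single target-independent
// instruction: no register classes need to be passed in, and the copy is
// turned into a real target move (or coalesced away) later in the pipeline.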
@@ -293,8 +293,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Insert the copy. if (!reusedIncoming && IncomingReg) - TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC, - MPhi->getDebugLoc()); + BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg); // Now update live variable information if we have it. Otherwise we're done if (!LV) continue; @@ -391,57 +391,8 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, // (not considering PHI nodes). If the register is live in to this block // anyway, we would gain nothing from splitting. if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) - SplitCriticalEdge(PreMBB, &MBB); + PreMBB->SplitCriticalEdge(&MBB, this); } } return true; } - -MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, - MachineBasicBlock *B) { - assert(A && B && "Missing MBB end point"); - - MachineFunction *MF = A->getParent(); - - // We may need to update A's terminator, but we can't do that if AnalyzeBranch - // fails. If A uses a jump table, we won't touch it. - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector<MachineOperand, 4> Cond; - if (TII->AnalyzeBranch(*A, TBB, FBB, Cond)) - return NULL; - - ++NumSplits; - - MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); - MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB); - DEBUG(dbgs() << "PHIElimination splitting critical edge:" - " BB#" << A->getNumber() - << " -- BB#" << NMBB->getNumber() - << " -- BB#" << B->getNumber() << '\n'); - - A->ReplaceUsesOfBlockWith(B, NMBB); - A->updateTerminator(); - - // Insert unconditional "jump B" instruction in NMBB if necessary. - NMBB->addSuccessor(B); - if (!NMBB->isLayoutSuccessor(B)) { - Cond.clear(); - MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond); - } - - // Fix PHI nodes in B so they refer to NMBB instead of A - for (MachineBasicBlock::iterator i = B->begin(), e = B->end(); - i != e && i->isPHI(); ++i) - for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) - if (i->getOperand(ni+1).getMBB() == A) - i->getOperand(ni+1).setMBB(NMBB); - - if (LiveVariables *LV=getAnalysisIfAvailable<LiveVariables>()) - LV->addNewBlock(NMBB, A, B); - - if (MachineDominatorTree *MDT=getAnalysisIfAvailable<MachineDominatorTree>()) - MDT->addNewBlock(NMBB, A); - - return NMBB; -} diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 5ea2941..3489db2 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -24,6 +24,11 @@ using namespace llvm; //===---------------------------------------------------------------------===// MachinePassRegistry RegisterRegAlloc::Registry; +static FunctionPass *createDefaultRegisterAllocator() { return 0; } +static RegisterRegAlloc +defaultRegAlloc("default", + "pick register allocator based on -O option", + createDefaultRegisterAllocator); //===---------------------------------------------------------------------===// /// @@ -33,8 +38,8 @@ MachinePassRegistry RegisterRegAlloc::Registry; static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, RegisterPassParser<RegisterRegAlloc> > RegAlloc("regalloc", - cl::init(&createLinearScanRegisterAllocator), - cl::desc("Register allocator to use (default=linearscan)")); + cl::init(&createDefaultRegisterAllocator), + cl::desc("Register allocator to use")); //===---------------------------------------------------------------------===// @@ -42,13 +47,22 @@ RegAlloc("regalloc", /// createRegisterAllocator - choose the appropriate register allocator.
/// //===---------------------------------------------------------------------===// -FunctionPass *llvm::createRegisterAllocator() { +FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); - + if (!Ctor) { Ctor = RegAlloc; RegisterRegAlloc::setDefault(RegAlloc); } - - return Ctor(); + + if (Ctor != createDefaultRegisterAllocator) + return Ctor(); + + // When the 'default' allocator is requested, pick one based on OptLevel. + switch (OptLevel) { + case CodeGenOpt::None: + return createFastRegisterAllocator(); + default: + return createLinearScanRegisterAllocator(); + } } diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp new file mode 100644 index 0000000..cbde2b0 --- /dev/null +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -0,0 +1,180 @@ +//===----- PostRAHazardRecognizer.cpp - hazard recognizer -------- ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a hazard recognizer using the instruction itineraries +// defined for the current target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "post-RA-sched" +#include "llvm/CodeGen/PostRAHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrItineraries.h" + +using namespace llvm; + +PostRAHazardRecognizer:: +PostRAHazardRecognizer(const InstrItineraryData &LItinData) : + ScheduleHazardRecognizer(), ItinData(LItinData) { + // Determine the maximum depth of any itinerary. This determines the + // depth of the scoreboard. We always make the scoreboard at least 1 + // cycle deep to avoid dealing with the boundary condition. + unsigned ScoreboardDepth = 1; + if (!ItinData.isEmpty()) { + for (unsigned idx = 0; ; ++idx) { + if (ItinData.isEndMarker(idx)) + break; + + const InstrStage *IS = ItinData.beginStage(idx); + const InstrStage *E = ItinData.endStage(idx); + unsigned ItinDepth = 0; + for (; IS != E; ++IS) + ItinDepth += IS->getCycles(); + + ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); + } + } + + ReservedScoreboard.reset(ScoreboardDepth); + RequiredScoreboard.reset(ScoreboardDepth); + + DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = " + << ScoreboardDepth << '\n'); +} + +void PostRAHazardRecognizer::Reset() { + RequiredScoreboard.reset(); + ReservedScoreboard.reset(); +} + +void PostRAHazardRecognizer::ScoreBoard::dump() const { + dbgs() << "Scoreboard:\n"; + + unsigned last = Depth - 1; + while ((last > 0) && ((*this)[last] == 0)) + last--; + + for (unsigned i = 0; i <= last; i++) { + unsigned FUs = (*this)[i]; + dbgs() << "\t"; + for (int j = 31; j >= 0; j--) + dbgs() << ((FUs & (1 << j)) ? '1' : '0'); + dbgs() << '\n'; + } +} + +ScheduleHazardRecognizer::HazardType +PostRAHazardRecognizer::getHazardType(SUnit *SU) { + if (ItinData.isEmpty()) + return NoHazard; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to check for + // free FU's in the scoreboard at the appropriate future cycles.
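// A worked example of the check that follows (unit masks invented for
// illustration): suppose the instruction's first stage occupies unit 0x4
// for two cycles as a Required reservation. For i = 0 and 1 we start from
// freeUnits = 0x4, clear any bits already taken in ReservedScoreboard and
// RequiredScoreboard at cycle + i, and report a Hazard as soon as no bit
// survives. Both scoreboards are circular buffers indexed relative to the
// current cycle, which is why AdvanceCycle() further down just clears
// slot 0 and rotates.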
+ unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must find one of the stage's units free for every cycle the + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < RequiredScoreboard.getDepth()) && + "Scoreboard depth exceeded!"); + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + + if (!freeUnits) { + DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); + DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); + DEBUG(SU->getInstr()->dump()); + return Hazard; + } + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + return NoHazard; +} + +void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) { + if (ItinData.isEmpty()) + return; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to reserve FU's + // in the scoreboard at the appropriate future cycles. + unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must reserve one of the stage's units for every cycle the + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < RequiredScoreboard.getDepth()) && + "Scoreboard depth exceeded!"); + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + + // reduce to a single unit + unsigned freeUnit = 0; + do { + freeUnit = freeUnits; + freeUnits = freeUnit & (freeUnit - 1); + } while (freeUnits); + + assert(freeUnit && "No function unit available!"); + if (IS->getReservationKind() == InstrStage::Required) + RequiredScoreboard[cycle + i] |= freeUnit; + else + ReservedScoreboard[cycle + i] |= freeUnit; + } + + // Advance the cycle to the next stage. 
+ cycle += IS->getNextCycles(); + } + + DEBUG(ReservedScoreboard.dump()); + DEBUG(RequiredScoreboard.dump()); +} + +void PostRAHazardRecognizer::AdvanceCycle() { + ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); + RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); +} diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 9714ea6..4af8e07 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -22,8 +22,6 @@ #include "AntiDepBreaker.h" #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" -#include "ExactHazardRecognizer.h" -#include "SimpleHazardRecognizer.h" #include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" @@ -65,10 +63,6 @@ EnableAntiDepBreaking("break-anti-dependencies", cl::desc("Break post-RA scheduling anti-dependencies: " "\"critical\", \"all\", or \"none\""), cl::init("none"), cl::Hidden); -static cl::opt<bool> -EnablePostRAHazardAvoidance("avoid-hazards", - cl::desc("Enable exact hazard avoidance"), - cl::init(true), cl::Hidden); // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod static cl::opt<int> @@ -85,6 +79,7 @@ AntiDepBreaker::~AntiDepBreaker() { } namespace { class PostRAScheduler : public MachineFunctionPass { AliasAnalysis *AA; + const TargetInstrInfo *TII; CodeGenOpt::Level OptLevel; public: @@ -187,30 +182,9 @@ namespace { }; } -/// isSchedulingBoundary - Test if the given instruction should be -/// considered a scheduling boundary. This primarily includes labels -/// and terminators. -/// -static bool isSchedulingBoundary(const MachineInstr *MI, - const MachineFunction &MF) { - // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isLabel()) - return true; - - // Don't attempt to schedule around any instruction that defines - // a stack-oriented pointer, as it's unlikely to be profitable. This - // saves compile time, because it doesn't require every single - // stack slot reference to depend on the instruction that does the - // modification. - const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); - if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore())) - return true; - - return false; -} - bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { AA = &getAnalysis<AliasAnalysis>(); + TII = Fn.getTarget().getInstrInfo(); // Check for explicit enable/disable of post-ra scheduling. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; @@ -237,10 +211,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>(); - const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData(); - ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ? - (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) : - (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); + const TargetMachine &TM = Fn.getTarget(); + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + ScheduleHazardRecognizer *HR = + TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins); AntiDepBreaker *ADB = ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
(AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : @@ -271,8 +245,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::iterator Current = MBB->end(); unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { - MachineInstr *MI = prior(I); - if (isSchedulingBoundary(MI, Fn)) { + MachineInstr *MI = llvm::prior(I); + if (TII->isSchedulingBoundary(MI, MBB, Fn)) { Scheduler.Run(MBB, I, Current, CurrentCount); Scheduler.EmitSchedule(); Current = MI; @@ -680,15 +654,6 @@ void SchedulePostRATDList::ListScheduleTopDown() { ScheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); CycleHasInsts = true; - - // If we are using the target-specific hazards, then don't - // advance the cycle time just because we schedule a node. If - // the target allows it we can schedule multiple nodes in the - // same cycle. - if (!EnablePostRAHazardAvoidance) { - if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! - ++CurCycle; - } } else { if (CycleHasInsts) { DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 96e7327..fb2f909 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -512,9 +512,6 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI)); // FIXME: Need to set kills properly for inter-block stuff. - if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex); - if (IsIntraBlock) - RetVNI->addKill(EndIndex); } else if (ContainsDefs && ContainsUses) { SmallPtrSet<MachineInstr*, 4>& BlockDefs = Defs[MBB]; SmallPtrSet<MachineInstr*, 4>& BlockUses = Uses[MBB]; @@ -556,12 +553,6 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, NewVNs, LiveOut, Phis, false, true); LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - - if (foundUse && RetVNI->isKill(StartIndex)) - RetVNI->removeKill(StartIndex); - if (IsIntraBlock) { - RetVNI->addKill(EndIndex); - } } // Memoize results so we don't have to recompute them. @@ -636,9 +627,6 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I = IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { I->second->setHasPHIKill(true); - SlotIndex KillIndex(LIs->getMBBEndIdx(I->first), true); - if (!I->second->isKill(KillIndex)) - I->second->addKill(KillIndex); } } @@ -648,8 +636,6 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us } else EndIndex = LIs->getMBBEndIdx(MBB); LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - if (IsIntraBlock) - RetVNI->addKill(EndIndex); // Memoize results so we don't have to recompute them. if (!IsIntraBlock) @@ -691,10 +677,12 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { // If the def is a move, set the copy field.
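// (On the isCopyLike() test added just below: isMoveInstr is the old target
// hook that decodes a target-specific move into source/destination
// registers, while isCopyLike() matches the new target-independent copy
// family -- COPY and SUBREG_TO_REG -- whose defined register is always
// operand 0, hence the direct getOperand(0) comparison against LI->reg.)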
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { if (DstReg == LI->reg) NewVN->setCopy(&*DI); - + } else if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) + NewVN->setCopy(&*DI); + NewVNs[&*DI] = NewVN; } @@ -725,25 +713,6 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { VNInfo* DeadVN = NewVNs[&*DI]; LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN)); - DeadVN->addKill(DefIdx); - } - - // Update kill markers. - for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); - VI != VE; ++VI) { - VNInfo* VNI = *VI; - for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) { - SlotIndex KillIdx = VNI->kills[i]; - if (KillIdx.isPHI()) - continue; - MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx); - if (KillMI) { - MachineOperand *KillMO = KillMI->findRegisterUseOperand(CurrLI->reg); - if (KillMO) - // It could be a dead def. - KillMO->setIsKill(); - } - } } } @@ -773,19 +742,14 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { VNsToCopy.push_back(OldVN); // Locate two-address redefinitions - for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(), - KE = OldVN->kills.end(); KI != KE; ++KI) { - assert(!KI->isPHI() && - "VN previously reported having no PHI kills."); - MachineInstr* MI = LIs->getInstructionFromIndex(*KI); - unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg); - if (DefIdx == ~0U) continue; - if (MI->isRegTiedToUseOperand(DefIdx)) { - VNInfo* NextVN = - CurrLI->findDefinedVNInfoForRegInt(KI->getDefIndex()); - if (NextVN == OldVN) continue; + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg), + DE = MRI->def_end(); DI != DE; ++DI) { + if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue; + SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex(); + VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx); + if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) != + VNsToCopy.end()) Stack.push_back(NextVN); - } } } @@ -836,7 +800,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { if (IntervalSSMap.count(CurrLI->reg)) IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg]; - NumRenumbers++; + ++NumRenumbers; } bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, @@ -854,7 +818,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, if (KillPt == DefMI->getParent()->end()) return false; - TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI); + TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI); SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt)); ReconstructLiveInterval(CurrLI); @@ -899,12 +863,11 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); } - MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), - FoldPt, Ops, SS); + MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); if (FMI) { LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); - FMI = MBB->insert(MBB->erase(FoldPt), FMI); + FoldPt->eraseFromParent(); ++NumFolds; IntervalSSMap[vreg] = SS; @@ -980,12 +943,11 @@ MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, if (!TII->canFoldMemoryOperand(FoldPt, Ops)) return 0; - MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), - FoldPt, Ops, SS); + MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); if (FMI) { LIs->ReplaceMachineInstrInMaps(FoldPt, 
FMI); - FMI = MBB->insert(MBB->erase(FoldPt), FMI); + FoldPt->eraseFromParent(); ++NumRestoreFolds; } @@ -1192,7 +1154,7 @@ unsigned PreAllocSplitting::getNumberOfNonSpills( int StoreFrameIndex; unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); if (StoreVReg != Reg || StoreFrameIndex != FrameIndex) - NonSpills++; + ++NonSpills; int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg); if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx)) @@ -1255,7 +1217,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { (*LI)->removeValNo(CurrVN); DefMI->eraseFromParent(); VNUseCount.erase(CurrVN); - NumDeadSpills++; + ++NumDeadSpills; changed = true; continue; } @@ -1291,9 +1253,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { Ops.push_back(OpIdx); if (!TII->canFoldMemoryOperand(use, Ops)) continue; - MachineInstr* NewMI = - TII->foldMemoryOperand(*use->getParent()->getParent(), - use, Ops, FrameIndex); + MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex); if (!NewMI) continue; @@ -1303,10 +1263,9 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { (*LI)->removeValNo(CurrVN); DefMI->eraseFromParent(); - MachineBasicBlock* MBB = use->getParent(); - NewMI = MBB->insert(MBB->erase(use), NewMI); + use->eraseFromParent(); VNUseCount[CurrVN].erase(use); - + // Remove deleted instructions. Note that we need to remove them from // the VNInfo->use map as well, just to be safe. for (SmallPtrSet<MachineInstr*, 4>::iterator II = @@ -1328,7 +1287,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { if (VI->second.erase(use)) VI->second.insert(NewMI); - NumDeadSpills++; + ++NumDeadSpills; changed = true; continue; } @@ -1350,7 +1309,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) { LIs->RemoveMachineInstrFromMaps(DefMI); (*LI)->removeValNo(CurrVN); DefMI->eraseFromParent(); - NumDeadSpills++; + ++NumDeadSpills; changed = true; } } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 62f525f..ca4c477 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -46,14 +46,14 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, const TargetInstrInfo *tii_) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) + Reg == SrcReg && DstSubReg == 0) return true; - if (OpIdx == 2 && MI->isSubregToReg()) - return true; - if (OpIdx == 1 && MI->isExtractSubreg()) - return true; - return false; + switch (OpIdx) { + case 1: return MI->isCopy() && MI->getOperand(0).getSubReg() == 0; + case 2: return MI->isSubregToReg() && MI->getOperand(0).getSubReg() == 0; + default: return false; + } } /// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure @@ -101,11 +101,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { continue; } - if (MI->isInsertSubreg()) { - MachineOperand &MO = MI->getOperand(2); + // Eliminate %reg1032:sub = COPY undef. + if (MI->isCopy() && MI->getOperand(0).getSubReg()) { + MachineOperand &MO = MI->getOperand(1); if (ImpDefRegs.count(MO.getReg())) { - // %reg1032 = INSERT_SUBREG %reg1032, undef, 2 - // This is an identity copy, eliminate it now.
if (MO.isKill()) { LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); vi.removeKill(MI); @@ -119,7 +118,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) + if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -144,6 +143,12 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { Changed = true; MO.setIsUndef(); + // This is a partial register redef of an implicit def. + // Make sure the whole register is defined by the instruction. + if (MO.isDef()) { + MI->addRegisterDefined(Reg); + continue; + } if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { // Make sure other uses of for (unsigned j = i+1; j != e; ++j) { @@ -219,8 +224,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Turn a copy use into an implicit_def. unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) { + if ((RMI->isCopy() && RMI->getOperand(1).getReg() == Reg && + RMI->getOperand(0).getSubReg() == 0) || + (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + Reg == SrcReg && DstSubReg == 0)) { RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index e778024..3843b25 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -158,9 +158,9 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { AdjustsStack = true; FrameSDOps.push_back(I); } else if (I->isInlineAsm()) { - // An InlineAsm might be a call; assume it is to get the stack frame - // aligned correctly for calls. - AdjustsStack = true; + // Some inline asm's need a stack frame, as indicated by operand 1. + if (I->getOperand(1).getImm()) + AdjustsStack = true; } MFI->setAdjustsStack(AdjustsStack); @@ -202,22 +202,17 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) return; - // Figure out which *callee saved* registers are modified by the current - // function, thus needing to be saved and restored in the prolog/epilog. - const TargetRegisterClass * const *CSRegClasses = - RegInfo->getCalleeSavedRegClasses(&Fn); - std::vector<CalleeSavedInfo> CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (Fn.getRegInfo().isPhysRegUsed(Reg)) { // If the reg is modified, save it! - CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); + CSI.push_back(CalleeSavedInfo(Reg)); } else { for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); *AliasSet; ++AliasSet) { // Check alias registers too.
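// (Concrete case for the alias walk here, assuming 32-bit x86: a function
// that only writes %bx never reports the callee-saved %ebx as used
// directly, but %bx appears in getAliasSet(%ebx), so the loop below still
// arranges for %ebx to be spilled and restored.)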
if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { - CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); + CSI.push_back(CalleeSavedInfo(Reg)); break; } } @@ -236,7 +231,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Reg = I->getReg(); - const TargetRegisterClass *RC = I->getRegClass(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); int FrameIdx; if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { @@ -265,8 +260,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, - true, false); + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); } I->setFrameIdx(FrameIdx); @@ -303,8 +297,10 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true, - CSI[i].getFrameIdx(), CSI[i].getRegClass(),TRI); + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); } } @@ -328,9 +324,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // terminators that precede it. if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(), + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), - CSI[i].getRegClass(), TRI); + RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert @@ -374,10 +372,12 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MBB->addLiveIn(blockCSI[i].getReg()); // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*MBB, I, Reg, true, blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass(), TRI); + RC, TRI); } } @@ -423,9 +423,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Restore all registers immediately before the return and any // terminators that precede it. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { - TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(), + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass(), TRI); + RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert @@ -639,6 +641,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { +#ifndef NDEBUG + int SPAdjCount = 0; // frame setup / destroy count. +#endif int SPAdj = 0; // SP offset due to call frame setup / destroy.
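// To see what the relaxed assertion at the bottom of this loop checks
// (a sketch using x86-style call-frame pseudos): a matched pair such as
//
//   ADJCALLSTACKDOWN 16   // SPAdj += 16, SPAdjCount == 1
//   ...
//   ADJCALLSTACKUP 16     // SPAdj -= 16, SPAdjCount == 0
//
// returns both counters to zero, so "SPAdjCount == 0 but SPAdj != 0" really
// is a bug; when a custom inserter splits a pair across blocks, SPAdjCount
// stays nonzero and the SPAdj == 0 check is deliberately skipped.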
if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); @@ -646,6 +651,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { +#ifndef NDEBUG + // Track whether we see even pairs of them + SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; +#endif // Remember how much SP has been adjusted to create the call // frame. int Size = I->getOperand(0).getImm(); @@ -712,7 +721,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } - assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); + // If we have evenly matched pairs of frame setup / destroy instructions, + // make sure the adjustments come out to zero. If we don't have matched + // pairs, we can't be sure the missing bit isn't in another basic block + // due to a custom inserter playing tricks, so just asserting SPAdj==0 + // isn't sufficient. See tMOVCC on Thumb1, for example. + assert((SPAdjCount || SPAdj == 0) && + "Unbalanced call frame setup / destroy pairs?"); } } @@ -870,11 +885,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Scavenge a new scratch register CurrentVirtReg = Reg; const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - CurrentScratchReg = RS->FindUnusedReg(RC); - if (CurrentScratchReg == 0) - // No register is "free". Scavenge a register. - CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); - + CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); PrevValue = Value; } // replace this reference to the virtual register with the diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index b3b5760..f44478e 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -110,6 +110,11 @@ namespace { // Allocatable - vector of allocatable physical registers. BitVector Allocatable; + // SkippedInstrs - Descriptors of instructions whose clobber list was ignored + // because all registers were spilled. It is still necessary to mark all the + // clobbered registers as used by the function. + SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs; + // isBulkSpilling - This flag is set when LiveRegMap will be cleared // completely after spilling all live registers. LiveRegMap entries should // not be erased. @@ -135,6 +140,8 @@ namespace { private: bool runOnMachineFunction(MachineFunction &Fn); void AllocateBasicBlock(); + void handleThroughOperands(MachineInstr *MI, + SmallVectorImpl<unsigned> &VirtDead); int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); bool isLastUseOfLocalReg(MachineOperand&); @@ -508,27 +515,20 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, bool New; tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); LiveReg &LR = LRI->second; - bool PartialRedef = MI->getOperand(OpNum).getSubReg(); if (New) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { + const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; // It's a copy, use the destination register as a hint.
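// (Example of the hint taken here: if the sole use of %reg1024 is
// "%EAX = COPY %reg1024", hinting %EAX lets the allocator place %reg1024
// directly in %EAX, and the copy later coalesces away.)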
- if (TII->isMoveInstr(*MRI->use_nodbg_begin(VirtReg), - SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (UseMI.isCopyLike()) + Hint = UseMI.getOperand(0).getReg(); + else if (TII->isMoveInstr(UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) Hint = DstReg; } allocVirtReg(MI, *LRI, Hint); - // If this is only a partial redefinition, we must reload the other parts. - if (PartialRedef && MI->readsVirtualRegister(VirtReg)) { - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - int FI = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << "Reloading for partial redef: %reg" << VirtReg << "\n"); - TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FI, RC, TRI); - ++NumLoads; - } - } else if (LR.LastUse && !PartialRedef) { + } else if (LR.LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse()) @@ -564,10 +564,16 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, } else if (LR.Dirty) { if (isLastUseOfLocalReg(MO)) { DEBUG(dbgs() << "Killing last use: " << MO << "\n"); - MO.setIsKill(); + if (MO.isUse()) + MO.setIsKill(); + else + MO.setIsDead(); } else if (MO.isKill()) { DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); MO.setIsKill(false); + } else if (MO.isDead()) { + DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); + MO.setIsDead(false); } } else if (MO.isKill()) { // We must remove kill flags from uses of reloaded registers because the @@ -576,6 +582,9 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, // This would cause a second reload of %x into a different register. DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); MO.setIsKill(false); + } else if (MO.isDead()) { + DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); + MO.setIsDead(false); } assert(LR.PhysReg && "Register not assigned"); LR.LastUse = MI; @@ -607,6 +616,91 @@ bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { return MO.isDead(); } +// Handle special instruction operands like early clobbers and tied ops when +// there are additional physreg defines. +void RAFast::handleThroughOperands(MachineInstr *MI, + SmallVectorImpl<unsigned> &VirtDead) { + DEBUG(dbgs() << "Scanning for through registers:"); + SmallSet<unsigned, 8> ThroughRegs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || + (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { + if (ThroughRegs.insert(Reg)) + DEBUG(dbgs() << " %reg" << Reg); + } + } + + // If any physreg defines collide with preallocated through registers, + // we must spill and reallocate.
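// A concrete collision (a sketch, GCC-style inline asm on x86):
//
//   asm("..." : "=&r"(a), "+r"(b) : : "eax");
//
// The early-clobber "=&r" output and the tied "+r" operand must stay valid
// across the whole instruction, while "eax" becomes a physreg def. If a
// through register were preallocated to %EAX, its value would be clobbered
// mid-instruction, so it is spilled and reallocated by the loop below.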
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + if (ThroughRegs.count(PhysRegState[Reg])) + definePhysReg(MI, Reg, regFree); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + UsedInInstr.set(*AS); + if (ThroughRegs.count(PhysRegState[*AS])) + definePhysReg(MI, *AS, regFree); + } + } + + SmallVector PartialDefs; + DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (MO.isUse()) { + unsigned DefIdx = 0; + if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue; + DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " + << DefIdx << ".\n"); + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + setPhysReg(MI, i, PhysReg); + // Note: we don't update the def operand yet. That would cause the normal + // def-scan to attempt spilling. + } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) { + DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); + // Reload the register, but don't assign to the operand just yet. + // That would confuse the later phys-def processing pass. + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + PartialDefs.push_back(LRI->second.PhysReg); + } else if (MO.isEarlyClobber()) { + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + if (setPhysReg(MI, i, PhysReg)) + VirtDead.push_back(Reg); + } + } + + // Restore UsedInInstr to a state usable for allocating normal virtual uses. + UsedInInstr.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + UsedInInstr.set(*AS); + } + + // Also mark PartialDefs as used to avoid reallocation. + for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) + UsedInInstr.set(PartialDefs[i]); +} + void RAFast::AllocateBasicBlock() { DEBUG(dbgs() << "\nAllocating " << *MBB); @@ -620,7 +714,7 @@ void RAFast::AllocateBasicBlock() { E = MBB->livein_end(); I != E; ++I) definePhysReg(MII, *I, regReserved); - SmallVector PhysECs, VirtDead; + SmallVector VirtDead; SmallVector Coalesced; // Otherwise, sequentially allocate each instruction in the MBB. @@ -670,8 +764,25 @@ void RAFast::AllocateBasicBlock() { LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); if (LRI != LiveVirtRegs.end()) setPhysReg(MI, i, LRI->second.PhysReg); - else - MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + else { + int SS = StackSlotForVirtReg[Reg]; + if (SS == -1) + MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + else { + // Modify DBG_VALUE now that the value is in a spill slot. 
+ uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = + MI->getOperand(MI->getNumOperands()-1).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + } else + MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + } + } } // Next instruction. continue; @@ -679,17 +790,25 @@ void RAFast::AllocateBasicBlock() { // If this is a copy, we may be able to coalesce. unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub; - if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub)) + if (MI->isCopy()) { + CopyDst = MI->getOperand(0).getReg(); + CopySrc = MI->getOperand(1).getReg(); + CopyDstSub = MI->getOperand(0).getSubReg(); + CopySrcSub = MI->getOperand(1).getSubReg(); + } else if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub)) CopySrc = CopyDst = 0; // Track registers used by instruction. UsedInInstr.reset(); - PhysECs.clear(); // First scan. // Mark physreg uses and early clobbers as used. // Find the end of the virtreg operands unsigned VirtOpEnd = 0; + bool hasTiedOps = false; + bool hasEarlyClobbers = false; + bool hasPartialRedefs = false; + bool hasPhysDefs = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -697,20 +816,44 @@ void RAFast::AllocateBasicBlock() { if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) { VirtOpEnd = i+1; + if (MO.isUse()) { + hasTiedOps = hasTiedOps || + TID.getOperandConstraint(i, TOI::TIED_TO) != -1; + } else { + if (MO.isEarlyClobber()) + hasEarlyClobbers = true; + if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) + hasPartialRedefs = true; + } continue; } if (!Allocatable.test(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { - definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); - PhysECs.push_back(Reg); - } + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); + hasEarlyClobbers = true; + } else + hasPhysDefs = true; + } + + // The instruction may have virtual register operands that must be allocated + // the same register at use-time and def-time: early clobbers and tied + // operands. If there are also physical defs, these registers must avoid + // both physical defs and uses, making them more constrained than normal + // operands. + // We didn't detect inline asm tied operands above, so just make this extra + // pass for all inline asm. + if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || + (hasTiedOps && hasPhysDefs)) { + handleThroughOperands(MI, VirtDead); + // Don't attempt coalescing when we have funny stuff going on. + CopyDst = 0; } // Second scan. - // Allocate virtreg uses and early clobbers. - // Collect VirtKills + // Allocate virtreg uses. for (unsigned i = 0; i != VirtOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -722,12 +865,6 @@ void RAFast::AllocateBasicBlock() { CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; if (setPhysReg(MI, i, PhysReg)) killVirtReg(LRI); - } else if (MO.isEarlyClobber()) { - // Note: defineVirtReg may invalidate MO. 
- LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; - setPhysReg(MI, i, PhysReg); - PhysECs.push_back(PhysReg); } } @@ -735,12 +872,16 @@ // Track registers defined by instruction - early clobbers at this point. UsedInInstr.reset(); - for (unsigned i = 0, e = PhysECs.size(); i != e; ++i) { - unsigned PhysReg = PhysECs[i]; - UsedInInstr.set(PhysReg); - for (const unsigned *AS = TRI->getAliasSet(PhysReg); - unsigned Alias = *AS; ++AS) - UsedInInstr.set(Alias); + if (hasEarlyClobbers) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + UsedInInstr.set(*AS); + } } unsigned DefOpEnd = MI->getNumOperands(); @@ -752,13 +893,18 @@ DefOpEnd = VirtOpEnd; DEBUG(dbgs() << " Spilling remaining registers before call.\n"); spillAll(MI); + + // The imp-defs are skipped below, but we still need to mark those + // registers as used by the function. + SkippedInstrs.insert(&TID); } // Third scan. // Allocate defs and collect dead defs. for (unsigned i = 0; i != DefOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue; + if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) + continue; unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -837,6 +983,14 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { // Make sure the set of used physregs is closed under subreg operations. MRI->closePhysRegsUsed(*TRI); + // Add the clobber lists for all the instructions we skipped earlier. + for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator + I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) + if (const unsigned *Defs = (*I)->getImplicitDefs()) + while (*Defs) + MRI->setPhysRegUsed(*Defs++); + + SkippedInstrs.clear(); StackSlotForVirtReg.clear(); return true; } diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index bc331f0..044672d 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -83,7 +83,8 @@ namespace { // pressure, it can cause fewer GPRs to be held in the queue. static cl::opt<unsigned> NumRecentlyUsedRegs("linearscan-skip-count", - cl::desc("Number of registers for linearscan to remember to skip."), + cl::desc("Number of registers for linearscan to remember" + " to skip."), cl::init(0), cl::Hidden); @@ -421,9 +422,10 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (vni->def != SlotIndex() && vni->isDefAccurate() && (CopyMI = li_->getInstructionFromIndex(vni->def)) && - tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + (CopyMI->isCopy() || + tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))) // Defined by a copy, try to extend SrcReg forward - CandReg = SrcReg; + CandReg = CopyMI->isCopy() ?
CopyMI->getOperand(1).getReg() : SrcReg; else if (TrivCoalesceEnds && (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && @@ -992,6 +994,24 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) mri_->setRegAllocationHint(cur->reg, 0, Reg); } + } else if (CopyMI && CopyMI->isCopy()) { + DstReg = CopyMI->getOperand(0).getReg(); + DstSubReg = CopyMI->getOperand(0).getSubReg(); + SrcReg = CopyMI->getOperand(1).getReg(); + SrcSubReg = CopyMI->getOperand(1).getSubReg(); + unsigned Reg = 0; + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + Reg = SrcReg; + else if (vrm_->isAssignedReg(SrcReg)) + Reg = vrm_->getPhys(SrcReg); + if (Reg) { + if (SrcSubReg) + Reg = tri_->getSubReg(Reg, SrcSubReg); + if (DstSubReg) + Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); + if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) + mri_->setRegAllocationHint(cur->reg, 0, Reg); + } } } } @@ -1206,8 +1226,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); SmallVector spillIs; std::vector added; - - added = spiller_->spill(cur, spillIs); + spiller_->spill(cur, added, spillIs); std::sort(added.begin(), added.end(), LISorter()); addStackInterval(cur, ls_, li_, mri_, *vrm_); @@ -1285,10 +1304,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); - std::vector newIs; - newIs = spiller_->spill(sli, spillIs, &earliestStart); + spiller_->spill(sli, added, spillIs, &earliestStart); addStackInterval(sli, ls_, li_, mri_, *vrm_); - std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); } diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp deleted file mode 100644 index 321ae12..0000000 --- a/lib/CodeGen/RegAllocLocal.cpp +++ /dev/null @@ -1,1254 +0,0 @@ -//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This register allocator allocates registers to a basic block at a time, -// attempting to keep values in registers and reusing registers as appropriate. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regalloc" -#include "llvm/BasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include -using namespace llvm; - -STATISTIC(NumStores, "Number of stores added"); -STATISTIC(NumLoads , "Number of loads added"); -STATISTIC(NumCopies, "Number of copies coalesced"); - -static RegisterRegAlloc - localRegAlloc("local", "local register allocator", - createLocalRegisterAllocator); - -namespace { - class RALocal : public MachineFunctionPass { - public: - static char ID; - RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {} - private: - const TargetMachine *TM; - MachineFunction *MF; - MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - - // StackSlotForVirtReg - Maps virtual regs to the frame index where these - // values are spilled. - IndexedMap StackSlotForVirtReg; - - // Virt2PhysRegMap - This map contains entries for each virtual register - // that is currently available in a physical register. - IndexedMap Virt2PhysRegMap; - - unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { - return Virt2PhysRegMap[VirtReg]; - } - - // PhysRegsUsed - This array is effectively a map, containing entries for - // each physical register that currently has a value (ie, it is in - // Virt2PhysRegMap). The value mapped to is the virtual register - // corresponding to the physical register (the inverse of the - // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned - // because it is used by a future instruction, and to -2 if it is not - // allocatable. If the entry for a physical register is -1, then the - // physical register is "not in the map". - // - std::vector PhysRegsUsed; - - // PhysRegsUseOrder - This contains a list of the physical registers that - // currently have a virtual register value in them. This list provides an - // ordering of registers, imposing a reallocation order. This list is only - // used if all registers are allocated and we have to spill one, in which - // case we spill the least recently used register. Entries at the front of - // the list are the least recently used registers, entries at the back are - // the most recently used. - // - std::vector PhysRegsUseOrder; - - // Virt2LastUseMap - This maps each virtual register to its last use - // (MachineInstr*, operand index pair). - IndexedMap, VirtReg2IndexFunctor> - Virt2LastUseMap; - - std::pair& getVirtRegLastUse(unsigned Reg) { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - return Virt2LastUseMap[Reg]; - } - - // VirtRegModified - This bitset contains information about which virtual - // registers need to be spilled back to memory when their registers are - // scavenged. 
If a virtual register has simply been rematerialized, there - // is no reason to spill it to memory when we need the register back. - // - BitVector VirtRegModified; - - // UsedInMultipleBlocks - Tracks whether a particular register is used in - // more than one block. - BitVector UsedInMultipleBlocks; - - void markVirtRegModified(unsigned Reg, bool Val = true) { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - Reg -= TargetRegisterInfo::FirstVirtualRegister; - if (Val) - VirtRegModified.set(Reg); - else - VirtRegModified.reset(Reg); - } - - bool isVirtRegModified(unsigned Reg) const { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - assert(Reg - TargetRegisterInfo::FirstVirtualRegister < - VirtRegModified.size() && "Illegal virtual register!"); - return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister]; - } - - void AddToPhysRegsUseOrder(unsigned Reg) { - std::vector::iterator It = - std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg); - if (It != PhysRegsUseOrder.end()) - PhysRegsUseOrder.erase(It); - PhysRegsUseOrder.push_back(Reg); - } - - void MarkPhysRegRecentlyUsed(unsigned Reg) { - if (PhysRegsUseOrder.empty() || - PhysRegsUseOrder.back() == Reg) return; // Already most recently used - - for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) { - unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle - if (!areRegsEqual(Reg, RegMatch)) continue; - - PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1); - // Add it to the end of the list - PhysRegsUseOrder.push_back(RegMatch); - if (RegMatch == Reg) - return; // Found an exact match, exit early - } - } - - public: - virtual const char *getPassName() const { - return "Local Register Allocator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequiredID(PHIEliminationID); - AU.addRequiredID(TwoAddressInstructionPassID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - private: - /// runOnMachineFunction - Register allocate the whole function - bool runOnMachineFunction(MachineFunction &Fn); - - /// AllocateBasicBlock - Register allocate the specified basic block. - void AllocateBasicBlock(MachineBasicBlock &MBB); - - - /// areRegsEqual - This method returns true if the specified registers are - /// related to each other. To do this, it checks to see if they are equal - /// or if the first register is in the alias set of the second register. - /// - bool areRegsEqual(unsigned R1, unsigned R2) const { - if (R1 == R2) return true; - for (const unsigned *AliasSet = TRI->getAliasSet(R2); - *AliasSet; ++AliasSet) { - if (*AliasSet == R1) return true; - } - return false; - } - - /// getStackSpaceFor - This returns the frame index of the specified virtual - /// register on the stack, allocating space if necessary. - int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); - - /// removePhysReg - This method marks the specified physical register as no - /// longer being in use. - /// - void removePhysReg(unsigned PhysReg); - - void storeVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg, bool isKill); - - /// spillVirtReg - This method spills the value specified by PhysReg into - /// the virtual register slot specified by VirtReg. It then updates the RA - /// data structures to indicate the fact that PhysReg is now available. 
- /// - void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned VirtReg, unsigned PhysReg); - - /// spillPhysReg - This method spills the specified physical register into - /// the virtual register slot associated with it. If OnlyVirtRegs is set to - /// true, then the request is ignored if the physical register does not - /// contain a virtual register. - /// - void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool OnlyVirtRegs = false); - - /// assignVirtToPhysReg - This method updates local state so that we know - /// that PhysReg is the proper container for VirtReg now. The physical - /// register must not be used for anything else when this is called. - /// - void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); - - /// isPhysRegAvailable - Return true if the specified physical register is - /// free and available for use. This also includes checking to see if - /// aliased registers are all free... - /// - bool isPhysRegAvailable(unsigned PhysReg) const; - - /// getFreeReg - Look to see if there is a free register available in the - /// specified register class. If not, return 0. - /// - unsigned getFreeReg(const TargetRegisterClass *RC); - - /// getReg - Find a physical register to hold the specified virtual - /// register. If all compatible physical registers are used, this method - /// spills the last used virtual register to the stack, and uses that - /// register. If NoFree is true, that means the caller knows there isn't - /// a free register, do not call getFreeReg(). - unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg, bool NoFree = false); - - /// reloadVirtReg - This method transforms the specified virtual - /// register use to refer to a physical register. This method may do this - /// in one of several ways: if the register is available in a physical - /// register already, it uses that physical register. If the value is not - /// in a physical register, and if there are physical registers available, - /// it loads it into a register: PhysReg if that is an available physical - /// register, otherwise any physical register of the right class. - /// If register pressure is high, and it is possible, it tries to fold the - /// load of the virtual register into the instruction itself. It avoids - /// doing this if register pressure is low to improve the chance that - /// subsequent instructions can use the reloaded value. This method - /// returns the modified instruction. - /// - MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, SmallSet &RRegs, - unsigned PhysReg); - - /// ComputeLocalLiveness - Computes liveness of registers within a basic - /// block, setting the killed/dead flags as appropriate. - void ComputeLocalLiveness(MachineBasicBlock& MBB); - - void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I, - unsigned PhysReg); - }; - char RALocal::ID = 0; -} - -/// getStackSpaceFor - This allocates space for the specified virtual register -/// to be held on the stack. -int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { - // Find the location Reg would belong... - int SS = StackSlotForVirtReg[VirtReg]; - if (SS != -1) - return SS; // Already has space allocated? - - // Allocate a new stack object for this spill location... - int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); - - // Assign the slot. 
- StackSlotForVirtReg[VirtReg] = FrameIdx; - return FrameIdx; -} - - -/// removePhysReg - This method marks the specified physical register as no -/// longer being in use. -/// -void RALocal::removePhysReg(unsigned PhysReg) { - PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used - - std::vector::iterator It = - std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg); - if (It != PhysRegsUseOrder.end()) - PhysRegsUseOrder.erase(It); -} - -/// storeVirtReg - Store a virtual register to its assigned stack slot. -void RALocal::storeVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg, - bool isKill) { - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << " to stack slot #" << FrameIndex); - TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC, TRI); - ++NumStores; // Update statistics - - // Mark the spill instruction as last use if we're not killing the register. - if (!isKill) { - MachineInstr *Spill = llvm::prior(I); - int OpNum = Spill->findRegisterUseOperandIdx(PhysReg); - if (OpNum < 0) - getVirtRegLastUse(VirtReg) = std::make_pair((MachineInstr*)0, 0); - else - getVirtRegLastUse(VirtReg) = std::make_pair(Spill, OpNum); - } -} - -/// spillVirtReg - This method spills the value specified by PhysReg into the -/// virtual register slot specified by VirtReg. It then updates the RA data -/// structures to indicate the fact that PhysReg is now available. -/// -void RALocal::spillVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg) { - assert(VirtReg && "Spilling a physical register is illegal!" - " Must not have appropriate kill for the register or use exists beyond" - " the intended one."); - DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) - << " containing %reg" << VirtReg); - - if (!isVirtRegModified(VirtReg)) { - DEBUG(dbgs() << " which has not been modified, so no store necessary!"); - std::pair &LastUse = getVirtRegLastUse(VirtReg); - if (LastUse.first) - LastUse.first->getOperand(LastUse.second).setIsKill(); - } else { - // Otherwise, there is a virtual register corresponding to this physical - // register. We only need to spill it into its stack slot if it has been - // modified. - // If the instruction reads the register that's spilled, (e.g. this can - // happen if it is a move to a physical register), then the spill - // instruction is not a kill. - bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg)); - storeVirtReg(MBB, I, VirtReg, PhysReg, isKill); - } - - getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available - - DEBUG(dbgs() << '\n'); - removePhysReg(PhysReg); -} - - -/// spillPhysReg - This method spills the specified physical register into the -/// virtual register slot associated with it. If OnlyVirtRegs is set to true, -/// then the request is ignored if the physical register does not contain a -/// virtual register. -/// -void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool OnlyVirtRegs) { - if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! - assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); - if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) - spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); - return; - } - - // If the selected register aliases any other registers, we must make - // sure that one of the aliases isn't alive. 
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register. - PhysRegsUsed[*AliasSet] == -2) // If allocatable. - continue; - - if (PhysRegsUsed[*AliasSet]) - spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); - } -} - - -/// assignVirtToPhysReg - This method updates local state so that we know -/// that PhysReg is the proper container for VirtReg now. The physical -/// register must not be used for anything else when this is called. -/// -void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { - assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); - // Update information to note the fact that this register was just used, and - // it holds VirtReg. - PhysRegsUsed[PhysReg] = VirtReg; - getVirt2PhysRegMapSlot(VirtReg) = PhysReg; - AddToPhysRegsUseOrder(PhysReg); // New use of PhysReg -} - - -/// isPhysRegAvailable - Return true if the specified physical register is free -/// and available for use. This also includes checking to see if aliased -/// registers are all free... -/// -bool RALocal::isPhysRegAvailable(unsigned PhysReg) const { - if (PhysRegsUsed[PhysReg] != -1) return false; - - // If the selected register aliases any other allocated registers, it is - // not free! - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) - if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use? - return false; // Can't use this reg then. - return true; -} - - -/// getFreeReg - Look to see if there is a free register available in the -/// specified register class. If not, return 0. -/// -unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) { - // Get iterators defining the range of registers that are valid to allocate in - // this class, which also specifies the preferred allocation order. - TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); - TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); - - for (; RI != RE; ++RI) - if (isPhysRegAvailable(*RI)) { // Is reg unused? - assert(*RI != 0 && "Cannot use register!"); - return *RI; // Found an unused register! - } - return 0; -} - - -/// getReg - Find a physical register to hold the specified virtual -/// register. If all compatible physical registers are used, this method spills -/// the last used virtual register to the stack, and uses that register. -/// -unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned VirtReg, bool NoFree) { - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - - // First check to see if we have a free register of the requested type... - unsigned PhysReg = NoFree ? 0 : getFreeReg(RC); - - if (PhysReg != 0) { - // Assign the register. - assignVirtToPhysReg(VirtReg, PhysReg); - return PhysReg; - } - - // If we didn't find an unused register, scavenge one now! - assert(!PhysRegsUseOrder.empty() && "No allocated registers??"); - - // Loop over all of the preallocated registers from the least recently used - // to the most recently used. When we find one that is capable of holding - // our register, use it. - for (unsigned i = 0; PhysReg == 0; ++i) { - assert(i != PhysRegsUseOrder.size() && - "Couldn't find a register of the appropriate class!"); - - unsigned R = PhysRegsUseOrder[i]; - - // We can only use this register if it holds a virtual register (ie, it - // can be spilled). Do not use it if it is an explicitly allocated - // physical register! 
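The loop continued below implements the policy the comment describes: scan PhysRegsUseOrder from least recently used to most recently used and take the first compatible, spillable register. A self-contained model of that discipline (illustrative names only, not the RALocal API):

#include <algorithm>
#include <cstddef>
#include <vector>

// Model of the PhysRegsUseOrder policy: front of the list is the least
// recently used register, back is the most recently used.
struct UseOrderSketch {
  std::vector<unsigned> Order;

  // Mirrors AddToPhysRegsUseOrder: a touched register moves to the back.
  void noteUse(unsigned Reg) {
    std::vector<unsigned>::iterator It =
        std::find(Order.begin(), Order.end(), Reg);
    if (It != Order.end())
      Order.erase(It);
    Order.push_back(Reg);
  }

  // Mirrors the scan in getReg: walk from least to most recently used and
  // take the first register the caller's predicate accepts (right class,
  // holds a spillable virtual register, not pinned).
  template <class Pred>
  unsigned pickSpillCandidate(Pred Acceptable) const {
    for (std::size_t i = 0, e = Order.size(); i != e; ++i)
      if (Acceptable(Order[i]))
        return Order[i];
    return 0; // Nothing suitable; the real code asserts instead.
  }
};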
- assert(PhysRegsUsed[R] != -1 && - "PhysReg in PhysRegsUseOrder, but is not allocated?"); - if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) { - // If the current register is compatible, use it. - if (RC->contains(R)) { - PhysReg = R; - break; - } - - // If one of the registers aliased to the current register is - // compatible, use it. - for (const unsigned *AliasIt = TRI->getAliasSet(R); - *AliasIt; ++AliasIt) { - if (!RC->contains(*AliasIt)) continue; - - // If this is pinned down for some reason, don't use it. For - // example, if CL is pinned, and we run across CH, don't use - // CH as justification for using scavenging ECX (which will - // fail). - if (PhysRegsUsed[*AliasIt] == 0) continue; - - // Make sure the register is allocatable. Don't allocate SIL on - // x86-32. - if (PhysRegsUsed[*AliasIt] == -2) continue; - - PhysReg = *AliasIt; // Take an aliased register - break; - } - } - } - - assert(PhysReg && "Physical register not assigned!?!?"); - - // At this point PhysRegsUseOrder[i] is the least recently used register of - // compatible register class. Spill it to memory and reap its remains. - spillPhysReg(MBB, I, PhysReg); - - // Now that we know which register we need to assign this to, do it now! - assignVirtToPhysReg(VirtReg, PhysReg); - return PhysReg; -} - - -/// reloadVirtReg - This method transforms the specified virtual -/// register use to refer to a physical register. This method may do this in -/// one of several ways: if the register is available in a physical register -/// already, it uses that physical register. If the value is not in a physical -/// register, and if there are physical registers available, it loads it into a -/// register: PhysReg if that is an available physical register, otherwise any -/// register. If register pressure is high, and it is possible, it tries to -/// fold the load of the virtual register into the instruction itself. It -/// avoids doing this if register pressure is low to improve the chance that -/// subsequent instructions can use the reloaded value. This method returns -/// the modified instruction. -/// -MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, - SmallSet &ReloadedRegs, - unsigned PhysReg) { - unsigned VirtReg = MI->getOperand(OpNum).getReg(); - unsigned SubIdx = MI->getOperand(OpNum).getSubReg(); - - // If the virtual register is already available, just update the instruction - // and return. - if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { - if (SubIdx) { - PR = TRI->getSubReg(PR, SubIdx); - MI->getOperand(OpNum).setSubReg(0); - } - MI->getOperand(OpNum).setReg(PR); // Assign the input register - if (!MI->isDebugValue()) { - // Do not do these for DBG_VALUE as they can affect codegen. - MarkPhysRegRecentlyUsed(PR); // Already have this value available! - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); - } - return MI; - } - - // Otherwise, we need to fold it into the current instruction, or reload it. - // If we have registers available to hold the value, use them. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - // If we already have a PhysReg (this happens when the instruction is a - // reg-to-reg copy with a PhysReg destination) use that. - if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) || - !isPhysRegAvailable(PhysReg)) - PhysReg = getFreeReg(RC); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - - if (PhysReg) { // Register is available, allocate it! 
- assignVirtToPhysReg(VirtReg, PhysReg); - } else { // No registers available. - // Force some poor hapless value out of the register file to - // make room for the new register, and reload it. - PhysReg = getReg(MBB, MI, VirtReg, true); - } - - markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded - - DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " - << TRI->getName(PhysReg) << "\n"); - - // Add move instruction(s) - TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI); - ++NumLoads; // Update statistics - - MF->getRegInfo().setPhysRegUsed(PhysReg); - // Assign the input register. - if (SubIdx) { - MI->getOperand(OpNum).setSubReg(0); - MI->getOperand(OpNum).setReg(TRI->getSubReg(PhysReg, SubIdx)); - } else - MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); - - if (!ReloadedRegs.insert(PhysReg)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); - } - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (ReloadedRegs.insert(*SubRegs)) continue; - - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); - } - - return MI; -} - -/// isReadModWriteImplicitKill - True if this is an implicit kill for a -/// read/mod/write register, i.e. update partial register. -static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && - MO.isDef() && !MO.isDead()) - return true; - } - return false; -} - -/// isReadModWriteImplicitDef - True if this is an implicit def for a -/// read/mod/write register, i.e. update partial register. -static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && - !MO.isDef() && MO.isKill()) - return true; - } - return false; -} - -// precedes - Helper function to determine with MachineInstr A -// precedes MachineInstr B within the same MBB. -static bool precedes(MachineBasicBlock::iterator A, - MachineBasicBlock::iterator B) { - if (A == B) - return false; - - MachineBasicBlock::iterator I = A->getParent()->begin(); - while (I != A->getParent()->end()) { - if (I == A) - return true; - else if (I == B) - return false; - - ++I; - } - - return false; -} - -/// ComputeLocalLiveness - Computes liveness of registers within a basic -/// block, setting the killed/dead flags as appropriate. -void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { - // Keep track of the most recently seen previous use or def of each reg, - // so that we can update them with dead/kill markers. 
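ComputeLocalLiveness, whose body follows, is driven by one bookkeeping rule over a per-register "last touch" map. A distilled, self-contained version of that rule, with operands reduced to (register, is-def) pairs and all names illustrative:

#include <cstddef>
#include <map>
#include <utility>
#include <vector>

// Track the most recent touch of each register; a def after a def marks
// the old def dead, and a def after a use marks that use a kill. (The
// real pass additionally exempts two-address redefs and partial
// sub-register redefs, and checks cross-block liveness before marking.)
struct Touch { std::size_t Index; bool WasDef; };

static void markKillsAndDeads(
    const std::vector<std::pair<unsigned, bool> > &Ops, // (reg, is-def)
    std::vector<const char *> &Marks) {                 // ""/"kill"/"dead"
  std::map<unsigned, Touch> LastUseDef;
  Marks.assign(Ops.size(), "");
  for (std::size_t i = 0, e = Ops.size(); i != e; ++i) {
    unsigned Reg = Ops[i].first;
    bool IsDef = Ops[i].second;
    std::map<unsigned, Touch>::iterator Last = LastUseDef.find(Reg);
    if (IsDef && Last != LastUseDef.end())
      Marks[Last->second.Index] = Last->second.WasDef ? "dead" : "kill";
    Touch T = { i, IsDef };
    LastUseDef[Reg] = T;
  }
  // Whatever remains in LastUseDef is each register's final touch in the
  // block; it becomes a kill/dead only if the value is not live-out.
}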
- DenseMap > LastUseDef; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - if (I->isDebugValue()) - continue; - - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &MO = I->getOperand(i); - // Uses don't trigger any flags, but we need to save - // them for later. Also, we have to process these - // _before_ processing the defs, since an instr - // uses regs before it defs them. - if (!MO.isReg() || !MO.getReg() || !MO.isUse()) - continue; - - // Ignore helpful kill flags from earlier passes. - MO.setIsKill(false); - - LastUseDef[MO.getReg()] = std::make_pair(I, i); - - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; - - const unsigned *Aliases = TRI->getAliasSet(MO.getReg()); - if (Aliases == 0) - continue; - - while (*Aliases) { - DenseMap >::iterator - alias = LastUseDef.find(*Aliases); - - if (alias != LastUseDef.end() && alias->second.first != I) - LastUseDef[*Aliases] = std::make_pair(I, i); - - ++Aliases; - } - } - - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &MO = I->getOperand(i); - // Defs others than 2-addr redefs _do_ trigger flag changes: - // - A def followed by a def is dead - // - A use followed by a def is a kill - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue; - - unsigned SubIdx = MO.getSubReg(); - DenseMap >::iterator - last = LastUseDef.find(MO.getReg()); - if (last != LastUseDef.end()) { - // Check if this is a two address instruction. If so, then - // the def does not kill the use. - if (last->second.first == I && I->isRegTiedToUseOperand(i)) - continue; - - MachineOperand &lastUD = - last->second.first->getOperand(last->second.second); - if (SubIdx && lastUD.getSubReg() != SubIdx) - // Partial re-def, the last def is not dead. - // %reg1024:5 = - // %reg1024:6 = - // or - // %reg1024:5 = op %reg1024, 5 - continue; - - if (lastUD.isDef()) - lastUD.setIsDead(true); - else - lastUD.setIsKill(true); - } - - LastUseDef[MO.getReg()] = std::make_pair(I, i); - } - } - - // Live-out (of the function) registers contain return values of the function, - // so we need to make sure they are alive at return time. - MachineBasicBlock::iterator Ret = MBB.getFirstTerminator(); - bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn()); - - if (BBEndsInReturn) - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) - if (!Ret->readsRegister(*I)) { - Ret->addOperand(MachineOperand::CreateReg(*I, false, true)); - LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1); - } - - // Finally, loop over the final use/def of each reg - // in the block and determine if it is dead. - for (DenseMap >::iterator - I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) { - MachineInstr *MI = I->second.first; - unsigned idx = I->second.second; - MachineOperand &MO = MI->getOperand(idx); - - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg()); - - // A crude approximation of "live-out" calculation - bool usedOutsideBlock = isPhysReg ? false : - UsedInMultipleBlocks.test(MO.getReg() - - TargetRegisterInfo::FirstVirtualRegister); - - // If the machine BB ends in a return instruction, then the value isn't used - // outside of the BB. 
- if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) { - // DBG_VALUE complicates this: if the only refs of a register outside - // this block are DBG_VALUE, we can't keep the reg live just for that, - // as it will cause the reg to be spilled at the end of this block when - // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that - // happens. - bool UsedByDebugValueOnly = false; - for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()), - UE = MRI->reg_end(); UI != UE; ++UI) { - // Two cases: - // - used in another block - // - used in the same block before it is defined (loop) - if (UI->getParent() == &MBB && - !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) - continue; - - if (UI->isDebugValue()) { - UsedByDebugValueOnly = true; - continue; - } - - // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone. - UsedInMultipleBlocks.set(MO.getReg() - - TargetRegisterInfo::FirstVirtualRegister); - usedOutsideBlock = true; - UsedByDebugValueOnly = false; - break; - } - - if (UsedByDebugValueOnly) - for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()), - UE = MRI->reg_end(); UI != UE; ++UI) - if (UI->isDebugValue() && - (UI->getParent() != &MBB || - (MO.isDef() && precedes(&*UI, MI)))) - UI.getOperand().setReg(0U); - } - - // Physical registers and those that are not live-out of the block are - // killed/dead at their last use/def within this block. - if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) { - if (MO.isUse()) { - // Don't mark uses that are tied to defs as kills. - if (!MI->isRegTiedToDefOperand(idx)) - MO.setIsKill(true); - } else { - MO.setIsDead(true); - } - } - } -} - -void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { - // loop over each instruction - MachineBasicBlock::iterator MII = MBB.begin(); - - DEBUG({ - const BasicBlock *LBB = MBB.getBasicBlock(); - if (LBB) - dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName(); - }); - - // Add live-in registers as active. - for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), - E = MBB.livein_end(); I != E; ++I) { - unsigned Reg = *I; - MF->getRegInfo().setPhysRegUsed(Reg); - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - AddToPhysRegsUseOrder(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } - } - - ComputeLocalLiveness(MBB); - - // Otherwise, sequentially allocate each instruction in the MBB. - while (MII != MBB.end()) { - MachineInstr *MI = MII++; - const TargetInstrDesc &TID = MI->getDesc(); - DEBUG({ - dbgs() << "\nStarting RegAlloc of: " << *MI; - dbgs() << " Regs have values: "; - for (unsigned i = 0; i != TRI->getNumRegs(); ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { - if (PhysRegsUsed[i] && isVirtRegModified(PhysRegsUsed[i])) - dbgs() << "*"; - dbgs() << "[" << TRI->getName(i) - << ",%reg" << PhysRegsUsed[i] << "] "; - } - dbgs() << '\n'; - }); - - // Determine whether this is a copy instruction. The cases where the - // source or destination are phys regs are handled specially. 
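The copy detection below still goes through the target's isMoveInstr hook; the rest of this patch migrates such checks to the new TargetOpcode::COPY form, where operand 0 is the destination and operand 1 the source. A sketch of that replacement idiom (the helper name is illustrative; the operand layout matches the pattern this commit adds to RAFast::AllocateBasicBlock):

#include "llvm/CodeGen/MachineInstr.h"

using namespace llvm;

// Decompose a COPY into (dst, src) plus sub-register indices.
static bool decomposeCopy(const MachineInstr *MI,
                          unsigned &Dst, unsigned &Src,
                          unsigned &DstSub, unsigned &SrcSub) {
  if (!MI->isCopy())
    return false; // Not a TargetOpcode::COPY.
  Dst = MI->getOperand(0).getReg();
  DstSub = MI->getOperand(0).getSubReg();
  Src = MI->getOperand(1).getReg();
  SrcSub = MI->getOperand(1).getSubReg();
  return true;
}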
- unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; - unsigned SrcCopyPhysReg = 0U; - bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg) && - SrcCopySubReg == DstCopySubReg; - if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) - SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); - - // Loop over the implicit uses, making sure that they are at the head of the - // use order list, so they don't get reallocated. - if (TID.ImplicitUses) { - for (const unsigned *ImplicitUses = TID.ImplicitUses; - *ImplicitUses; ++ImplicitUses) - MarkPhysRegRecentlyUsed(*ImplicitUses); - } - - SmallVector Kills; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isKill()) continue; - - if (!MO.isImplicit()) - Kills.push_back(MO.getReg()); - else if (!isReadModWriteImplicitKill(MI, MO.getReg())) - // These are extra physical register kills when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. - Kills.push_back(MO.getReg()); - } - - // If any physical regs are earlyclobber, spill any value they might - // have in them, then mark them unallocatable. - // If any virtual regs are earlyclobber, allocate them now (before - // freeing inputs that are killed). - if (MI->isInlineAsm()) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() || - !MO.getReg()) - continue; - - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) - DestPhysReg = getReg(MBB, MI, DestVirtReg); - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = - std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - if (unsigned DestSubIdx = MO.getSubReg()) { - MO.setSubReg(0); - DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx); - } - MO.setReg(DestPhysReg); // Assign the earlyclobber register - } else { - unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. - if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(*SubRegs); - } - } - } - } - - // If a DBG_VALUE says something is located in a spilled register, - // change the DBG_VALUE to be undef, which prevents the register - // from being reloaded here. Doing that would change the generated - // code, unless another use immediately follows this instruction. 
- if (MI->isDebugValue() && - MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { - unsigned VirtReg = MI->getOperand(0).getReg(); - if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) && - !getVirt2PhysRegMapSlot(VirtReg)) - MI->getOperand(0).setReg(0U); - } - - // Get the used operands into registers. This has the potential to spill - // incoming values if we are out of registers. Note that we completely - // ignore physical register uses here. We assume that if an explicit - // physical register is referenced by the instruction, that it is guaranteed - // to be live-in, or the input is badly hosed. - // - SmallSet ReloadedRegs; - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand &MO = MI->getOperand(i); - // here we are looking for only used operands (never def&use) - if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) - MI = reloadVirtReg(MBB, MI, i, ReloadedRegs, - isCopy ? DstCopyReg : 0); - } - - // If this instruction is the last user of this register, kill the - // value, freeing the register being used, so it doesn't need to be - // spilled to memory. - // - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - unsigned VirtReg = Kills[i]; - unsigned PhysReg = VirtReg; - if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { - // If the virtual register was never materialized into a register, it - // might not be in the map, but it won't hurt to zero it out anyway. - unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); - PhysReg = PhysRegSlot; - PhysRegSlot = 0; - } else if (PhysRegsUsed[PhysReg] == -2) { - // Unallocatable register dead, ignore. - continue; - } else { - assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && - "Silently clearing a virtual register?"); - } - - if (!PhysReg) continue; - - DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) - << "[%reg" << VirtReg <<"], removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] != -2) { - DEBUG(dbgs() << " Last use of " - << TRI->getName(*SubRegs) << "[%reg" << VirtReg - <<"], removing it from live set\n"); - removePhysReg(*SubRegs); - } - } - } - - // Loop over all of the operands of the instruction, spilling registers that - // are defined, and marking explicit destinations in the PhysRegsUsed map. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() || - MO.isEarlyClobber() || - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - - unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. 
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(*SubRegs); - } - } - - // Loop over the implicit defs, spilling them as well. - if (TID.ImplicitDefs) { - for (const unsigned *ImplicitDefs = TID.ImplicitDefs; - *ImplicitDefs; ++ImplicitDefs) { - unsigned Reg = *ImplicitDefs; - if (PhysRegsUsed[Reg] != -2) { - spillPhysReg(MBB, MI, Reg, true); - AddToPhysRegsUseOrder(Reg); - PhysRegsUsed[Reg] = 0; // It is free and reserved now - } - MF->getRegInfo().setPhysRegUsed(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - AddToPhysRegsUseOrder(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } - } - } - - SmallVector DeadDefs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDead()) - DeadDefs.push_back(MO.getReg()); - } - - // Okay, we have allocated all of the source operands and spilled any values - // that would be destroyed by defs of this instruction. Loop over the - // explicit defs and assign them to a register, spilling incoming values if - // we need to scavenge a register. - // - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || - MO.isEarlyClobber() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { - // If this is a copy try to reuse the input as the output; - // that will make the copy go away. - // If this is a copy, the source reg is a phys reg, and - // that reg is available, use that phys reg for DestPhysReg. - // If this is a copy, the source reg is a virtual reg, and - // the phys reg that was assigned to that virtual reg is now - // available, use that phys reg for DestPhysReg. (If it's now - // available that means this was the last use of the source.) 
- if (isCopy && - TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && - isPhysRegAvailable(SrcCopyReg)) { - DestPhysReg = SrcCopyReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else if (isCopy && - TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && - SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && - MF->getRegInfo().getRegClass(DestVirtReg)-> - contains(SrcCopyPhysReg)) { - DestPhysReg = SrcCopyPhysReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else - DestPhysReg = getReg(MBB, MI, DestVirtReg); - } - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - - if (unsigned DestSubIdx = MO.getSubReg()) { - MO.setSubReg(0); - DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx); - } - MO.setReg(DestPhysReg); // Assign the output register - } - - // If this instruction defines any registers that are immediately dead, - // kill them now. - // - for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { - unsigned VirtReg = DeadDefs[i]; - unsigned PhysReg = VirtReg; - if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { - unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); - PhysReg = PhysRegSlot; - assert(PhysReg != 0); - PhysRegSlot = 0; - } else if (PhysRegsUsed[PhysReg] == -2) { - // Unallocatable register dead, ignore. - continue; - } else if (!PhysReg) - continue; - - DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) - << " [%reg" << VirtReg - << "] is never used, removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] != -2) { - DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) - << " [%reg" << *AliasSet - << "] is never used, removing it from live set\n"); - removePhysReg(*AliasSet); - } - } - } - - // If this instruction is a call, make sure there are no dirty registers. The - // call might throw an exception, and the landing pad expects to find all - // registers in stack slots. - if (TID.isCall()) - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { - if (PhysRegsUsed[i] <= 0) continue; - unsigned VirtReg = PhysRegsUsed[i]; - if (!isVirtRegModified(VirtReg)) continue; - DEBUG(dbgs() << " Storing dirty %reg" << VirtReg); - storeVirtReg(MBB, MI, VirtReg, i, false); - markVirtRegModified(VirtReg, false); - DEBUG(dbgs() << " because the call might throw\n"); - } - - // Finally, if this is a noop copy instruction, zap it. (Except that if - // the copy is dead, it must be kept to avoid messing up liveness info for - // the register scavenger. See pr4100.) - if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg) && - SrcCopyReg == DstCopyReg && SrcCopySubReg == DstCopySubReg && - DeadDefs.empty()) { - ++NumCopies; - MBB.erase(MI); - } - } - - MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); - - // Spill all physical registers holding virtual registers now. - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { - if (unsigned VirtReg = PhysRegsUsed[i]) - spillVirtReg(MBB, MI, VirtReg, i); - else - removePhysReg(i); - } - -#if 0 - // This checking code is very expensive. 
- bool AllOk = true; - for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, - e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) - if (unsigned PR = Virt2PhysRegMap[i]) { - cerr << "Register still mapped: " << i << " -> " << PR << "\n"; - AllOk = false; - } - assert(AllOk && "Virtual registers still in phys regs?"); -#endif - - // Clear any physical register which appear live at the end of the basic - // block, but which do not hold any virtual registers. e.g., the stack - // pointer. - PhysRegsUseOrder.clear(); -} - -/// runOnMachineFunction - Register allocate the whole function -/// -bool RALocal::runOnMachineFunction(MachineFunction &Fn) { - DEBUG(dbgs() << "Machine Function\n"); - MF = &Fn; - MRI = &Fn.getRegInfo(); - TM = &Fn.getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); - - PhysRegsUsed.assign(TRI->getNumRegs(), -1); - - // At various places we want to efficiently check to see whether a register - // is allocatable. To handle this, we mark all unallocatable registers as - // being pinned down, permanently. - { - BitVector Allocable = TRI->getAllocatableSet(Fn); - for (unsigned i = 0, e = Allocable.size(); i != e; ++i) - if (!Allocable[i]) - PhysRegsUsed[i] = -2; // Mark the reg unallocable. - } - - // initialize the virtual->physical register map to have a 'null' - // mapping for all virtual registers - unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); - StackSlotForVirtReg.grow(LastVirtReg); - Virt2PhysRegMap.grow(LastVirtReg); - Virt2LastUseMap.grow(LastVirtReg); - VirtRegModified.resize(LastVirtReg+1 - - TargetRegisterInfo::FirstVirtualRegister); - UsedInMultipleBlocks.resize(LastVirtReg+1 - - TargetRegisterInfo::FirstVirtualRegister); - - // Loop over all of the basic blocks, eliminating virtual register references - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) - AllocateBasicBlock(*MBB); - - StackSlotForVirtReg.clear(); - PhysRegsUsed.clear(); - VirtRegModified.clear(); - UsedInMultipleBlocks.clear(); - Virt2PhysRegMap.clear(); - Virt2LastUseMap.clear(); - return true; -} - -FunctionPass *llvm::createLocalRegisterAllocator() { - return new RALocal(); -} diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 4fafd28..7e61a12 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -396,28 +396,23 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { if (srcRegIsPhysical && dstRegIsPhysical) continue; - // If it's a copy that includes a virtual register but the source and - // destination classes differ then we can't coalesce, so continue with - // the next instruction. - const TargetRegisterClass *srcRegClass = srcRegIsPhysical ? - tri->getPhysicalRegisterRegClass(srcReg) : mri->getRegClass(srcReg); - - const TargetRegisterClass *dstRegClass = dstRegIsPhysical ? - tri->getPhysicalRegisterRegClass(dstReg) : mri->getRegClass(dstReg); - - if (srcRegClass != dstRegClass) + // If it's a copy that includes two virtual register but the source and + // destination classes differ then we can't coalesce. + if (!srcRegIsPhysical && !dstRegIsPhysical && + mri->getRegClass(srcReg) != mri->getRegClass(dstReg)) continue; - // We also need any physical regs to be allocable, coalescing with - // a non-allocable register is invalid. - if (srcRegIsPhysical) { + // If one is physical and one is virtual, check that the physical is + // allocatable in the class of the virtual. 
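The checks in the hunk below test allocatability by membership in the register class's allocation order. Factored out as a sketch (hypothetical helper name; the calls are the same ones the hunk makes inline):

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <algorithm>

using namespace llvm;

// A physical register may be coalesced with a virtual one only if it
// appears in the allocation order of the virtual register's class for
// this particular function.
static bool isAllocatableIn(unsigned PhysReg, const TargetRegisterClass *RC,
                            const MachineFunction &MF) {
  return std::find(RC->allocation_order_begin(MF),
                   RC->allocation_order_end(MF),
                   PhysReg) != RC->allocation_order_end(MF);
}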
+ if (srcRegIsPhysical && !dstRegIsPhysical) { + const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg); if (std::find(dstRegClass->allocation_order_begin(*mf), dstRegClass->allocation_order_end(*mf), srcReg) == dstRegClass->allocation_order_end(*mf)) continue; } - - if (dstRegIsPhysical) { + if (!srcRegIsPhysical && dstRegIsPhysical) { + const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg); if (std::find(srcRegClass->allocation_order_begin(*mf), srcRegClass->allocation_order_end(*mf), dstReg) == srcRegClass->allocation_order_end(*mf)) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 1131e3d..ab0bc2d 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -16,6 +16,8 @@ #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Pass.h" @@ -33,6 +35,160 @@ char RegisterCoalescer::ID = 0; // RegisterCoalescer::~RegisterCoalescer() {} +unsigned CoalescerPair::compose(unsigned a, unsigned b) const { + if (!a) return b; + if (!b) return a; + return tri_.composeSubRegIndices(a, b); +} + +bool CoalescerPair::isMoveInstr(const MachineInstr *MI, + unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) const { + if (MI->isCopy()) { + Dst = MI->getOperand(0).getReg(); + DstSub = MI->getOperand(0).getSubReg(); + Src = MI->getOperand(1).getReg(); + SrcSub = MI->getOperand(1).getSubReg(); + } else if (MI->isSubregToReg()) { + Dst = MI->getOperand(0).getReg(); + DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); + Src = MI->getOperand(2).getReg(); + SrcSub = MI->getOperand(2).getSubReg(); + } else if (!tii_.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub)) { + return false; + } + return true; +} + +bool CoalescerPair::setRegisters(const MachineInstr *MI) { + srcReg_ = dstReg_ = subIdx_ = 0; + newRC_ = 0; + flipped_ = crossClass_ = false; + + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + partial_ = SrcSub || DstSub; + + // If one register is a physreg, it must be Dst. + if (TargetRegisterInfo::isPhysicalRegister(Src)) { + if (TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + flipped_ = true; + } + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + // Eliminate DstSub on a physreg. + if (DstSub) { + Dst = tri_.getSubReg(Dst, DstSub); + if (!Dst) return false; + DstSub = 0; + } + + // Eliminate SrcSub by picking a corresponding Dst superregister. + if (SrcSub) { + Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); + if (!Dst) return false; + SrcSub = 0; + } else if (!MRI.getRegClass(Src)->contains(Dst)) { + return false; + } + } else { + // Both registers are virtual. + + // Both registers have subreg indices. + if (SrcSub && DstSub) { + // For now we only handle the case of identical indices in commensurate + // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg + // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg. 
+ if (SrcSub != DstSub) + return false; + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + if (!getCommonSubClass(DstRC, SrcRC)) + return false; + SrcSub = DstSub = 0; + } + + // There can be no SrcSub. + if (SrcSub) { + std::swap(Src, Dst); + DstSub = SrcSub; + SrcSub = 0; + assert(!flipped_ && "Unexpected flip"); + flipped_ = true; + } + + // Find the new register class. + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + if (DstSub) + newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); + else + newRC_ = getCommonSubClass(DstRC, SrcRC); + if (!newRC_) + return false; + crossClass_ = newRC_ != DstRC || newRC_ != SrcRC; + } + // Check our invariants + assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); + assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && + "Cannot have a physical SubIdx"); + srcReg_ = Src; + dstReg_ = Dst; + subIdx_ = DstSub; + return true; +} + +bool CoalescerPair::flip() { + if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_)) + return false; + std::swap(srcReg_, dstReg_); + flipped_ = !flipped_; + return true; +} + +bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { + if (!MI) + return false; + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + + // Find the virtual register that is srcReg_. + if (Dst == srcReg_) { + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + } else if (Src != srcReg_) { + return false; + } + + // Now check that Dst matches dstReg_. + if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) { + if (!TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + assert(!subIdx_ && "Inconsistent CoalescerPair state."); + // DstSub could be set for a physreg from INSERT_SUBREG. + if (DstSub) + Dst = tri_.getSubReg(Dst, DstSub); + // Full copy of Src. + if (!SrcSub) + return dstReg_ == Dst; + // This is a partial register copy. Check that the parts match. + return tri_.getSubReg(dstReg_, SrcSub) == Dst; + } else { + // dstReg_ is virtual. + if (dstReg_ != Dst) + return false; + // Registers match, do the subregisters line up? + return compose(subIdx_, SrcSub) == DstSub; + } +} + // Because of the way .a files work, we must force the SimpleRC // implementation to be pulled in if the RegisterCoalescer classes are // pulled in. Otherwise we run the risk of RegisterCoalescer being diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 690e59f..43b3fb6 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -141,6 +141,10 @@ void RegScavenger::forward() { // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. + // FIXME: The scavenger is not predication aware. If the instruction is + // predicated, conservatively assume "kill" markers do not actually kill the + // register. Similarly ignores "dead" markers. + bool isPred = TII->isPredicated(MI); BitVector EarlyClobberRegs(NumPhysRegs); BitVector KillRegs(NumPhysRegs); BitVector DefRegs(NumPhysRegs); @@ -155,11 +159,11 @@ void RegScavenger::forward() { if (MO.isUse()) { // Two-address operands implicitly kill. 
-      if (MO.isKill() || MI->isRegTiedToDefOperand(i))
+      if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i)))
         addRegWithSubRegs(KillRegs, Reg);
     } else {
       assert(MO.isDef());
-      if (MO.isDead())
+      if (!isPred && MO.isDead())
         addRegWithSubRegs(DeadRegs, Reg);
       else
         addRegWithSubRegs(DefRegs, Reg);
@@ -238,8 +242,18 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
   return 0;
 }
 
+/// getRegsAvailable - Set bits in Mask corresponding to every register in
+/// the class RC that is currently unused.
+void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC,
+                                    BitVector &Mask) {
+  for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+       I != E; ++I)
+    if (!isAliasUsed(*I))
+      Mask.set(*I);
+}
+
 /// findSurvivorReg - Return the candidate register that is unused for the
-/// longest after MBBI. UseMI is set to the instruction where the search
+/// longest after StartMI. UseMI is set to the instruction where the search
 /// stopped.
 ///
 /// No more than InstrLimit instructions are inspected.
@@ -258,6 +272,10 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
   bool inVirtLiveRange = false;
   for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) {
+    if (MI->isDebugValue()) {
+      ++InstrLimit; // Don't count debug instructions.
+      continue;
+    }
     bool isVirtKillInsn = false;
     bool isVirtDefInsn = false;
     // Remove any candidates touched by instruction.
@@ -321,13 +339,16 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
       Candidates.reset(MO.getReg());
   }
 
+  // Try to find a register that's unused if there is one, as then we won't
+  // have to spill.
+  if ((Candidates & RegsAvailable).any())
+    Candidates &= RegsAvailable;
+
   // Find the register whose use is furthest away.
   MachineBasicBlock::iterator UseMI;
   unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
 
-  // If we found an unused register there is no reason to spill it. We have
-  // probably found a callee-saved register that has been saved in the
-  // prologue, but happens to be unused at this point.
+  // If we found an unused register there is no reason to spill it.
   if (!isAliasUsed(SReg))
     return SReg;
 
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index da20c12..7d39dc4 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -380,26 +380,26 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) {
 }
 #endif
 
-/// InitDAGTopologicalSorting - create the initial topological 
+/// InitDAGTopologicalSorting - create the initial topological
 /// ordering from the DAG to be scheduled.
 ///
-/// The idea of the algorithm is taken from 
+/// The idea of the algorithm is taken from
 /// "Online algorithms for managing the topological order of
 /// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly
-/// This is the MNR algorithm, which was first introduced by 
-/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in  
+/// This is the MNR algorithm, which was first introduced by
+/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in
 /// "Maintaining a topological order under edge insertions".
 ///
-/// Short description of the algorithm: 
+/// Short description of the algorithm:
 ///
 /// Topological ordering, ord, of a DAG maps each node to a topological
 /// index so that for all edges X->Y it is the case that ord(X) < ord(Y).
 ///
-/// This means that if there is a path from the node X to the node Z, 
+/// This means that if there is a path from the node X to the node Z,
 /// then ord(X) < ord(Z).
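As a concrete illustration of the property just stated, here is a self-contained miniature of the scheme (node ids and container choices are illustrative, not the ScheduleDAG API):

#include <cstddef>
#include <vector>

// Ord is a topological index: every edge X->Y has Ord[X] < Ord[Y], so a
// path X ->* Z forces Ord[X] < Ord[Z]. Hence inserting the edge From->To
// can only close a cycle when Ord[To] < Ord[From], and the search for an
// existing To ->* From path may prune every node whose index is larger
// than Ord[From].
struct TopoOrderSketch {
  std::vector<std::vector<int> > Succs; // node -> successors
  std::vector<int> Ord;                 // node -> topological index

  bool reaches(int N, int Target, int UpperBound,
               std::vector<bool> &Visited) const {
    if (N == Target)
      return true;
    if (Visited[N] || Ord[N] > UpperBound)
      return false; // Seen already, or outside the affected region.
    Visited[N] = true;
    for (std::size_t i = 0; i != Succs[N].size(); ++i)
      if (reaches(Succs[N][i], Target, UpperBound, Visited))
        return true;
    return false;
  }

  bool edgeWouldCreateCycle(int From, int To) const {
    if (Ord[From] < Ord[To])
      return false; // The existing order already admits this edge.
    std::vector<bool> Visited(Ord.size(), false);
    return reaches(To, From, Ord[From], Visited);
  }
};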
/// /// This property can be used to check for reachability of nodes: -/// if Z is reachable from X, then an insertion of the edge Z->X would +/// if Z is reachable from X, then an insertion of the edge Z->X would /// create a cycle. /// /// The algorithm first computes a topological ordering for the DAG by @@ -431,7 +431,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { // Collect leaf nodes. WorkList.push_back(SU); } - } + } int Id = DAGSize; while (!WorkList.empty()) { @@ -456,7 +456,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { SUnit *SU = &SUnits[i]; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] && + assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] && "Wrong topological sorting"); } } @@ -494,7 +494,7 @@ void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, bool& HasLoop) { std::vector WorkList; - WorkList.reserve(SUnits.size()); + WorkList.reserve(SUnits.size()); WorkList.push_back(SU); do { @@ -504,20 +504,20 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, for (int I = SU->Succs.size()-1; I >= 0; --I) { int s = SU->Succs[I].getSUnit()->NodeNum; if (Node2Index[s] == UpperBound) { - HasLoop = true; + HasLoop = true; return; } // Visit successors if not already and in affected region. if (!Visited.test(s) && Node2Index[s] < UpperBound) { WorkList.push_back(SU->Succs[I].getSUnit()); - } - } + } + } } while (!WorkList.empty()); } -/// Shift - Renumber the nodes so that the topological ordering is +/// Shift - Renumber the nodes so that the topological ordering is /// preserved. -void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, +void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, int UpperBound) { std::vector L; int shift = 0; @@ -568,7 +568,7 @@ bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, // Is Ord(TargetSU) < Ord(SU) ? if (LowerBound < UpperBound) { Visited.reset(); - // There may be a path from TargetSU to SU. Check for it. + // There may be a path from TargetSU to SU. Check for it. DFS(TargetSU, UpperBound, HasLoop); } return HasLoop; @@ -580,8 +580,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) { Index2Node[index] = n; } -ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort( - std::vector &sunits) - : SUnits(sunits) {} +ScheduleDAGTopologicalSort:: +ScheduleDAGTopologicalSort(std::vector &sunits) : SUnits(sunits) {} ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {} diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index ee08e1d..0a2fb37 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -50,11 +50,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, break; } } - bool Success = TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second, - SU->CopyDstRC, SU->CopySrcRC, - DebugLoc()); - (void)Success; - assert(Success && "copyRegToReg failed!"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); } else { // Copy from physical register. assert(I->getReg() && "Unknown physical register!"); @@ -62,11 +59,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; isNew = isNew; // Silence compiler warning. 
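The InitDAGTopologicalSorting comments above condense the key invariant of the Pearce/Kelly and MNR schemes: once ord(X) < ord(Y) holds for every edge X->Y, asking whether a new edge U->V would close a cycle only takes a DFS from V that never walks past ord(U). A minimal standalone sketch of that query, with plain containers standing in for the SUnit successor lists (Succs) and the Node2Index array (Ord), not the ScheduleDAG code itself:

  #include <vector>

  // Returns true if a path From -> ... -> To exists, visiting only nodes
  // whose topological index does not exceed Ord[To]; nothing ordered after
  // To can lie on a path that ends at To.
  static bool reaches(unsigned From, unsigned To,
                      const std::vector<std::vector<unsigned> > &Succs,
                      const std::vector<unsigned> &Ord,
                      std::vector<bool> &Visited) {
    if (From == To)
      return true;
    Visited[From] = true;
    for (unsigned i = 0, e = Succs[From].size(); i != e; ++i) {
      unsigned S = Succs[From][i];
      if (Visited[S] || Ord[S] > Ord[To])
        continue; // Already seen, or outside the affected region.
      if (reaches(S, To, Succs, Ord, Visited))
        return true;
    }
    return false;
  }

  // Adding the edge U->V creates a cycle iff V already reaches U.
  bool wouldCreateCycle(unsigned U, unsigned V,
                        const std::vector<std::vector<unsigned> > &Succs,
                        const std::vector<unsigned> &Ord) {
    if (Ord[V] > Ord[U])
      return false; // The ordering already rules out any V -> ... -> U path.
    std::vector<bool> Visited(Succs.size(), false);
    return reaches(V, U, Succs, Ord, Visited);
  }

IsReachable and the bounded DFS in the hunks nearby are this same query, with Shift renumbering the affected window afterwards.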
assert(isNew && "Node emitted out of order - early"); - bool Success = TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(), - SU->CopyDstRC, SU->CopySrcRC, - DebugLoc()); - (void)Success; - assert(Success && "copyRegToReg failed!"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); } break; } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index ad82db2..d90659b 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -69,8 +69,10 @@ namespace llvm { const SmallSet &LoopLiveIns) { unsigned Count = 0; for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I, ++Count) { + I != E; ++I) { const MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse()) @@ -79,6 +81,7 @@ namespace llvm { if (LoopLiveIns.count(MOReg)) Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); } + ++Count; // Not every iteration due to dbg_value above. } const std::vector &Children = Node->getChildren(); diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 0cfd5e1..799988a 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMSelectionDAG - CallingConvLower.cpp DAGCombiner.cpp FastISel.cpp FunctionLoweringInfo.cpp diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp deleted file mode 100644 index 4e6c1fc..0000000 --- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp +++ /dev/null @@ -1,179 +0,0 @@ -//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the CCState class, used for lowering and implementing -// calling conventions. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, - SmallVector &locs, LLVMContext &C) - : CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { - // No stack is used. - StackOffset = 0; - - UsedRegs.resize((TRI.getNumRegs()+31)/32); -} - -// HandleByVal - Allocate a stack slot large enough to pass an argument by -// value. The size and alignment information of the argument is encoded in its -// parameter attribute. 
-void CCState::HandleByVal(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - int MinSize, int MinAlign, - ISD::ArgFlagsTy ArgFlags) { - unsigned Align = ArgFlags.getByValAlign(); - unsigned Size = ArgFlags.getByValSize(); - if (MinSize > (int)Size) - Size = MinSize; - if (MinAlign > (int)Align) - Align = MinAlign; - unsigned Offset = AllocateStack(Size, Align); - - addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); -} - -/// MarkAllocated - Mark a register and all of its aliases as allocated. -void CCState::MarkAllocated(unsigned Reg) { - UsedRegs[Reg/32] |= 1 << (Reg&31); - - if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) - for (; (Reg = *RegAliases); ++RegAliases) - UsedRegs[Reg/32] |= 1 << (Reg&31); -} - -/// AnalyzeFormalArguments - Analyze an array of argument values, -/// incorporating info about the formals into this state. -void -CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, - CCAssignFn Fn) { - unsigned NumArgs = Ins.size(); - - for (unsigned i = 0; i != NumArgs; ++i) { - EVT ArgVT = Ins[i].VT; - ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Formal argument #" << i << " has unhandled type " - << ArgVT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// CheckReturn - Analyze the return values of a function, returning true if -/// the return can be performed without sret-demotion, and false otherwise. -bool CCState::CheckReturn(const SmallVectorImpl &OutTys, - const SmallVectorImpl &ArgsFlags, - CCAssignFn Fn) { - // Determine which register each value should be copied into. - for (unsigned i = 0, e = OutTys.size(); i != e; ++i) { - EVT VT = OutTys[i]; - ISD::ArgFlagsTy ArgFlags = ArgsFlags[i]; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) - return false; - } - return true; -} - -/// AnalyzeReturn - Analyze the returned values of a return, -/// incorporating info about the result values into this state. -void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, - CCAssignFn Fn) { - // Determine which register each value should be copied into. - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].Val.getValueType(); - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Return operand #" << i << " has unhandled type " - << VT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - - -/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, -/// incorporating info about the passed values into this state. -void CCState::AnalyzeCallOperands(const SmallVectorImpl &Outs, - CCAssignFn Fn) { - unsigned NumOps = Outs.size(); - for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = Outs[i].Val.getValueType(); - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// AnalyzeCallOperands - Same as above except it takes vectors of types -/// and argument flags. 
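The CCState analysis routines deleted in this hunk all share one driver shape: walk the values, let the target-supplied CCAssignFn claim each one, and treat a true return as an unhandled type. A toy rendering of that loop, with int standing in for EVT and AssignFn for CCAssignFn (stand-in types, not the LLVM API):

  #include <cstdio>
  #include <cstdlib>
  #include <vector>

  // A target callback returns false once it has recorded a register or
  // stack location for value ValNo, or true if the type is unhandled.
  typedef bool (*AssignFn)(unsigned ValNo, int VT);

  void analyzeValues(const std::vector<int> &VTs, AssignFn Fn) {
    for (unsigned i = 0, e = VTs.size(); i != e; ++i)
      if (Fn(i, VTs[i])) {
        std::fprintf(stderr, "value #%u has unhandled type\n", i);
        std::abort(); // mirrors the llvm_unreachable(0) in the real code
      }
  }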
-void CCState::AnalyzeCallOperands(SmallVectorImpl &ArgVTs, - SmallVectorImpl &Flags, - CCAssignFn Fn) { - unsigned NumOps = ArgVTs.size(); - for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = ArgVTs[i]; - ISD::ArgFlagsTy ArgFlags = Flags[i]; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// AnalyzeCallResult - Analyze the return values of a call, -/// incorporating info about the passed values into this state. -void CCState::AnalyzeCallResult(const SmallVectorImpl &Ins, - CCAssignFn Fn) { - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - EVT VT = Ins[i].VT; - ISD::ArgFlagsTy Flags = Ins[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { -#ifndef NDEBUG - dbgs() << "Call result #" << i << " has unhandled type " - << VT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// AnalyzeCallResult - Same as above except it's specialized for calls which -/// produce a single value. -void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { - if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { -#ifndef NDEBUG - dbgs() << "Call result has unhandled type " - << VT.getEVTString(); -#endif - llvm_unreachable(0); - } -} diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6bddd78..e671752 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -211,6 +211,7 @@ namespace { SDValue visitBUILD_VECTOR(SDNode *N); SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitMEMBARRIER(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); @@ -668,7 +669,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; - return DAG.getExtLoad(ExtType, dl, PVT, + return DAG.getExtLoad(ExtType, PVT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), MemVT, LD->isVolatile(), @@ -890,7 +891,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); - SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), MemVT, LD->isVolatile(), @@ -1079,6 +1080,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::MEMBARRIER: return visitMEMBARRIER(N); } return SDValue(); } @@ -1313,7 +1315,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (GlobalAddressSDNode *GA = dyn_cast(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, GA->getOffset() + (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A @@ -1550,7 +1552,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { // fold (sub Sym, c) -> Sym-c if (N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, GA->getOffset() - (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 @@ -2028,7 +2030,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) // // do not sink logical op inside of a vector extend, since it may combine // into a vsetcc. @@ -2038,7 +2040,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // Avoid infinite looping with PromoteIntBinOp. (N0.getOpcode() == ISD::ANY_EXTEND && (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || - (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && + (N0.getOpcode() == ISD::TRUNCATE && + (!TLI.isZExtFree(VT, Op0VT) || + !TLI.isTruncateFree(Op0VT, VT)) && + TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { @@ -2193,7 +2198,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -2216,7 +2221,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -2250,7 +2255,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), @@ -2286,7 +2291,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), @@ -2317,7 +2322,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or x, undef) -> -1 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) { + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); } @@ -2425,6 +2431,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) return SDValue(Rot, 0); + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -3158,6 +3169,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return NewSRL; } + // Attempt to convert a srl of a load into a narrower zero-extending load. + SDValue NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.getNode()) + return NarrowLoad; + // Here is a common situation. We want to optimize: // // %a = ... @@ -3487,7 +3503,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3531,7 +3547,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -3557,24 +3573,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. 
if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast(N0.getOperand(2))->get()); + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -3635,10 +3651,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) && - (!TLI.isTruncateFree(N0.getOperand(0).getValueType(), - N0.getValueType()) || - !TLI.isZExtFree(N0.getValueType(), VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3679,7 +3692,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3723,7 +3736,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -3881,7 +3894,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3925,8 +3938,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), - VT, LN0->getChain(), LN0->getBasePtr(), + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, + N->getDebugLoc(), + LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, 
LN0->isVolatile(), LN0->isNonTemporal(), @@ -3950,24 +3964,24 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast<CondCodeSDNode>(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -4024,6 +4038,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { /// extended, also fold the extension to form a extending load. SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4040,6 +4055,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) return SDValue(); + } else if (Opc == ISD::SRL) { + // Another special-case: SRL is basically zero-extending a narrower + // value. + ExtType = ISD::ZEXTLOAD; + N0 = SDValue(N, 0); + ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!N01) return SDValue(); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() - N01->getZExtValue()); } unsigned EVTBits = ExtVT.getSizeInBits(); @@ -4085,7 +4109,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ?
DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) - : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, + : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); @@ -4172,7 +4196,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, @@ -4189,7 +4213,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, @@ -4243,8 +4267,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) - return ReduceLoadWidth(N); + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { + SDValue Reduced = ReduceLoadWidth(N); + if (Reduced.getNode()) + return Reduced; + } + + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -4943,7 +4976,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -5527,8 +5560,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), - LD->getValueType(0), + return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), + N->getDebugLoc(), Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); @@ -5551,8 +5584,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), - LD->getValueType(0), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), + LD->getDebugLoc(), BetterChain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), @@ -6077,7 +6110,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. 
- EVT EltVT = InVec.getValueType().getVectorElementType(); SDValue InOp = InVec.getOperand(0); EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { @@ -6277,8 +6309,6 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { - return SDValue(); - EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -6334,6 +6364,59 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { + if (!TLI.getShouldFoldAtomicFences()) + return SDValue(); + + SDValue atomic = N->getOperand(0); + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + break; + default: + return SDValue(); + } + + SDValue fence = atomic.getOperand(0); + if (fence.getOpcode() != ISD::MEMBARRIER) + return SDValue(); + + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), + fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2), + atomic.getOperand(3)), atomic.getResNo()); + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), + fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2)), + atomic.getResNo()); + default: + return SDValue(); + } +} + /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform /// an AND to a vector_shuffle with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -6565,8 +6648,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType(), - TheSelect->getDebugLoc(), TheSelect->getValueType(0), + TheSelect->getDebugLoc(), LLD->getChain(), Addr, 0, 0, LLD->getMemoryVT(), LLD->isVolatile(), @@ -6807,38 +6890,34 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } } - // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X -> + // Check to see if this is an integer abs. 
+ // select_cc setg[te] X, 0, X, -X -> + // select_cc setgt X, -1, X, -X -> + // select_cc setl[te] X, 0, -X, X -> + // select_cc setlt X, 1, -X, X -> // Y = sra (X, size(X)-1); xor (add (X, Y), Y) - if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && - N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && - N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { + if (N1C) { + ConstantSDNode *SubC = NULL; + if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (N1C->isAllOnesValue() && CC == ISD::SETGT)) && + N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) + SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); + else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || + (N1C->isOne() && CC == ISD::SETLT)) && + N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) + SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); + EVT XType = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType, - N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); - } - // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X -> - // Y = sra (X, size(X)-1); xor (add (X, Y), Y) - if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && - N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { - if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) { - EVT XType = N0.getValueType(); - if (SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, - N0, - DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), - XType, N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); - } + if (SubC && SubC->isNullValue() && XType.isInteger()) { + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + XType, N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); } } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 95f4d07..3f7e4a5 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -44,18 +44,38 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" -#include "FunctionLoweringInfo.h" using namespace llvm; +/// startNewBlock - Set the current block to which generated machine +/// instructions will be appended, and clear the local CSE map. +/// +void FastISel::startNewBlock() { + LocalValueMap.clear(); + + // Start out as null, meaning no local-value instructions have + // been emitted.
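For reference, the four select_cc forms enumerated in the SimplifySelectCC hunk above all lower to the same branchless sequence, Y = sra(X, size(X)-1); xor(add(X, Y), Y). In plain C++, assuming a 32-bit type and an arithmetic right shift (which is what ISD::SRA denotes):

  #include <cstdint>

  // Y is 0 when X >= 0 and all-ones when X < 0, so (X + Y) ^ Y leaves a
  // non-negative X unchanged and maps a negative X to ~(X - 1) == -X.
  int32_t selectCCAbs(int32_t X) {
    int32_t Y = X >> 31;
    return (X + Y) ^ Y; // as with any two's-complement abs, INT32_MIN maps to itself
  }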
+ LastLocalValue = 0; + + // Advance the last local value past any EH_LABEL instructions. + MachineBasicBlock::iterator + I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end(); + while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) { + LastLocalValue = I; + ++I; + } +} + bool FastISel::hasTrivialKill(const Value *V) const { // Don't consider constants or arguments to have trivial kills. const Instruction *I = dyn_cast(V); @@ -99,25 +119,31 @@ unsigned FastISel::getRegForValue(const Value *V) { // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. - DenseMap::iterator I = ValueMap.find(V); - if (I != ValueMap.end()) - return I->second; + DenseMap::iterator I = FuncInfo.ValueMap.find(V); + if (I != FuncInfo.ValueMap.end()) { + unsigned Reg = I->second; + return Reg; + } unsigned Reg = LocalValueMap[V]; if (Reg != 0) return Reg; // In bottom-up mode, just create the virtual register which will be used // to hold the value. It will be materialized later. - if (IsBottomUp) { - Reg = createResultReg(TLI.getRegClassFor(VT)); - if (isa(V)) - ValueMap[V] = Reg; - else - LocalValueMap[V] = Reg; - return Reg; - } + if (isa(V) && + (!isa(V) || + !FuncInfo.StaticAllocaMap.count(cast(V)))) + return FuncInfo.InitializeRegForValue(V); + + MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea(); + + // Materialize the value in a register. Emit any instructions in the + // local value area. + Reg = materializeRegForValue(V, VT); - return materializeRegForValue(V, VT); + leaveLocalValueArea(SaveInsertPt); + + return Reg; } /// materializeRegForValue - Helper for getRegForVale. This function is @@ -161,11 +187,15 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { } } } else if (const Operator *Op = dyn_cast(V)) { - if (!SelectOperator(Op, Op->getOpcode())) return 0; - Reg = LocalValueMap[Op]; + if (!SelectOperator(Op, Op->getOpcode())) + if (!isa(Op) || + !TargetSelectInstruction(cast(Op))) + return 0; + Reg = lookUpRegForValue(Op); } else if (isa(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } // If target-independent code couldn't handle the value, give target-specific @@ -175,8 +205,10 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { // Don't cache constant materializations in the general ValueMap. // To do so would require tracking what uses they dominate. - if (Reg != 0) + if (Reg != 0) { LocalValueMap[V] = Reg; + LastLocalValue = MRI.getVRegDef(Reg); + } return Reg; } @@ -185,8 +217,9 @@ unsigned FastISel::lookUpRegForValue(const Value *V) { // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. - if (ValueMap.count(V)) - return ValueMap[V]; + DenseMap::iterator I = FuncInfo.ValueMap.find(V); + if (I != FuncInfo.ValueMap.end()) + return I->second; return LocalValueMap[V]; } @@ -202,14 +235,17 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { return Reg; } - unsigned &AssignedReg = ValueMap[I]; + unsigned &AssignedReg = FuncInfo.ValueMap[I]; if (AssignedReg == 0) + // Use the new register. 
AssignedReg = Reg; else if (Reg != AssignedReg) { - const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); - TII.copyRegToReg(*MBB, MBB->end(), AssignedReg, - Reg, RegClass, RegClass, DL); + // Arrange for uses of AssignedReg to be replaced by uses of Reg. + FuncInfo.RegFixups[AssignedReg] = Reg; + + AssignedReg = Reg; } + return AssignedReg; } @@ -237,6 +273,33 @@ std::pair FastISel::getRegForGEPIndex(const Value *Idx) { return std::pair(IdxN, IdxNIsKill); } +void FastISel::recomputeInsertPt() { + if (getLastLocalValue()) { + FuncInfo.InsertPt = getLastLocalValue(); + ++FuncInfo.InsertPt; + } else + FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); + + // Now skip past any EH_LABELs, which must remain at the beginning. + while (FuncInfo.InsertPt != FuncInfo.MBB->end() && + FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL) + ++FuncInfo.InsertPt; +} + +MachineBasicBlock::iterator FastISel::enterLocalValueArea() { + MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; + recomputeInsertPt(); + return OldInsertPt; +} + +void FastISel::leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt) { + if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) + LastLocalValue = llvm::prior(FuncInfo.InsertPt); + + // Restore the previous insert position. + FuncInfo.InsertPt = OldInsertPt; +} + /// SelectBinaryOp - Select and emit code for a binary operator instruction, /// which has an opcode which directly corresponds to the given ISD opcode. /// @@ -345,7 +408,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast(Idx)) { - if (CI->getZExtValue() == 0) continue; + if (CI->isZero()) continue; uint64_t Offs = TD.getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); @@ -395,7 +458,7 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast(I); if (!DIVariable(DI->getVariable()).Verify() || - !MF.getMMI().hasDebugInfo()) + !FuncInfo.MF->getMMI().hasDebugInfo()) return true; const Value *Address = DI->getAddress(); @@ -409,11 +472,12 @@ bool FastISel::SelectCall(const User *I) { // those are handled in SelectionDAGBuilder. if (AI) { DenseMap::iterator SI = - StaticAllocaMap.find(AI); - if (SI == StaticAllocaMap.end()) break; // VLAs. + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; if (!DI->getDebugLoc().isUnknown()) - MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); + FuncInfo.MF->getMMI().setVariableDbgInfo(DI->getVariable(), + FI, DI->getDebugLoc()); } else // Building the map above is target independent. Generating DBG_VALUE // inline is target dependent; do this now. @@ -428,23 +492,28 @@ bool FastISel::SelectCall(const User *I) { if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast(V)) { - BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()). 
- addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast(V)) { - BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addFPImm(CF).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { - BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. // Insert an undef so we can see what we dropped. - BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } return true; } @@ -453,14 +522,13 @@ bool FastISel::SelectCall(const User *I) { switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { default: break; case TargetLowering::Expand: { - assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); unsigned Reg = TLI.getExceptionAddressRegister(); const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Reg, RC, RC, DL); - assert(InsertedCopy && "Can't copy address registers!"); - InsertedCopy = InsertedCopy; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); UpdateValueMap(I, ResultReg); return true; } @@ -472,25 +540,23 @@ bool FastISel::SelectCall(const User *I) { switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { default: break; case TargetLowering::Expand: { - if (MBB->isLandingPad()) - AddCatchInfo(*cast(I), &MF.getMMI(), MBB); + if (FuncInfo.MBB->isLandingPad()) + AddCatchInfo(*cast(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB); else { #ifndef NDEBUG - CatchInfoLost.insert(cast(I)); + FuncInfo.CatchInfoLost.insert(cast(I)); #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. 
unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) MBB->addLiveIn(Reg); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); } unsigned Reg = TLI.getExceptionSelectorRegister(); EVT SrcVT = TLI.getPointerTy(); const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, - RC, RC, DL); - assert(InsertedCopy && "Can't copy address registers!"); - InsertedCopy = InsertedCopy; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); bool ResultRegIsKill = hasTrivialKill(I); @@ -605,12 +671,12 @@ bool FastISel::SelectBitCast(const User *I) { if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); - ResultReg = createResultReg(DstClass); - - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Op0, DstClass, SrcClass, DL); - if (!InsertedCopy) - ResultReg = 0; + // Don't attempt a cross-class copy. It will likely fail. + if (SrcClass == DstClass) { + ResultReg = createResultReg(DstClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0); + } } // If the reg-reg copy failed, select a BIT_CONVERT opcode. @@ -655,14 +721,15 @@ FastISel::SelectInstruction(const Instruction *I) { /// unless it is the immediate (fall-through) successor, and update /// the CFG. void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { - if (MBB->isLayoutSuccessor(MSucc)) { +FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { + if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // The unconditional fall-through case, which needs no instructions. } else { // The unconditional branch case. - TII.InsertBranch(*MBB, MSucc, NULL, SmallVector()); + TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, + SmallVector(), DL); } - MBB->addSuccessor(MSucc); + FuncInfo.MBB->addSuccessor(MSucc); } /// SelectFNeg - Emit an FNeg operation. @@ -712,8 +779,39 @@ FastISel::SelectFNeg(const User *I) { } bool +FastISel::SelectLoad(const User *I) { + LoadInst *LI = const_cast(cast(I)); + + // For a load from an alloca, make a limited effort to find the value + // already available in a register, avoiding redundant loads. 
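The comment above describes the one pattern SelectLoad tries to catch: a load from a static alloca whose value is still at hand, so FindAvailableLoadedValue can return the previously stored value and no new load has to be emitted. At the source level the shape is roughly this (an illustrative function, not LLVM code):

  // 'A' lives in a static alloca; the reload feeding the return can reuse
  // the register that already holds 42 instead of touching memory.
  int storeThenReload() {
    int A = 42;
    return A;
  }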
+ if (!LI->isVolatile() && isa(LI->getPointerOperand())) { + BasicBlock::iterator ScanFrom = LI; + if (const Value *V = FindAvailableLoadedValue(LI->getPointerOperand(), + LI->getParent(), ScanFrom)) { + if (!V->use_empty() && + (!isa(V) || + cast(V)->getParent() == LI->getParent() || + (isa(V) && + FuncInfo.StaticAllocaMap.count(cast(V)))) && + (!isa(V) || + LI->getParent() == &LI->getParent()->getParent()->getEntryBlock())) { + unsigned ResultReg = getRegForValue(V); + if (ResultReg != 0) { + UpdateValueMap(I, ResultReg); + return true; + } + } + } + } + + return false; +} + +bool FastISel::SelectOperator(const User *I, unsigned Opcode) { switch (Opcode) { + case Instruction::Load: + return SelectLoad(I); case Instruction::Add: return SelectBinaryOp(I, ISD::ADD); case Instruction::FAdd: @@ -762,8 +860,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { if (BI->isUnconditional()) { const BasicBlock *LLVMSucc = BI->getSuccessor(0); - MachineBasicBlock *MSucc = MBBMap[LLVMSucc]; - FastEmitBranch(MSucc); + MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc]; + FastEmitBranch(MSucc, BI->getDebugLoc()); return true; } @@ -778,7 +876,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { case Instruction::Alloca: // FunctionLowering has the static-sized case covered. - if (StaticAllocaMap.count(cast(I))) + if (FuncInfo.StaticAllocaMap.count(cast(I))) return true; // Dynamic-sized alloca is not handled yet. @@ -824,32 +922,16 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(MachineFunction &mf, - DenseMap &vm, - DenseMap &bm, - DenseMap &am, - std::vector > &pn -#ifndef NDEBUG - , SmallSet &cil -#endif - ) - : MBB(0), - ValueMap(vm), - MBBMap(bm), - StaticAllocaMap(am), - PHINodesToUpdate(pn), -#ifndef NDEBUG - CatchInfoLost(cil), -#endif - MF(mf), - MRI(MF.getRegInfo()), - MFI(*MF.getFrameInfo()), - MCP(*MF.getConstantPool()), - TM(MF.getTarget()), +FastISel::FastISel(FunctionLoweringInfo &funcInfo) + : FuncInfo(funcInfo), + MRI(FuncInfo.MF->getRegInfo()), + MFI(*FuncInfo.MF->getFrameInfo()), + MCP(*FuncInfo.MF->getConstantPool()), + TM(FuncInfo.MF->getTarget()), TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - IsBottomUp(false) { + TRI(*TM.getRegisterInfo()) { } FastISel::~FastISel() {} @@ -978,7 +1060,7 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); - BuildMI(MBB, DL, II, ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); return ResultReg; } @@ -989,13 +1071,13 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; @@ -1009,17 +1091,15 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if 
(II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1032,17 +1112,15 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1055,17 +1133,15 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1079,19 +1155,17 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1103,13 +1177,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); else { - BuildMI(MBB, DL, II).addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 
TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1117,24 +1189,12 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { - const TargetRegisterClass* RC = MRI.getRegClass(Op0); - unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG); - - if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Idx); - else { - BuildMI(MBB, DL, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Idx); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; - } + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx); return ResultReg; } @@ -1154,14 +1214,14 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet SuccsHandled; - unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size(); + unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); // Check successor nodes' PHI nodes that expect a constant to be available // from this block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa(SuccBB->begin())) continue; - MachineBasicBlock *SuccMBB = MBBMap[SuccBB]; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. @@ -1182,7 +1242,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its // own moves. Second, this check is necessary becuase FastISel doesn't - // use CreateRegForValue to create registers, so it always creates + // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. 
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { @@ -1190,7 +1250,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (VT == MVT::i1) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { - PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } } @@ -1205,10 +1265,10 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { - PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } - PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); DL = DebugLoc(); } } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 65c36c1..928e1ec 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" -#include "FunctionLoweringInfo.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" @@ -30,7 +30,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" @@ -47,9 +46,11 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { if (isa(I)) return true; const BasicBlock *BB = I->getParent(); for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) - if (cast(*UI)->getParent() != BB || isa(*UI)) + UI != E; ++UI) { + const User *U = *UI; + if (cast(U)->getParent() != BB || isa(U)) return true; + } return false; } @@ -64,9 +65,11 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) { const BasicBlock *Entry = A->getParent()->begin(); for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) - if (cast(*UI)->getParent() != Entry || isa(*UI)) + UI != E; ++UI) { + const User *U = *UI; + if (cast(U)->getParent() != Entry || isa(U)) return false; // Use not in entry block. + } return true; } @@ -74,12 +77,18 @@ FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) : TLI(tli) { } -void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, - bool EnableFastISel) { +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { Fn = &fn; MF = &mf; RegInfo = &MF->getRegInfo(); + // Check whether the function can return without sret-demotion. + SmallVector Outs; + GetReturnInfo(Fn->getReturnType(), + Fn->getAttributes().getRetAttributes(), Outs, TLI); + CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(), + Outs, Fn->getContext()); + // Create a vreg for each argument register that is not dead and is used // outside of the entry block for the function. 
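The CanLowerReturn query added in the hunk above decides up front whether the return value fits the target's return registers; when it does not, the return is demoted to a hidden sret pointer (the DemoteRegister field in the header removed further below holds the vreg for that pointer). Informally, the demotion corresponds to this signature rewrite (illustrative C++ declarations, not the actual lowering output):

  struct Big { long Vals[8]; };

  // Original form: Big is returned by value.
  Big produceBig();

  // Demoted form: the caller allocates the result object and passes a
  // hidden pointer to it.
  void produceBigDemoted(Big *Result);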
for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); @@ -172,31 +181,33 @@ void FunctionLoweringInfo::clear() { #endif LiveOutRegInfo.clear(); ArgDbgValues.clear(); + RegFixups.clear(); } -unsigned FunctionLoweringInfo::MakeReg(EVT VT) { +/// CreateReg - Allocate a single virtual register for the given type. +unsigned FunctionLoweringInfo::CreateReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } -/// CreateRegForValue - Allocate the appropriate number of virtual registers of +/// CreateRegs - Allocate the appropriate number of virtual registers of /// the correctly promoted or expanded types. Assign these registers /// consecutive vreg numbers and return the first assigned number. /// /// In the case that the given value has struct or array type, this function /// will assign registers for each member or element. /// -unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { +unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, V->getType(), ValueVTs); + ComputeValueVTs(TLI, Ty, ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; - EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT); + EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); - unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT); + unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { - unsigned R = MakeReg(RegisterVT); + unsigned R = CreateReg(RegisterVT); if (!FirstReg) FirstReg = R; } } @@ -208,7 +219,7 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB) { // Inform the MachineModuleInfo of the personality for this landing pad. - const ConstantExpr *CE = cast<ConstantExpr>(I.getOperand(2)); + const ConstantExpr *CE = cast<ConstantExpr>(I.getArgOperand(1)); assert(CE->getOpcode() == Instruction::BitCast && isa<Function>(CE->getOperand(0)) && "Personality should be a function"); @@ -217,18 +228,18 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. std::vector<const GlobalVariable *> TyInfo; - unsigned N = I.getNumOperands(); + unsigned N = I.getNumArgOperands(); - for (unsigned i = N - 1; i > 2; --i) { - if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(i))) { + for (unsigned i = N - 1; i > 1; --i) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(i))) { unsigned FilterLength = CI->getZExtValue(); unsigned FirstCatch = i + FilterLength + !FilterLength; - assert (FirstCatch <= N && "Invalid filter length"); + assert(FirstCatch <= N && "Invalid filter length"); if (FirstCatch < N) { TyInfo.reserve(N - FirstCatch); for (unsigned j = FirstCatch; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addCatchTypeInfo(MBB, TyInfo); TyInfo.clear(); } @@ -240,7 +251,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Filter.
TyInfo.reserve(FilterLength - 1); for (unsigned j = i + 1; j < FirstCatch; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addFilterTypeInfo(MBB, TyInfo); TyInfo.clear(); } @@ -249,10 +260,10 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } - if (N > 3) { - TyInfo.reserve(N - 3); - for (unsigned j = 3; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + if (N > 2) { + TyInfo.reserve(N - 2); + for (unsigned j = 2; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addCatchTypeInfo(MBB, TyInfo); } } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h deleted file mode 100644 index 4067a5b..0000000 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h +++ /dev/null @@ -1,144 +0,0 @@ -//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements routines for translating functions from LLVM IR into -// Machine IR. -// -//===----------------------------------------------------------------------===// - -#ifndef FUNCTIONLOWERINGINFO_H -#define FUNCTIONLOWERINGINFO_H - -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#ifndef NDEBUG -#include "llvm/ADT/SmallSet.h" -#endif -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/Support/CallSite.h" -#include - -namespace llvm { - -class AllocaInst; -class BasicBlock; -class CallInst; -class Function; -class GlobalVariable; -class Instruction; -class MachineInstr; -class MachineBasicBlock; -class MachineFunction; -class MachineModuleInfo; -class MachineRegisterInfo; -class TargetLowering; -class Value; - -//===--------------------------------------------------------------------===// -/// FunctionLoweringInfo - This contains information that is global to a -/// function that is used when lowering a region of the function. -/// -class FunctionLoweringInfo { -public: - const TargetLowering &TLI; - const Function *Fn; - MachineFunction *MF; - MachineRegisterInfo *RegInfo; - - /// CanLowerReturn - true iff the function's return value can be lowered to - /// registers. - bool CanLowerReturn; - - /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg - /// allocated to hold a pointer to the hidden sret parameter. - unsigned DemoteRegister; - - /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. - DenseMap MBBMap; - - /// ValueMap - Since we emit code for the function a basic block at a time, - /// we must remember which virtual registers hold the values for - /// cross-basic-block values. - DenseMap ValueMap; - - /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in - /// the entry block. This allows the allocas to be efficiently referenced - /// anywhere in the function. - DenseMap StaticAllocaMap; - - /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for - /// function arguments that are inserted after scheduling is completed. 
- SmallVector ArgDbgValues; - -#ifndef NDEBUG - SmallSet CatchInfoLost; - SmallSet CatchInfoFound; -#endif - - struct LiveOutInfo { - unsigned NumSignBits; - APInt KnownOne, KnownZero; - LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {} - }; - - /// LiveOutRegInfo - Information about live out vregs, indexed by their - /// register number offset by 'FirstVirtualRegister'. - std::vector LiveOutRegInfo; - - /// PHINodesToUpdate - A list of phi instructions whose operand list will - /// be updated after processing the current basic block. - /// TODO: This isn't per-function state, it's per-basic-block state. But - /// there's no other convenient place for it to live right now. - std::vector > PHINodesToUpdate; - - explicit FunctionLoweringInfo(const TargetLowering &TLI); - - /// set - Initialize this FunctionLoweringInfo with the given Function - /// and its associated MachineFunction. - /// - void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel); - - /// clear - Clear out all the function-specific state. This returns this - /// FunctionLoweringInfo to an empty state, ready to be used for a - /// different function. - void clear(); - - unsigned MakeReg(EVT VT); - - /// isExportedInst - Return true if the specified value is an instruction - /// exported from its block. - bool isExportedInst(const Value *V) { - return ValueMap.count(V); - } - - unsigned CreateRegForValue(const Value *V); - - unsigned InitializeRegForValue(const Value *V) { - unsigned &R = ValueMap[V]; - assert(R == 0 && "Already initialized this value register!"); - return R = CreateRegForValue(V); - } -}; - -/// AddCatchInfo - Extract the personality and type infos from an eh.selector -/// call, and add them to the specified machine basic block. -void AddCatchInfo(const CallInst &I, - MachineModuleInfo *MMI, MachineBasicBlock *MBB); - -/// CopyCatchInfo - Copy catch information from DestBB to SrcBB. -void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); - -} // end namespace llvm - -#endif diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 16eb8a7..61c2a90 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -123,7 +123,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, EVT VT = Node->getValueType(ResNo); const TargetRegisterClass *SrcRC = 0, *DstRC = 0; - SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); // Figure out the register class to create for the destreg. if (VRBase) { @@ -142,11 +142,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, } else { // Create the reg, emit the copy. 
VRBase = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg, - DstRC, SrcRC, Node->getDebugLoc()); - - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + VRBase).addReg(SrcReg); } SDValue Op(Node, ResNo); @@ -246,7 +243,7 @@ unsigned InstrEmitter::getVR(SDValue Op, const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); VReg = MRI->createVirtualRegister(RC); } - BuildMI(MBB, Op.getDebugLoc(), + BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; } @@ -288,10 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, "Don't have operand info for this instruction!"); if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, - DstRC, SrcRC, Op.getNode()->getDebugLoc()); - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; } } @@ -428,12 +423,9 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } if (Opc == TargetOpcode::EXTRACT_SUBREG) { + // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub unsigned SubIdx = cast(Node->getOperand(1))->getZExtValue(); - // Create the extract_subreg machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::EXTRACT_SUBREG)); - // Figure out the register class to create for the destreg. unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(VReg); @@ -450,11 +442,16 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBase = MRI->createVirtualRegister(SRC); } - // Add def, source, and subreg index - MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + + // Add source, and subreg index AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); - MI->addOperand(MachineOperand::CreateImm(SubIdx)); + assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) && + "Cannot yet extract from physregs"); + MI->getOperand(1).setSubReg(SubIdx); MBB->insert(InsertPos, MI); } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { @@ -511,18 +508,13 @@ void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, DenseMap &VRBaseMap) { unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + // Create the new VReg in the destination class and emit a copy. unsigned DstRCIdx = cast(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx); - - // Create the new VReg in the destination class and emit a copy. 
unsigned NewVReg = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, - DstRC, SrcRC, Node->getDebugLoc()); - assert(Emitted && - "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + NewVReg).addReg(VReg); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; @@ -604,9 +596,10 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - // FIXME: SDDbgValues aren't updated with legalization, so it's possible - // to have i128 values in them at this point. As a crude workaround, just - // drop the debug info if this happens. + // FIXME: SDDbgValue constants aren't updated with legalization, so it's + // possible to have i128 constants in them at this point. Dwarf writer + // does not handle i128 constants at the moment so, as a crude workaround, + // just drop the debug info if this happens. if (!CI->getValue().isSignedIntN(64)) MIB.addReg(0U); else @@ -676,6 +669,33 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); + + // The MachineInstr constructor adds implicit-def operands. Scan through + // these to determine which are dead. + if (MI->getNumOperands() != 0 && + Node->getValueType(Node->getNumValues()-1) == MVT::Flag) { + // First, collect all used registers. + SmallVector<unsigned, 8> UsedRegs; + for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser()) + if (F->getOpcode() == ISD::CopyFromReg) + UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); + else { + // Collect declared implicit uses. + const TargetInstrDesc &TID = TII->get(F->getMachineOpcode()); + UsedRegs.append(TID.getImplicitUses(), + TID.getImplicitUses() + TID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. + for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } + } + // Then mark unused registers as dead. + MI->setPhysRegsDeadExcept(UsedRegs, *TRI); + } // Add result register values for things that are defined by this // instruction. @@ -696,16 +716,24 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), cast<MachineSDNode>(Node)->memoperands_end()); + // Insert the instruction into position in the block. This needs to + // happen before any custom inserter hook is called so that the + // hook knows where in the block to insert the replacement code. + MBB->insert(InsertPos, MI); + if (II.usesCustomInsertionHook()) { // Insert this instruction into the basic block using a target // specific inserter which may return a new basic block. - MBB = TLI->EmitInstrWithCustomInserter(MI, MBB); - InsertPos = MBB->end(); + bool AtEnd = InsertPos == MBB->end(); + MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); + if (NewMBB != MBB) { + if (AtEnd) + InsertPos = NewMBB->end(); + MBB = NewMBB; + } return; } - MBB->insert(InsertPos, MI); - // Additional results must be a physical register def.
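// [Editorial note, not part of the patch] The dead-flag scan added above
// works off the glue/flag chain: each flagged CopyFromReg user names a
// physreg that is genuinely read, and any other flagged machine node
// contributes its declared implicit uses plus direct RegisterSDNode
// operands. Schematically, for a hypothetical X86-flavored node (names
// illustrative only):
//
//   t0: i32,Flag = ADD32rr x, y             // implicitly defines EFLAGS
//   t1: i32 = CopyFromReg t0:1, EFLAGS      // flagged user: EFLAGS is read
//
// With the CopyFromReg present, UsedRegs contains EFLAGS and the def stays
// live; without it, setPhysRegsDeadExcept(UsedRegs, *TRI) marks the implicit
// EFLAGS def dead, information the fast register allocator can exploit.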
if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { @@ -761,24 +789,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned DestReg = cast(Node->getOperand(1))->getReg(); if (SrcReg == DestReg) // Coalesced away the copy? Ignore. break; - - const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0; - // Get the register classes of the src/dst. - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - SrcTRC = MRI->getRegClass(SrcReg); - else - SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType()); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) - DstTRC = MRI->getRegClass(DestReg); - else - DstTRC = TRI->getPhysicalRegisterRegClass(DestReg, - Node->getOperand(1).getValueType()); - - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg, - DstTRC, SrcTRC, Node->getDebugLoc()); - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + DestReg).addReg(SrcReg); break; } case ISD::CopyFromReg: { @@ -807,6 +820,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, const char *AsmStr = cast(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); + // Add the isAlignStack bit. + int64_t isAlignStack = + cast(Node->getOperand(InlineAsm::Op_IsAlignStack))-> + getZExtValue(); + MI->addOperand(MachineOperand::CreateImm(isAlignStack)); + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = @@ -821,14 +840,22 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case InlineAsm::Kind_RegDef: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - MI->addOperand(MachineOperand::CreateReg(Reg, true)); + // FIXME: Add dead flags for physical and virtual registers defined. + // For now, mark physical register defs as implicit to help fast + // regalloc. This makes inline asm look a lot like calls. + MI->addOperand(MachineOperand::CreateReg(Reg, true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, - false, false, true)); + MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), + /*isKill=*/ false, + /*isDead=*/ false, + /*isUndef=*/false, + /*isEarlyClobber=*/ true)); } break; case InlineAsm::Kind_RegUse: // Use of register. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 62a37a5..7a47da4 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -31,6 +31,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" @@ -133,7 +134,7 @@ private: /// whose vector element type is narrower than the original shuffle type. /// e.g. 
<0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, - SDValue N1, SDValue N2, + SDValue N1, SDValue N2, SmallVectorImpl<int> &Mask) const; bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, @@ -143,6 +144,8 @@ private: DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_PPCF128); @@ -172,6 +175,8 @@ private: SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); + void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); }; @@ -181,8 +186,8 @@ private: /// performs the same shuffle in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> -SDValue -SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, +SDValue +SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, SmallVectorImpl<int> &Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); @@ -193,12 +198,12 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, if (NumEltsGrowth == 1) return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]); - + SmallVector<int, 16> NewMask; for (unsigned i = 0; i != NumMaskElts; ++i) { int Idx = Mask[i]; for (unsigned j = 0; j != NumEltsGrowth; ++j) { - if (Idx < 0) + if (Idx < 0) NewMask.push_back(-1); else NewMask.push_back(Idx * NumEltsGrowth + j); @@ -320,7 +325,8 @@ bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, bool OperandsLeadToDest = false; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) OperandsLeadToDest |= // If an operand leads to Dest, so do we. - LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo); + LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, + NodesLeadingTo); if (OperandsLeadToDest) { NodesLeadingTo.insert(N); @@ -357,7 +363,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, EVT SVT = VT; while (SVT != MVT::f32) { SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); - if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) && + if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from // smaller type. TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && @@ -372,8 +378,8 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, dl, - OrigVT, DAG.getEntryNode(), + return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl, + DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, VT, false, false, Alignment); return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, @@ -450,7 +456,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, 8 * (StoredBytes - Offset)); // Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr, NULL, 0, MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, @@ -552,7 +558,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // The last copy may be partial. Do an extending load. EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset)); - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), LD->isNonTemporal(), @@ -568,7 +574,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, Stores.size()); // Finally, perform the original load only redirected to the stack slot. - Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase, NULL, 0, LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. @@ -597,23 +603,23 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load the value in two parts SDValue Lo, Hi; if (TLI.isLittleEndian()) { - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } else { - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } // aggregate the two parts @@ -773,7 +779,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "Unexpected illegal type!"); for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - assert((isTypeLegal(Node->getOperand(i).getValueType()) || + assert((isTypeLegal(Node->getOperand(i).getValueType()) || Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); @@ -853,6 +859,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: + case ISD::EH_SJLJ_SETJMP: + case ISD::EH_SJLJ_LONGJMP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. 
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -925,8 +933,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } - Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(), - Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), + Ops.size()), 0); switch (Action) { case TargetLowering::Legal: for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -1000,11 +1008,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { NodesLeadingTo); } - // Now that we legalized all of the inputs (which may have inserted - // libcalls) create the new CALLSEQ_START node. + // Now that we have legalized all of the inputs (which may have inserted + // libcalls), create the new CALLSEQ_START node. Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - // Merge in the last call, to ensure that this call start after the last + // Merge in the last call to ensure that this call starts after the last // call ended. if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -1016,7 +1024,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Tmp1 != Node->getOperand(0)) { SmallVector Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], + Ops.size()), Result.getResNo()); } // Remember that the CALLSEQ_START is legalized. @@ -1058,7 +1067,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Tmp1 != Node->getOperand(0)) { SmallVector Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); } } else { Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); @@ -1067,7 +1078,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { SmallVector Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; Ops.back() = Tmp2; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); } } assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); @@ -1087,7 +1100,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); Tmp3 = Result.getValue(0); Tmp4 = Result.getValue(1); @@ -1100,7 +1115,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), + Result = ExpandUnalignedLoad(cast(Result.getNode()), DAG, TLI); Tmp3 = Result.getOperand(0); Tmp4 = Result.getOperand(1); @@ -1166,7 +1181,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset, NVT, isVolatile, isNonTemporal, Alignment); @@ -1202,8 +1217,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (TLI.isLittleEndian()) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, - Node->getValueType(0), Tmp1, Tmp2, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl, + Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, isNonTemporal, Alignment); @@ -1211,13 +1226,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); - // Build a factor node to remember that this load is independent of the - // other one. + // Build a factor node to remember that this load is independent of + // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); @@ -1231,7 +1246,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, isNonTemporal, Alignment); @@ -1239,14 +1254,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, - Node->getValueType(0), Tmp1, Tmp2, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); - // Build a factor node to remember that this load is independent of the - // other one. + // Build a factor node to remember that this load is independent of + // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); @@ -1267,7 +1282,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); Tmp1 = Result.getValue(0); Tmp2 = Result.getValue(1); @@ -1281,10 +1298,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // If this is an unaligned load and the target doesn't support it, // expand it. 
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + const Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), + Result = ExpandUnalignedLoad(cast(Result.getNode()), DAG, TLI); Tmp1 = Result.getOperand(0); Tmp2 = Result.getOperand(1); @@ -1310,10 +1329,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Load.getValue(1)); break; } - assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!"); + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), SrcVT, LD->isVolatile(), LD->isNonTemporal(), @@ -1355,8 +1375,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { { Tmp3 = LegalizeOp(ST->getValue()); - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { @@ -1366,7 +1388,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // expand it. if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) Result = ExpandUnalignedStore(cast(Result.getNode()), DAG, TLI); @@ -1459,8 +1481,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1469,7 +1493,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // expand it. 
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) Result = ExpandUnalignedStore(cast(Result.getNode()), DAG, TLI); @@ -1531,7 +1555,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, false, false, 0); else - return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr, NULL, 0, Vec.getValueType().getVectorElementType(), false, false, 0); } @@ -1568,7 +1592,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { Node->getOperand(i), Idx, SV, Offset, EltVT, false, false, 0)); } else - Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, SV, Offset, false, false, 0)); } @@ -1763,7 +1787,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); - return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT, + return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT, false, false, DestAlign); } @@ -1926,6 +1950,44 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } +// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. +std::pair +SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). 
+ LegalizeOp(CallInfo.second); + return CallInfo; +} + SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, @@ -2048,7 +2110,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); - SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, + TwoP84PlusTwoP52); return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } @@ -2058,11 +2121,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { EVT SHVT = TLI.getShiftAmountTy(); - SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, DAG.getConstant(UINT64_C(0x800), MVT::i64)); - SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); @@ -2122,7 +2185,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, false, false, Alignment); else { FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, MVT::f32, false, false, Alignment)); @@ -2332,6 +2395,92 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } } +std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC; + + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic intrinsic Expand!"); + break; + case ISD::ATOMIC_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; + case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; + case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; + case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + } + break; + case ISD::ATOMIC_CMP_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; + case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; + case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; + case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + } + break; + case ISD::ATOMIC_LOAD_ADD: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + } + break; + case ISD::ATOMIC_LOAD_SUB: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; + 
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + } + break; + case ISD::ATOMIC_LOAD_AND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + } + break; + case ISD::ATOMIC_LOAD_OR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + } + break; + case ISD::ATOMIC_LOAD_XOR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + } + break; + case ISD::ATOMIC_LOAD_NAND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + } + break; + } + + return ExpandChainLibCall(LC, Node, false); +} + void SelectionDAGLegalize::ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results) { DebugLoc dl = Node->getDebugLoc(); @@ -2357,10 +2506,48 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EH_RETURN: case ISD::EH_LABEL: case ISD::PREFETCH: - case ISD::MEMBARRIER: case ISD::VAEND: + case ISD::EH_SJLJ_LONGJMP: + Results.push_back(Node->getOperand(0)); + break; + case ISD::EH_SJLJ_SETJMP: + Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; + case ISD::MEMBARRIER: { + // If the target didn't lower this, lower it to a '__sync_synchronize()' + // call. + TargetLowering::ArgListTy Args; + std::pair<SDValue, SDValue> CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), + Args, DAG, dl); + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + // FIXME: Unimplemented for now. Add libcalls.
+ case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + std::pair Tmp = ExpandAtomic(Node); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; @@ -2465,15 +2652,31 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); - SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, - false, false, 0); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, + false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(Align - 1, + TLI.getPointerTy())); + + VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(-Align, + TLI.getPointerTy())); + } + // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, + Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0, false, false, 0); // Load the actual argument out of the pointer VAList Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, @@ -2496,7 +2699,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. 
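// [Editorial note, not part of the patch] The VAARG expansion above rounds
// the loaded va_list pointer up to an over-aligned argument's boundary with
// the standard power-of-two idiom; the ADD/AND node pair computes exactly
// this, since -Align == ~(Align - 1) in two's complement. A standalone
// restatement (AlignUp is an illustrative name, assumes <cstdint>):
#include <cstdint>
uint64_t AlignUp(uint64_t P, uint64_t A) { // A must be a power of two
  return (P + A - 1) & ~(A - 1);
}
// e.g. AlignUp(13, 8) == 16 and AlignUp(16, 8) == 16.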
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), Node->getOperand(0)); else Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); @@ -2948,13 +3151,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const TargetData &TD = *TLI.getTargetData(); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(EntrySize, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr, PseudoSourceValue::getJumpTable(), 0, MemVT, false, false, 0); Addr = LD; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e3eb949..650ee5a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -453,8 +453,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { - NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), - NVT, L->getChain(), L->getBasePtr(), L->getOffset(), + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), NVT, L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to @@ -464,8 +464,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { } // Do a non-extending load followed by FP_EXTEND. - NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD, - L->getMemoryVT(), L->getChain(), + NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, + L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), L->getMemoryVT(), L->isVolatile(), @@ -504,7 +504,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue NewVAARG; - NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); + NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -698,9 +699,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { @@ -739,9 +741,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { @@ -757,8 +760,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { @@ -1106,7 +1110,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); - Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); @@ -1294,9 +1298,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { @@ -1375,9 +1379,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { @@ -1393,8 +1397,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8b382bc..b94ea9a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -369,7 +369,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); DebugLoc dl = N->getDebugLoc(); - SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT(), N->isVolatile(), N->isNonTemporal(), N->getAlignment()); @@ -572,7 +572,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SmallVector Parts(NumRegs); for (unsigned i = 0; i < NumRegs; ++i) { - Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2)); + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); Chain = Parts[i].getValue(1); } @@ -725,8 +726,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) { // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always // legal types. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - N->getOperand(1), LHS, RHS, N->getOperand(4)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)), + 0); } SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { @@ -737,8 +739,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); // The chain (Op#0) and basic block destination (Op#2) are always legal types. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond, - N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, + N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { @@ -773,7 +775,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i < NumElts; ++i) NewOps.push_back(GetPromotedInteger(N->getOperand(i))); - return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts); + return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { @@ -798,17 +800,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, assert(N->getOperand(1).getValueType().getSizeInBits() >= N->getValueType(0).getVectorElementType().getSizeInBits() && "Type of inserted value narrower than vector element type!"); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), GetPromotedInteger(N->getOperand(1)), - N->getOperand(2)); + N->getOperand(2)), + 0); } assert(OpNo == 2 && "Different operand and result vector types?"); // Promote the index. SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - N->getOperand(1), Idx); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), Idx), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { @@ -819,15 +822,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { SDValue Flag = GetPromotedInteger(N->getOperand(i)); NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); } - return DAG.UpdateNodeOperands(SDValue (N, 0), NewOps, - array_lengthof(NewOps)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote // the operand in place. - return DAG.UpdateNodeOperands(SDValue(N, 0), - GetPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + GetPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { @@ -837,8 +839,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); - return DAG.UpdateNodeOperands(SDValue(N, 0), Cond, - N->getOperand(1), N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, Cond, + N->getOperand(1), N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { @@ -849,8 +851,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast(N->getOperand(4))->get()); // The CC (#4) and the possible return values (#2 and #3) have legal types. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2), - N->getOperand(3), N->getOperand(4)); + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { @@ -861,12 +863,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast(N->getOperand(2))->get()); // The CC (#2) is always legal. - return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - ZExtPromotedInteger(N->getOperand(1))); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { @@ -878,8 +880,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), - SExtPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ @@ -905,8 +907,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), - ZExtPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { @@ -990,6 +992,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::SRA: case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + + case ISD::SADDO: + case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; + case ISD::UADDO: + case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1526,7 +1533,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (N->getMemoryVT().bitsLE(NVT)) { EVT MemVT = N->getMemoryVT(); - Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. @@ -1559,7 +1566,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), + Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -1577,7 +1584,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned ExcessBits = (EBytes - IncrementSize)*8; // Load both the high bits and maybe some of the low bits. 
- Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), isVolatile, isNonTemporal, Alignment); @@ -1586,7 +1593,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); // Load the rest of the low bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, @@ -1716,6 +1723,48 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + DebugLoc dl = Node->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Compute the overflow. + // + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + EVT OType = Node->getValueType(1); + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(Node, 1), Cmp); +} + void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -1912,6 +1961,29 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, + N->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2154,9 +2226,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. 
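/* Editor's sketch (not part of the patch): the overflow predicates that the
   new ExpandIntRes_SADDSUBO/ExpandIntRes_UADDSUBO handlers materialize as
   SETCC nodes can be checked on ordinary integers. Assuming two's-complement
   wraparound:

     #include <cstdint>
     // Signed add overflows iff the operands share a sign but the sum's
     // sign differs; for subtract the operand signs must instead differ.
     bool sadd_overflows(int32_t a, int32_t b) {
       int32_t s = (int32_t)((uint32_t)a + (uint32_t)b);   // wrapped sum
       return (a >= 0) == (b >= 0) && (a >= 0) != (s >= 0);
     }
     // Unsigned: a + b overflows iff the wrapped sum is less than a.
     bool uadd_overflows(uint32_t a, uint32_t b) { return a + b < a; }

   e.g. sadd_overflows(INT32_MAX, 1) is true (two non-negatives, negative
   sum), and uadd_overflows(0xFFFFFFFFu, 1) is true since the sum wraps to 0. */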
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { @@ -2172,9 +2244,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { @@ -2190,8 +2262,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { @@ -2200,7 +2272,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // upper half of the shift amount is zero. Just use the lower half. SDValue Lo, Hi; GetExpandedInteger(N->getOperand(1), Lo, Hi); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { @@ -2209,7 +2281,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { // constant to valid type. SDValue Lo, Hi; GetExpandedInteger(N->getOperand(0), Lo, Hi); - return DAG.UpdateNodeOperands(SDValue(N, 0), Lo); + return SDValue(DAG.UpdateNodeOperands(N, Lo), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { @@ -2384,7 +2456,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(), FudgePtr, NULL, 0, MVT::f32, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 17f131b..6e56c98 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -485,15 +485,14 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { NewOps.push_back(Op); } else if (Op != OrigOp) { // This is the first operand to change - add all operands so far. - NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i); + NewOps.append(N->op_begin(), N->op_begin() + i); NewOps.push_back(Op); } } // Some operands changed - update the node. if (!NewOps.empty()) { - SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], - NewOps.size()).getNode(); + SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size()); if (M != N) { // The node morphed into a different node. Normally for this to happen // the original node would have to be marked NewNode. However this can @@ -684,40 +683,45 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { // can potentially cause recursive merging. 
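/* Editor's note (not part of the patch): the restructuring below wraps the
   replacement logic in a do/while. The reason, as the new trailing comment
   states, is that re-analyzing nodes can CSE them into nodes that use From
   again, so one pass is not enough. The shape of the fix, schematically:

     do {
       DAG.ReplaceAllUsesOfValueWith(From, To, &NUL);
       ReplacedValues[From] = To;
       // ... re-analyze queued nodes, remapping any that morphed ...
     } while (!From.use_empty());  // CSE may have minted fresh uses of From

   i.e. iterate to a fixed point where From is truly dead. */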
SmallSetVector NodesToAnalyze; NodeUpdateListener NUL(*this, NodesToAnalyze); - DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); - - // The old node may still be present in a map like ExpandedIntegers or - // PromotedIntegers. Inform maps about the replacement. - ReplacedValues[From] = To; - - // Process the list of nodes that need to be reanalyzed. - while (!NodesToAnalyze.empty()) { - SDNode *N = NodesToAnalyze.back(); - NodesToAnalyze.pop_back(); - if (N->getNodeId() != DAGTypeLegalizer::NewNode) - // The node was analyzed while reanalyzing an earlier node - it is safe to - // skip. Note that this is not a morphing node - otherwise it would still - // be marked NewNode. - continue; + do { + DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); + + // The old node may still be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + ReplacedValues[From] = To; + + // Process the list of nodes that need to be reanalyzed. + while (!NodesToAnalyze.empty()) { + SDNode *N = NodesToAnalyze.back(); + NodesToAnalyze.pop_back(); + if (N->getNodeId() != DAGTypeLegalizer::NewNode) + // The node was analyzed while reanalyzing an earlier node - it is safe + // to skip. Note that this is not a morphing node - otherwise it would + // still be marked NewNode. + continue; - // Analyze the node's operands and recalculate the node ID. - SDNode *M = AnalyzeNewNode(N); - if (M != N) { - // The node morphed into a different node. Make everyone use the new node - // instead. - assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); - assert(N->getNumValues() == M->getNumValues() && - "Node morphing changed the number of results!"); - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - SDValue OldVal(N, i); - SDValue NewVal(M, i); - if (M->getNodeId() == Processed) - RemapValue(NewVal); - DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + // Analyze the node's operands and recalculate the node ID. + SDNode *M = AnalyzeNewNode(N); + if (M != N) { + // The node morphed into a different node. Make everyone use the new + // node instead. + assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + SDValue OldVal(N, i); + SDValue NewVal(M, i); + if (M->getNodeId() == Processed) + RemapValue(NewVal); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + } + // The original node continues to exist in the DAG, marked NewNode. } - // The original node continues to exist in the DAG, marked NewNode. } - } + // When recursively update nodes with new nodes, it is possible to have + // new uses of From due to CSE. If this happens, replace the new uses of + // From with To. 
+ } while (!From.use_empty()); } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c665963..bd86694 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -345,6 +345,9 @@ private: void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -620,6 +623,7 @@ private: SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 88e1e62..9c2b1d9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -238,13 +238,15 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, } void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Chain = N->getOperand(0); SDValue Ptr = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); + const unsigned Align = N->getConstantOperandVal(3); - Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); - Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2)); + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load. 
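/* Editor's note (not part of the patch): ExpandRes_VAARG now threads the
   va_arg alignment through. An illegal type (say i64 on a 32-bit target)
   becomes two chained VAARG nodes of the half type: the first consumes the
   original alignment operand, the second reads the following slot and so
   passes 0. Schematically, under those assumptions:

     Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, V, Align);      // aligned first half
     Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, V, 0); // next slot

   with the halves swapped on big-endian targets, matching the endianness
   fixup that follows. */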
if (TLI.isBigEndian()) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 0e2bd02..621c087 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -116,7 +116,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Ops.push_back(LegalizeOp(Node->getOperand(i))); SDValue Result = - DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size()); + SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7efeea1..93aeff5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -165,9 +165,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); - SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(), + SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getExtensionType(), N->getValueType(0).getVectorElementType(), + N->getDebugLoc(), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), @@ -448,6 +449,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -755,14 +761,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); - Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; - Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the @@ -1082,10 +1088,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { uint64_t LoElts = Lo.getValueType().getVectorNumElements(); if (IdxVal < LoElts) - return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx); - return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, + return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); + return SDValue(DAG.UpdateNodeOperands(N, Hi, DAG.getConstant(IdxVal - LoElts, - Idx.getValueType())); + Idx.getValueType())), + 0); } // Store the vector to the stack. @@ -1099,7 +1106,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. 
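/* Editor's sketch (not part of the patch): SplitVecOp_EXTRACT_VECTOR_ELT
   above picks the half containing a constant index, re-basing the index when
   it falls in Hi. The arithmetic, on plain integers:

     // Given a vector split into Lo (LoElts lanes) and Hi:
     //   idx <  LoElts -> extract lane idx          from Lo
     //   idx >= LoElts -> extract lane idx - LoElts from Hi
     unsigned LoElts = 4;   // hypothetical: v8 split into two v4 halves
     unsigned idx    = 6;   // lands in Hi at lane 6 - 4 = 2

   Non-constant indices fall through to the store-and-reload path that
   follows. */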
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr, SV, 0, EltVT, false, false, 0); } @@ -1199,7 +1206,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: - case ISD::FPOWI: case ISD::FREM: case ISD::FSUB: case ISD::MUL: @@ -1215,6 +1221,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Binary(N); break; + case ISD::FPOWI: + Res = WidenVecRes_POWI(N); + break; + case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -1241,6 +1251,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEG: case ISD::FSIN: case ISD::FSQRT: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: Res = WidenVecRes_Unary(N); break; } @@ -1258,7 +1273,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); - while (!TLI.isTypeLegal(VT) && NumElts != 1) { + while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } @@ -1273,13 +1288,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } else { // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); SmallVector ConcatOps(CurNumElts); unsigned ConcatEnd = 0; // Current ConcatOps index. - unsigned Idx = 0; // Current Idx into input vectors. + int Idx = 0; // Current Idx into input vectors. + + // NumElts := greatest synthesizable vector size (at most WidenVT) + // while (orig. vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, @@ -1290,26 +1312,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { Idx += NumElts; CurNumElts -= NumElts; } - EVT PrevVecVT = VT; do { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } while (!TLI.isTypeLegal(VT) && NumElts != 1); + } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); if (NumElts == 1) { - // Since we are using concat vector, build a vector from the scalar ops. 
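/* Editor's note (not part of the patch): because the operation can trap,
   WidenVecRes_Binary applies the op only to lanes that actually exist,
   munching the input in ever-smaller synthesizable vector sizes, exactly as
   the new pseudocode comment describes. A worked example, assuming only
   v4 and v2 are synthesizable:

     7 remaining lanes -> take one v4 subvector    (3 left)
                       -> take one v2 subvector    (1 left)
                       -> NumElts reaches 1: do the last lane as a scalar op

   Each piece lands in ConcatOps; scalar results are now pushed directly,
   where the old code built an INSERT_VECTOR_ELT chain instead. */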
- SDValue VecOp = DAG.getUNDEF(PrevVecVT); for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, DAG.getIntPtrConstant(Idx)); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getIntPtrConstant(Idx)); - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp, - DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2), - DAG.getIntPtrConstant(i)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, + EOp1, EOp2); } CurNumElts = 0; - ConcatOps[ConcatEnd++] = VecOp; } } @@ -1320,23 +1337,65 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return ConcatOps[0]; } - // Rebuild vector to one with the widen type - Idx = ConcatEnd - 1; - while (Idx != 0) { + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + Idx = ConcatEnd - 1; VT = ConcatOps[Idx--].getValueType(); - while (Idx != 0 && ConcatOps[Idx].getValueType() == VT) - --Idx; - if (Idx != 0) { - VT = ConcatOps[Idx].getValueType(); - ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - &ConcatOps[Idx+1], ConcatEnd - Idx - 1); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1; + EVT NextVT; + do { + NextSize *= 2; + NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); + } while (!TLI.isTypeSynthesizable(NextVT)); + + if (!VT.isVector()) { + // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT + SDValue VecOp = DAG.getUNDEF(NextVT); + unsigned NumToInsert = ConcatEnd - Idx - 1; + for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, + ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); + } + ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; + } + else { + // Vector type, create a CONCAT_VECTORS of type NextVT + SDValue undefVec = DAG.getUNDEF(VT); + unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); + SmallVector SubConcatOps(OpsToConcat); + unsigned RealVals = ConcatEnd - Idx - 1; + unsigned SubConcatEnd = 0; + unsigned SubConcatIdx = Idx + 1; + while (SubConcatEnd < RealVals) + SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; + while (SubConcatEnd < OpsToConcat) + SubConcatOps[SubConcatEnd++] = undefVec; + ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NextVT, &SubConcatOps[0], + OpsToConcat); + ConcatEnd = SubConcatIdx + 1; } } + + // Check to see if we have a single operation with the widen type. 
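/* Editor's sketch (not part of the patch): the rebuild loop above repeatedly
   takes the run of equally-typed pieces at the tail of ConcatOps and fuses
   them into the next larger synthesizable type: INSERT_VECTOR_ELT for a run
   of scalars, CONCAT_VECTORS padded with UNDEF for a run of vectors. A
   hypothetical trace, assuming v2/v4 are synthesizable and MaxVT = v4:

     [v4, v2, s, s] -> two scalars inserted into an UNDEF v2 -> [v4, v2, v2]
     [v4, v2, v2]   -> two v2 pieces concatenated into a v4  -> [v4, v4]
     tail is MaxVT, loop exits; UNDEF v4s are appended if WidenVT needs more.

   The final CONCAT_VECTORS of the MaxVT pieces yields the widened result. */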
+ if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } - unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements(); + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT + unsigned NumOps = + WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); if (NumOps != ConcatEnd ) { - SDValue UndefVal = DAG.getUNDEF(VT); + SDValue UndefVal = DAG.getUNDEF(MaxVT); for (unsigned j = ConcatEnd; j < NumOps; ++j) ConcatOps[j] = UndefVal; } @@ -1366,7 +1425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(Opcode, dl, WidenVT, InOp); } - if (TLI.isTypeLegal(InWidenVT)) { + if (TLI.isTypeSynthesizable(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1410,6 +1469,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); } +SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); +} + SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -1501,7 +1567,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } - if (TLI.isTypeLegal(NewInVT)) { + if (TLI.isTypeSynthesizable(NewInVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1642,7 +1708,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SatOp, CvtCode); } - if (TLI.isTypeLegal(InWidenVT)) { + if (TLI.isTypeSynthesizable(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1968,7 +2034,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { if (InWidenSize % Size == 0 && !VT.isVector()) { unsigned NewNumElts = InWidenSize / Size; EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); - if (TLI.isTypeLegal(NewVT)) { + if (TLI.isTypeSynthesizable(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, DAG.getIntPtrConstant(0)); @@ -2066,7 +2132,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; - if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2080,7 +2146,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; unsigned 
MemVTWidth = MemVT.getSizeInBits(); - if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { @@ -2286,14 +2352,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector& LdChain, unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, + Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset, LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); - Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, + Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV, SVOffset + Offset, LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[i].getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index ad8630a..3b86c32 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -535,7 +535,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { SUnit *LRDef = LiveRegDefs[Reg]; EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); + TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); // If cross copy register class is null, then it must be possible copy diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 820ba66..3ef521c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -320,7 +320,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); - if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { + if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); @@ -795,7 +795,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { SUnit *LRDef = LiveRegDefs[Reg]; EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); + TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); // If cross copy register class is null, then it must be possible copy @@ -1116,7 +1116,7 @@ namespace { SUnit *pop() { if (empty()) return NULL; std::vector::iterator Best = Queue.begin(); - for (std::vector::iterator I = next(Queue.begin()), + for (std::vector::iterator I = llvm::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; @@ -1275,6 +1275,17 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ return left->getHeight() > right->getHeight(); } else if (RStall) return false; + + // If either node is scheduling 
for latency, sort them by height and latency + // first. + if (left->SchedulingPref == Sched::Latency || + right->SchedulingPref == Sched::Latency) { + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + if (left->Latency != right->Latency) + return left->Latency > right->Latency; + } + return BURRSort(left, right, SPQ); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 3185c88..06cf053 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -59,7 +59,11 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { SUnits.back().OrigNode = &SUnits.back(); SUnit *SU = &SUnits.back(); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - SU->SchedulingPref = TLI.getSchedulingPreference(N); + if (N->isMachineOpcode() && + N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) + SU->SchedulingPref = Sched::None; + else + SU->SchedulingPref = TLI.getSchedulingPreference(N); return SU; } @@ -97,7 +101,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { PhysReg = Reg; const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); + TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo)); Cost = RC->getCopyCost(); } } @@ -106,17 +110,42 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, SelectionDAG *DAG) { SmallVector VTs; - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) - VTs.push_back(N->getValueType(i)); + SDNode *FlagDestNode = Flag.getNode(); + + // Don't add a flag from a node to itself. + if (FlagDestNode == N) return; + + // Don't add a flag to something which already has a flag. + if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return; + + for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) + VTs.push_back(N->getValueType(I)); + if (AddFlag) VTs.push_back(MVT::Flag); + SmallVector Ops; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - Ops.push_back(N->getOperand(i)); - if (Flag.getNode()) + for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) + Ops.push_back(N->getOperand(I)); + + if (FlagDestNode) Ops.push_back(Flag); + SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); + MachineSDNode::mmo_iterator Begin = 0, End = 0; + MachineSDNode *MN = dyn_cast(N); + + // Store memory references. + if (MN) { + Begin = MN->memoperands_begin(); + End = MN->memoperands_end(); + } + DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); + + // Reset the memory references + if (MN) + MN->setMemRefs(Begin, End); } /// ClusterNeighboringLoads - Force nearby loads together by "flagging" them. @@ -124,98 +153,98 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, /// offsets are not far apart (target specific), it add MVT::Flag inputs and /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality. -void ScheduleDAGSDNodes::ClusterNeighboringLoads() { +void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { + SDNode *Chain = 0; + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) + Chain = Node->getOperand(NumOps-1).getNode(); + if (!Chain) + return; + + // Look for other loads of the same chain. 
Find loads that are loading from + // the same base pointer and different offsets. SmallPtrSet Visited; SmallVector Offsets; DenseMap O2SMap; // Map from offset to SDNode. - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { - SDNode *Node = &*NI; - if (!Node || !Node->isMachineOpcode()) + bool Cluster = false; + SDNode *Base = Node; + for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); + I != E; ++I) { + SDNode *User = *I; + if (User == Node || !Visited.insert(User)) continue; - - unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - if (!TID.mayLoad()) + int64_t Offset1, Offset2; + if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || + Offset1 == Offset2) + // FIXME: Should be ok if they addresses are identical. But earlier + // optimizations really should have eliminated one of the loads. continue; + if (O2SMap.insert(std::make_pair(Offset1, Base)).second) + Offsets.push_back(Offset1); + O2SMap.insert(std::make_pair(Offset2, User)); + Offsets.push_back(Offset2); + if (Offset2 < Offset1) + Base = User; + Cluster = true; + } - SDNode *Chain = 0; - unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) - Chain = Node->getOperand(NumOps-1).getNode(); - if (!Chain) - continue; + if (!Cluster) + return; - // Look for other loads of the same chain. Find loads that are loading from - // the same base pointer and different offsets. - Visited.clear(); - Offsets.clear(); - O2SMap.clear(); - bool Cluster = false; - SDNode *Base = Node; - int64_t BaseOffset; - for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); - I != E; ++I) { - SDNode *User = *I; - if (User == Node || !Visited.insert(User)) - continue; - int64_t Offset1, Offset2; - if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || - Offset1 == Offset2) - // FIXME: Should be ok if they addresses are identical. But earlier - // optimizations really should have eliminated one of the loads. - continue; - if (O2SMap.insert(std::make_pair(Offset1, Base)).second) - Offsets.push_back(Offset1); - O2SMap.insert(std::make_pair(Offset2, User)); - Offsets.push_back(Offset2); - if (Offset2 < Offset1) { - Base = User; - BaseOffset = Offset2; - } else { - BaseOffset = Offset1; - } - Cluster = true; - } + // Sort them in increasing order. + std::sort(Offsets.begin(), Offsets.end()); + + // Check if the loads are close enough. + SmallVector Loads; + unsigned NumLoads = 0; + int64_t BaseOff = Offsets[0]; + SDNode *BaseLoad = O2SMap[BaseOff]; + Loads.push_back(BaseLoad); + for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { + int64_t Offset = Offsets[i]; + SDNode *Load = O2SMap[Offset]; + if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads)) + break; // Stop right here. Ignore loads that are further away. + Loads.push_back(Load); + ++NumLoads; + } - if (!Cluster) - continue; + if (NumLoads == 0) + return; - // Sort them in increasing order. - std::sort(Offsets.begin(), Offsets.end()); - - // Check if the loads are close enough. - SmallVector Loads; - unsigned NumLoads = 0; - int64_t BaseOff = Offsets[0]; - SDNode *BaseLoad = O2SMap[BaseOff]; - Loads.push_back(BaseLoad); - for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { - int64_t Offset = Offsets[i]; - SDNode *Load = O2SMap[Offset]; - if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, - NumLoads)) - break; // Stop right here. 
Ignore loads that are further away. - Loads.push_back(Load); - ++NumLoads; - } + // Cluster loads by adding MVT::Flag outputs and inputs. This also + // ensure they are scheduled in order of increasing addresses. + SDNode *Lead = Loads[0]; + AddFlags(Lead, SDValue(0, 0), true, DAG); + + SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1); + for (unsigned I = 1, E = Loads.size(); I != E; ++I) { + bool OutFlag = I < E - 1; + SDNode *Load = Loads[I]; + + AddFlags(Load, InFlag, OutFlag, DAG); + + if (OutFlag) + InFlag = SDValue(Load, Load->getNumValues() - 1); + + ++LoadsClustered; + } +} - if (NumLoads == 0) +/// ClusterNodes - Cluster certain nodes which should be scheduled together. +/// +void ScheduleDAGSDNodes::ClusterNodes() { + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), + E = DAG->allnodes_end(); NI != E; ++NI) { + SDNode *Node = &*NI; + if (!Node || !Node->isMachineOpcode()) continue; - // Cluster loads by adding MVT::Flag outputs and inputs. This also - // ensure they are scheduled in order of increasing addresses. - SDNode *Lead = Loads[0]; - AddFlags(Lead, SDValue(0,0), true, DAG); - SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1); - for (unsigned i = 1, e = Loads.size(); i != e; ++i) { - bool OutFlag = i < e-1; - SDNode *Load = Loads[i]; - AddFlags(Load, InFlag, OutFlag, DAG); - if (OutFlag) - InFlag = SDValue(Load, Load->getNumValues()-1); - ++LoadsClustered; - } + unsigned Opc = Node->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + if (TID.mayLoad()) + // Cluster loads from "near" addresses into combined SUnits. + ClusterNeighboringLoads(Node); } } @@ -364,8 +393,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (Cost >= 0) PhysReg = 0; - const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, - OpSU->Latency, PhysReg); + // If this is a ctrl dep, latency is 1. + unsigned OpLatency = isChain ? 1 : OpSU->Latency; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpLatency, PhysReg); if (!isChain && !UnitLatencies) { ComputeOperandLatency(OpN, N, i, const_cast(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast(dep)); @@ -382,8 +413,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { - // Cluster loads from "near" addresses into combined SUnits. - ClusterNeighboringLoads(); + // Cluster certain nodes which should be scheduled together. + ClusterNodes(); // Populate the SUnits array. BuildSchedUnits(); // Compute all the scheduling dependencies between nodes. 
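/* Editor's note (not part of the patch): the rewritten clustering logic walks
   the users of one load's chain, groups loads that share a base pointer (the
   FIXME's "they addresses" reads as "the addresses"), sorts them by offset,
   and then glues consecutive loads together by giving each an MVT::Flag
   result that the next one consumes. Schematically:

     load@off0 --flag--> load@off4 --flag--> load@off8

   so the scheduler must emit them adjacently and in increasing-address order.
   AddFlags morphs each node to append the extra MVT::Flag value while
   preserving its memory operands, and the new ClusterNodes wrapper does the
   per-node walk that ClusterNeighboringLoads used to do internally. */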
@@ -427,15 +458,18 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, return; unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); - if (Def->isMachineOpcode() && Use->isMachineOpcode()) { + if (Def->isMachineOpcode()) { const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); if (DefIdx >= II.getNumDefs()) return; int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); if (DefCycle < 0) return; - const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); - int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + int UseCycle = 1; + if (Use->isMachineOpcode()) { + const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); + UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + } if (UseCycle >= 0) { int Latency = DefCycle - UseCycle + 1; if (Latency >= 0) @@ -473,7 +507,7 @@ namespace { } // ProcessSourceNode - Process nodes with source order numbers. These are added -// to a vector which EmitSchedule use to determine how to insert dbg_value +// to a vector which EmitSchedule uses to determine how to insert dbg_value // instructions in the right order. static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, @@ -485,13 +519,13 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, return; MachineBasicBlock *BB = Emitter.getBlock(); - if (BB->empty() || BB->back().isPHI()) { + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { // Did not insert any instruction. Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); return; } - Orders.push_back(std::make_pair(Order, &BB->back())); + Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); if (!N->getHasDebugValue()) return; // Opportunistically insert immediate dbg_value uses, i.e. those with source @@ -530,7 +564,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { for (; PDI != PDE; ++PDI) { MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); if (DbgMI) - BB->insert(BB->end(), DbgMI); + BB->insert(InsertPos, DbgMI); } } @@ -574,9 +608,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert all the dbg_values which have not already been inserted in source // order sequence. if (HasDbg) { - MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin(); - while (BBBegin != BB->end() && BBBegin->isPHI()) - ++BBBegin; + MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); // Sort the source order instructions and use the order to insert debug // values. @@ -586,14 +618,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); // Now emit the rest according to source order. unsigned LastOrder = 0; - MachineInstr *LastMI = 0; for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { unsigned Order = Orders[i].first; MachineInstr *MI = Orders[i].second; // Insert all SDDbgValue's whose order(s) are before "Order". if (!MI) continue; - MachineBasicBlock *MIBB = MI->getParent(); #ifndef NDEBUG unsigned LastDIOrder = 0; #endif @@ -612,13 +642,14 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert to start of the BB (after PHIs). BB->insert(BBBegin, DbgMI); else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. 
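/* Editor's sketch (not part of the patch): with the ComputeOperandLatency
   change above, operand latency no longer requires the *user* to be a
   machine node. Assuming an itinerary where the def's result is ready at
   operand cycle 3:

     int DefCycle = 3;
     int UseCycle = 1;                       // default for non-machine users
     int Latency  = DefCycle - UseCycle + 1; // == 3

   A machine-opcode user still looks up its real operand cycle from the
   itinerary instead of taking the default. */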
MachineBasicBlock::iterator Pos = MI; - MIBB->insert(llvm::next(Pos), DbgMI); + MI->getParent()->insert(llvm::next(Pos), DbgMI); } } } LastOrder = Order; - LastMI = MI; } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index e8714ba..842fc8c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -108,7 +108,10 @@ namespace llvm { private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. - void ClusterNeighboringLoads(); + void ClusterNeighboringLoads(SDNode *Node); + /// ClusterNodes - Cluster certain nodes which should be scheduled together. + /// + void ClusterNodes(); /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 38bf68b..e83a034 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -790,9 +790,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... -SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli) +SelectionDAG::SelectionDAG(const TargetMachine &tm) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - FLI(fli), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); @@ -808,7 +807,6 @@ void SelectionDAG::init(MachineFunction &mf) { SelectionDAG::~SelectionDAG() { allnodes_clear(); delete Ordering; - DbgInfo->clear(); delete DbgInfo; } @@ -835,11 +833,8 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); - delete Ordering; - Ordering = new SDNodeOrdering(); + Ordering->clear(); DbgInfo->clear(); - delete DbgInfo; - DbgInfo = new SDDbgInfo(); } SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { @@ -980,7 +975,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { } } -SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { @@ -1015,7 +1010,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, GV, VT, + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2291,7 +2286,6 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, unsigned i) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); if (N->getMaskElt(i) < 0) return getUNDEF(VT.getVectorElementType()); unsigned Index = N->getMaskElt(i); @@ -2475,9 +2469,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); - if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) + + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + 
OpOpcode == ISD::ANY_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + + // (ext (trunx x)) -> x + if (OpOpcode == ISD::TRUNCATE) { + SDValue OpOp = Operand.getNode()->getOperand(0); + if (OpOp.getValueType() == VT) + return OpOp; + } break; case ISD::TRUNCATE: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2622,7 +2625,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N1.getOpcode() == ISD::BUILD_VECTOR && N2.getOpcode() == ISD::BUILD_VECTOR) { SmallVector Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); - Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); } break; @@ -3011,7 +3014,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. ConstantSDNode *N1C = dyn_cast(N1.getNode()); - ConstantSDNode *N2C = dyn_cast(N2.getNode()); switch (Opcode) { case ISD::CONCAT_VECTORS: // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to @@ -3020,8 +3022,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, N2.getOpcode() == ISD::BUILD_VECTOR && N3.getOpcode() == ISD::BUILD_VECTOR) { SmallVector Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); - Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); - Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end()); + Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end()); return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); } break; @@ -3041,14 +3043,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N2 == N3) return N2; // select C, X, X -> X break; - case ISD::BRCOND: - if (N2C) { - if (N2C->getZExtValue()) // Unconditional branch - return getNode(ISD::BR, DL, MVT::Other, N1, N3); - else - return N1; // Never-taken branch - } - break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); break; @@ -3267,6 +3261,15 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, if (VT.bitsGT(LVT)) VT = LVT; } + + // If we're optimizing for size, and there is a limit, bump the maximum number + // of operations inserted down to 4. This is a wild guess that approximates + // the size of a call to memcpy or memset (3 arguments + call). + if (Limit != ~0U) { + const Function *F = DAG.getMachineFunction().getFunction(); + if (F->hasFnAttr(Attribute::OptimizeForSize)) + Limit = 4; + } unsigned NumMemOps = 0; while (Size != 0) { @@ -3321,9 +3324,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); - uint64_t Limit = -1ULL; - if (!AlwaysInline) - Limit = TLI.getMaxStoresPerMemcpy(); + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(); + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), @@ -3368,7 +3370,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // FIXME does the case above also need this? 
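/* Editor's note (not part of the patch): the new fold above collapses an
   extend of a truncate back to the original value when the types line up;
   the comment's "trunx" is evidently a typo for "trunc". Sketch of the
   pattern, assuming this revision's getNode API:

     // X : i32
     SDValue T = DAG.getNode(ISD::TRUNCATE,   dl, MVT::i16, X);
     SDValue E = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, T); // folds to X

   This is unconditionally sound only when the high bits may hold anything,
   which appears to be why the fold sits in the extend handling alongside the
   (ext (ext x)) collapse that now also accepts ISD::ANY_EXTEND. */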
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); - Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, + Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), SrcSV, SrcSVOff + SrcOff, VT, isVol, false, MinAlign(SrcAlign, SrcOff)); @@ -3401,9 +3403,6 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // below a certain threshold. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector MemOps; - uint64_t Limit = -1ULL; - if (!AlwaysInline) - Limit = TLI.getMaxStoresPerMemmove(); bool DstAlignCanChange = false; MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); FrameIndexSDNode *FI = dyn_cast(Dst); @@ -3412,6 +3411,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), @@ -3895,8 +3895,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, EVT VT, SDValue Chain, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, bool isNonTemporal, @@ -3919,12 +3919,12 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, MemVT.getStoreSize(), Alignment); - return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO); + return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, EVT VT, SDValue Chain, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO) { if (VT == MemVT) { @@ -3974,18 +3974,18 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, bool isVolatile, bool isNonTemporal, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); - return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); - return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); } @@ -3995,7 +3995,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, LoadSDNode *LD = cast(OrigLoad); assert(LD->getOffset().getOpcode() == ISD::UNDEF && "Load is already a indexed load!"); - return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), + return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), 
LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); @@ -4141,9 +4141,10 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, - SDValue SV) { - SDValue Ops[] = { Chain, Ptr, SV }; - return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3); + SDValue SV, + unsigned Align) { + SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4); } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, @@ -4425,17 +4426,16 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { /// already exists. If the resultant node does not exist in the DAG, the /// input node is returned. As a degenerate case, if you specify the same /// input operands as the node already has, the input node is returned. -SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); // Check to see if there is no change. - if (Op == N->getOperand(0)) return InN; + if (Op == N->getOperand(0)) return N; // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4447,22 +4447,20 @@ SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) { // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); // Check to see if there is no change. if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) - return InN; // No operands changed, just return the input node. + return N; // No operands changed, just return the input node. // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4477,32 +4475,31 @@ UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) { // If this gets put into a CSE map, add it. 
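/* Editor's note (not part of the patch): the SDNode*-returning rewrite of
   UpdateNodeOperands keeps the same CSE discipline as before. Schematically:

     if (operands unchanged)           return N;        // nothing to do
     if (an identical node exists)     return Existing; // caller must use it
     remove N from the CSE maps, rewrite its operands in place,
     re-insert N, and return N;

   Returning a bare SDNode* makes the second case harder to miss: callers can
   no longer assume the result still refers to N, which is presumably why the
   call sites earlier in this patch rebuild SDValue(result, 0) explicitly. */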
if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) { +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { SDValue Ops[] = { Op1, Op2, Op3 }; return UpdateNodeOperands(N, Ops, 3); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4) { SDValue Ops[] = { Op1, Op2, Op3, Op4 }; return UpdateNodeOperands(N, Ops, 4); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4, SDValue Op5) { SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; return UpdateNodeOperands(N, Ops, 5); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { assert(N->getNumOperands() == NumOps && "Update with wrong number of operands"); @@ -4516,12 +4513,12 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { } // No operands changed, just return the input node. - if (!AnyChange) return InN; + if (!AnyChange) return N; // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4535,7 +4532,7 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { // If this gets put into a CSE map, add it. 
if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } /// DropOperands - Release the operands and set this node to have @@ -5378,9 +5375,10 @@ HandleSDNode::~HandleSDNode() { DropOperands(); } -GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, +GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL, + const GlobalValue *GA, EVT VT, int64_t o, unsigned char TF) - : SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) { + : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } @@ -5669,13 +5667,16 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; case ISD::FCOS: return "fcos"; - case ISD::FPOWI: return "fpowi"; - case ISD::FPOW: return "fpow"; case ISD::FTRUNC: return "ftrunc"; case ISD::FFLOOR: return "ffloor"; case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; // Binary operators case ISD::ADD: return "add"; @@ -5706,7 +5707,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; case ISD::VSETCC: return "vsetcc"; case ISD::SELECT: return "select"; @@ -6260,23 +6263,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), FrameOffset); - if (MFI.isFixedObjectIndex(FrameIdx)) { - int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; - - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. - unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment(); - unsigned Align = MinAlign(ObjectOffset, StackAlign); - - // Finally, the frame object itself may have a known alignment. Factor - // the alignment + offset into a new alignment. For example, if we know - // the FI is 8 byte aligned, but the pointer is 4 off, we really have a - // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte - // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. 
- return std::max(Align, FIInfoAlign); - } return FIInfoAlign; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fbe601f..d323c16 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "isel" #include "SDNodeDbgValue.h" #include "SelectionDAGBuilder.h" -#include "FunctionLoweringInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -32,6 +31,7 @@ #include "llvm/Module.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" @@ -70,113 +70,6 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); -namespace { - /// RegsForValue - This struct represents the registers (physical or virtual) - /// that a particular set of values is assigned, and the type information - /// about the value. The most common situation is to represent one value at a - /// time, but struct or array values are handled element-wise as multiple - /// values. The splitting of aggregates is performed recursively, so that we - /// never have aggregate-typed registers. The values at this point do not - /// necessarily have legal types, so each value may require one or more - /// registers of some legal type. - /// - struct RegsForValue { - /// TLI - The TargetLowering object. - /// - const TargetLowering *TLI; - - /// ValueVTs - The value types of the values, which may not be legal, and - /// may need to be promoted or synthesized from one or more registers. - /// - SmallVector ValueVTs; - - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) - /// - /// With virtual registers, the contents of RegVTs is redundant with TLI's - /// getRegisterType member function; however, with physical registers - /// it is necessary to have a separate record of the types. - /// - SmallVector RegVTs; - - /// Regs - This list holds the registers assigned to the values. - /// Each legal or promoted value requires one register, and each - /// expanded value requires multiple registers. - /// - SmallVector Regs; - - RegsForValue() : TLI(0) {} - - RegsForValue(const TargetLowering &tli, - const SmallVector &regs, - EVT regvt, EVT valuevt) - : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - RegsForValue(const TargetLowering &tli, - const SmallVector &regs, - const SmallVector &regvts, - const SmallVector &valuevts) - : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} - RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, const Type *Ty) : TLI(&tli) { - ComputeValueVTs(tli, Ty, ValueVTs); - - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT); - EVT RegisterVT = TLI->getRegisterType(Context, ValueVT); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(Reg + i); - RegVTs.push_back(RegisterVT); - Reg += NumRegs; - } - } - - /// areValueTypesLegal - Return true if types of all the values are legal.
- bool areValueTypesLegal() { - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT RegisterVT = RegVTs[Value]; - if (!TLI->isTypeLegal(RegisterVT)) - return false; - } - return true; - } - - - /// append - Add the specified values to this one. - void append(const RegsForValue &RHS) { - TLI = RHS.TLI; - ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); - RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); - Regs.append(RHS.Regs.begin(), RHS.Regs.end()); - } - - - /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from - /// this value and returns the result as a ValueVT value. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; - - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the - /// specified value into the registers specified by this object. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; - - /// AddInlineAsmOperands - Add this value to the specified inline asm node - /// operand list. This adds the code marker, matching input operand index - /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector &Ops) const; - }; -} - /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type larger than ValueVT then AssertOp can be used to specify whether the extra @@ -528,6 +421,268 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, } } +namespace { + /// RegsForValue - This struct represents the registers (physical or virtual) + /// that a particular set of values is assigned, and the type information + /// about the value. The most common situation is to represent one value at a + /// time, but struct or array values are handled element-wise as multiple + /// values. The splitting of aggregates is performed recursively, so that we + /// never have aggregate-typed registers. The values at this point do not + /// necessarily have legal types, so each value may require one or more + /// registers of some legal type. + /// + struct RegsForValue { + /// ValueVTs - The value types of the values, which may not be legal, and + /// may need to be promoted or synthesized from one or more registers. + /// + SmallVector ValueVTs; + + /// RegVTs - The value types of the registers. This is the same size as + /// ValueVTs and it records, for each value, what the type of the assigned + /// register or registers are. (Individual values are never synthesized + /// from more than one type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function; however, with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector RegVTs; + + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers.
+ /// + SmallVector Regs; + + RegsForValue() {} + + RegsForValue(const SmallVector &regs, + EVT regvt, EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + + RegsForValue(const SmallVector &regs, + const SmallVector &regvts, + const SmallVector &valuevts) + : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} + + RegsForValue(LLVMContext &Context, const TargetLowering &tli, + unsigned Reg, const Type *Ty) { + ComputeValueVTs(tli, Ty, ValueVTs); + + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); + EVT RegisterVT = tli.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } + } + + /// areValueTypesLegal - Return true if types of all the values are legal. + bool areValueTypesLegal(const TargetLowering &TLI) { + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT RegisterVT = RegVTs[Value]; + if (!TLI.isTypeLegal(RegisterVT)) + return false; + } + return true; + } + + /// append - Add the specified values to this one. + void append(const RegsForValue &RHS) { + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + } + + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVT value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the + /// specified value into the registers specified by this object. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// AddInlineAsmOperands - Add this value to the specified inline asm node + /// operand list. This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it. + void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector &Ops) const; + }; +} + +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, + FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Assemble the legal parts into the final values. + SmallVector Values(ValueVTs.size()); + SmallVector Parts; + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + // Copy the legal parts from the registers.
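// [Editor's aside, not part of the patch] What "legal parts" means above: a
// value whose type the target cannot hold in one register travels as several
// registers of a legal type and is reassembled by getCopyFromParts. For
// example, an i64 on a 32-bit target arrives as two i32 copies (register
// numbers hypothetical):
//   lo  = CopyFromReg vreg1024 : i32
//   hi  = CopyFromReg vreg1025 : i32
//   val = BUILD_PAIR lo, hi    : i64
// so ValueVTs holds {i64}, RegVTs holds {i32}, and Regs holds both vregs.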
+ EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; + + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + } else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } + + Chain = P.getValue(1); + + // If the source register was virtual and if we know something about it, + // add an assert node. + if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && + RegisterVT.isInteger() && !RegisterVT.isVector()) { + unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; + if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { + const FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo.LiveOutRegInfo[SlotNo]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + + if (FromVT != MVT::Other) + P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + } + } + + Parts[i] = P; + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Get the list of the values's legal parts. + unsigned NumRegs = Regs.size(); + SmallVector Parts(NumRegs); + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; + + getCopyToParts(DAG, dl, + Val.getValue(Val.getResNo() + Value), + &Parts[Part], NumParts, RegisterVT); + Part += NumParts; + } + + // Copy the parts into the registers. 
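// [Editor's aside, not part of the patch] The AssertSext/AssertZext ladder in
// getCopyFromRegs above, restated as a stand-alone helper (names ours): given
// the register width and the known sign/zero bits of the source vreg, pick the
// narrowest width the value could have been extended from.
static unsigned pickAssertWidth(unsigned RegSize, unsigned NumSignBits,
                                unsigned NumZeroBits, bool &isSExt) {
  static const unsigned Widths[] = { 1, 8, 16, 32 };
  for (unsigned i = 0; i != sizeof(Widths) / sizeof(Widths[0]); ++i) {
    if (NumSignBits > RegSize - Widths[i]) {  // value is sext from this width
      isSExt = true;
      return Widths[i];
    }
    if (NumZeroBits >= RegSize - Widths[i]) { // value is zext from this width
      isSExt = false;
      return Widths[i];
    }
  }
  return 0; // nothing tighter than the full register width is provable
}
// e.g. RegSize 32, NumSignBits 25 -> sext from i8, since 25 > 32 - 8.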
+ SmallVector Chains(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue Part; + if (Flag == 0) { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); + } else { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); + *Flag = Part.getValue(1); + } + + Chains[i] = Part.getValue(0); + } + + if (NumRegs == 1 || Flag) + // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is + // flagged to it. That is the CopyToReg nodes and the user are considered + // a single scheduling unit. If we create a TokenFactor and return it as + // chain, then the TokenFactor is both a predecessor (operand) of the + // user as well as a successor (the TF operands are flagged to the user). + // c1, f1 = CopyToReg + // c2, f2 = CopyToReg + // c3 = TokenFactor c1, c2 + // ... + // = op c3, ..., f2 + Chain = Chains[NumRegs-1]; + else + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); +} + +/// AddInlineAsmOperands - Add this value to the specified inlineasm node +/// operand list. This adds the code marker and includes the number of +/// values added into it. +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector &Ops) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); + if (HasMatching) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); + SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + Ops.push_back(Res); + + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + EVT RegisterVT = RegVTs[Value]; + for (unsigned i = 0; i != NumRegs; ++i) { + assert(Reg < Regs.size() && "Mismatch in # registers expected"); + Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + } + } +} void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { AA = &aa; @@ -543,6 +698,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { /// consumed. void SelectionDAGBuilder::clear() { NodeMap.clear(); + UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); CurDebugLoc = DebugLoc(); @@ -649,27 +805,63 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { + // If we already have an SDValue for this value, use it. It's important + // to do this first, so that we don't create a CopyFromReg if we already + // have a regular SDValue. SDValue &N = NodeMap[V]; if (N.getNode()) return N; + // If there's a virtual register allocated and initialized for this + // value, use it. + DenseMap::iterator It = FuncInfo.ValueMap.find(V); + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + } + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + return Val; +} + +/// getNonRegisterValue - Return an SDValue for the given Value, but +/// don't look in FuncInfo.ValueMap for a virtual register. +SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { + // If we already have an SDValue for this value, use it. 
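// [Editor's aside, not part of the patch] Resolution order of the accessors
// introduced here: getValue first consults the NodeMap cache, then any virtual
// register recorded in FuncInfo.ValueMap (emitting a CopyFromReg), and only
// then materializes the value via getValueImpl; getNonRegisterValue skips the
// ValueMap step. The split appears to matter for code that is about to define
// a value's virtual register, which must not read the value back through that
// same register.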
+ SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + return Val; +} + +/// getValueImpl - Helper function for getValue and getMaterializedValue. +/// Create an SDValue for the given value. +SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const Constant *C = dyn_cast(V)) { EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast(C)) - return N = DAG.getConstant(*CI, VT); + return DAG.getConstant(*CI, VT); if (const GlobalValue *GV = dyn_cast(C)) - return N = DAG.getGlobalAddress(GV, VT); + return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT); if (isa(C)) - return N = DAG.getConstant(0, TLI.getPointerTy()); + return DAG.getConstant(0, TLI.getPointerTy()); if (const ConstantFP *CFP = dyn_cast(C)) - return N = DAG.getConstantFP(*CFP, VT); + return DAG.getConstantFP(*CFP, VT); if (isa(C) && !V->getType()->isAggregateType()) - return N = DAG.getUNDEF(VT); + return DAG.getUNDEF(VT); if (const ConstantExpr *CE = dyn_cast(C)) { visit(CE->getOpcode(), *CE); @@ -757,82 +949,25 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); } - unsigned InReg = FuncInfo.ValueMap[V]; - assert(InReg && "Value not in map!"); - - RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); - SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); -} - -/// Get the EVTs and ArgFlags collections that represent the legalized return -/// type of the given function. This does not require a DAG or a return value, -/// and is suitable for use before any DAGs for the function are constructed. -static void getReturnInfo(const Type* ReturnType, - Attributes attr, SmallVectorImpl &OutVTs, - SmallVectorImpl &OutFlags, - const TargetLowering &TLI, - SmallVectorImpl *Offsets = 0) { - SmallVector ValueVTs; - ComputeValueVTs(TLI, ReturnType, ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) return; - unsigned Offset = 0; - - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - - if (attr & Attribute::SExt) - ExtendKind = ISD::SIGN_EXTEND; - else if (attr & Attribute::ZExt) - ExtendKind = ISD::ZERO_EXTEND; - - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. 
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } - - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( - PartVT.getTypeForEVT(ReturnType->getContext())); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr & Attribute::InReg) - Flags.setInReg(); - - // Propagate extension type if any - if (attr & Attribute::SExt) - Flags.setSExt(); - else if (attr & Attribute::ZExt) - Flags.setZExt(); - - for (unsigned i = 0; i < NumParts; ++i) { - OutVTs.push_back(PartVT); - OutFlags.push_back(Flags); - if (Offsets) - { - Offsets->push_back(Offset); - Offset += PartSize; - } - } + // If this is an instruction which fast-isel has deferred, select it now. + if (const Instruction *Inst = dyn_cast(V)) { + unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); } + + llvm_unreachable("Can't get register for value!"); + return SDValue(); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector Outs; - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + SmallVector OutVals; - if (!FLI.CanLowerReturn) { - unsigned DemoteReg = FLI.DemoteRegister; + if (!FuncInfo.CanLowerReturn) { + unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); // Emit a store of the return value through the virtual register. @@ -908,8 +1043,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { else if (F->paramHasAttr(0, Attribute::ZExt)) Flags.setZExt(); - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + /*isfixed=*/true)); + OutVals.push_back(Parts[i]); + } } } } @@ -918,7 +1056,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, - Outs, getCurDebugLoc(), DAG); + Outs, OutVals, getCurDebugLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1119,7 +1257,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases){ } void SelectionDAGBuilder::visitBr(const BranchInst &I) { - MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *BrMBB = FuncInfo.MBB; // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -1269,18 +1407,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); - // If the branch was constant folded, fix up the CFG. - if (BrCond.getOpcode() == ISD::BR) { - SwitchBB->removeSuccessor(CB.FalseBB); - } else { - // Otherwise, go ahead and insert the false branch. 
- if (BrCond == getControlRoot()) - SwitchBB->removeSuccessor(CB.TrueBB); - - if (CB.FalseBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, - DAG.getBasicBlock(CB.FalseBB)); - } + // Insert the false branch. + if (CB.FalseBB != NextBlock) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); DAG.setRoot(BrCond); } @@ -1319,7 +1449,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // therefore require extension or truncating. SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); - unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); + unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1370,7 +1500,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); - B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); + B.Reg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), B.Reg, ShiftOp); @@ -1402,29 +1532,41 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { - // Make desired shift SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, TLI.getPointerTy()); - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), - TLI.getPointerTy(), - DAG.getConstant(1, TLI.getPointerTy()), - ShiftOp); - - // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), - TLI.getPointerTy(), SwitchVal, - DAG.getConstant(B.Mask, TLI.getPointerTy())); - SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(AndOp.getValueType()), - AndOp, DAG.getConstant(0, TLI.getPointerTy()), - ISD::SETNE); + SDValue Cmp; + if (CountPopulation_64(B.Mask) == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(ShiftOp.getValueType()), + ShiftOp, + DAG.getConstant(CountTrailingZeros_64(B.Mask), + TLI.getPointerTy()), + ISD::SETEQ); + } else { + // Make desired shift + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(1, TLI.getPointerTy()), + ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + TLI.getPointerTy(), SwitchVal, + DAG.getConstant(B.Mask, TLI.getPointerTy())); + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(AndOp.getValueType()), + AndOp, DAG.getConstant(0, TLI.getPointerTy()), + ISD::SETNE); + } SwitchBB->addSuccessor(B.TargetBB); SwitchBB->addSuccessor(NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), - AndCmp, DAG.getBasicBlock(B.TargetBB)); + Cmp, DAG.getBasicBlock(B.TargetBB)); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1441,7 +1583,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, } void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { - MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *InvokeMBB = FuncInfo.MBB; // Retrieve successors. 
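// [Editor's aside, not part of the patch] The single-bit fast path in
// visitBitTestCase above: when the case mask has exactly one set bit,
// "((1 << x) & Mask) != 0" holds iff x equals that bit's position, so the
// shift-and-mask collapses to a single equality compare. In plain C++
// (GCC/Clang builtins):
static bool bitTestHits(unsigned long long Mask, unsigned x) {
  if (__builtin_popcountll(Mask) == 1)            // one-bit mask:
    return x == (unsigned)__builtin_ctzll(Mask);  // just compare positions
  return ((1ULL << x) & Mask) != 0;               // general multi-bit test
}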
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -1969,7 +2111,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, } void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { - MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()]; + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; @@ -2035,7 +2177,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { } void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { - MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. SmallVector succs; @@ -2245,7 +2387,6 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } @@ -2254,7 +2395,6 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } @@ -2579,7 +2719,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast(Idx)) { - if (CI->getZExtValue() == 0) continue; + if (CI->isZero()) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); SDValue OffsVal; @@ -2643,12 +2783,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue AllocSize = getValue(I.getArraySize()); - AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(), - AllocSize, - DAG.getConstant(TySize, AllocSize.getValueType())); - EVT IntPtr = TLI.getPointerTy(); - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + if (AllocSize.getValueType() != IntPtr) + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + + AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, + AllocSize, + DAG.getConstant(TySize, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -2804,8 +2945,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. 
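// [Editor's aside, not part of the patch] The dominant change in the hunks
// that follow is a mechanical API migration. CallInst historically stored the
// callee as operand 0 with the arguments at operands 1..N, so "first argument"
// was spelled getOperand(1). The argument-centric accessors name the same
// slots directly and stay correct if the underlying operand layout changes:
//   const CallInst &I = ...;
//   Value *Callee = I.getCalledValue();     // was: I.getOperand(0)
//   Value *Arg0   = I.getArgOperand(0);     // was: I.getOperand(1)
//   unsigned N    = I.getNumArgOperands();  // was: I.getNumOperands() - 1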
- for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { - SDValue Op = getValue(I.getOperand(i)); + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + SDValue Op = getValue(I.getArgOperand(i)); assert(TLI.isTypeLegal(Op.getValueType()) && "Intrinsic uses a non-legal type?"); Ops.push_back(Op); @@ -2910,11 +3051,11 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, SDValue Root = getRoot(); SDValue L = DAG.getAtomic(Op, getCurDebugLoc(), - getValue(I.getOperand(2)).getValueType().getSimpleVT(), + getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), Root, - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - I.getOperand(1)); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + I.getArgOperand(0)); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -2923,8 +3064,8 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, // implVisitAluOverflow - Lower arithmetic overflow instrinsics. const char * SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); @@ -2938,9 +3079,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); // Put the exponent in the right bit position for later addition to the // final result: @@ -3050,8 +3191,8 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FEXP, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3064,9 +3205,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. @@ -3160,8 +3301,8 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3174,9 +3315,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Get the exponent. 
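// [Editor's aside, not part of the patch] The limited-precision paths in the
// surrounding visitExp/visitLog/visitLog2/visitLog10/visitExp2/visitPow
// lowerings all reduce to base-2 primitives plus constant scaling; the float
// constants quoted in the comments (0.69314718f, 0.30102999f) are just these
// logarithms rounded to float. A sketch of two of the identities used:
#include <cmath>
static float log10ViaLog2(float x) { return std::log2(x) * 0.30102999f; } // log10(2)
static float exp10ViaExp2(float x) { return std::exp2(x * 3.3219281f); }  // log2(10)
// visitLog10 scales the exponent by log10(2); visitPow's IsExp10 path
// rewrites 10^x as 2^(x * log2(10)) before the polynomial expansion.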
@@ -3269,8 +3410,8 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG2, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3283,9 +3424,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. @@ -3371,8 +3512,8 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG10, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3385,9 +3526,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); @@ -3485,8 +3626,8 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FEXP2, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3497,12 +3638,12 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { void SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue result; - const Value *Val = I.getOperand(1); + const Value *Val = I.getArgOperand(0); DebugLoc dl = getCurDebugLoc(); bool IsExp10 = false; if (getValue(Val).getValueType() == MVT::f32 && - getValue(I.getOperand(2)).getValueType() == MVT::f32 && + getValue(I.getArgOperand(1)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (Constant *C = const_cast(dyn_cast(Val))) { if (ConstantFP *CFP = dyn_cast(C)) { @@ -3513,7 +3654,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { } if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(2)); + SDValue Op = getValue(I.getArgOperand(1)); // Put the exponent in the right bit position for later addition to the // final result: @@ -3618,9 +3759,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { } else { // No special expansion. 
result = DAG.getNode(ISD::FPOW, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); } setValue(&I, result); @@ -3696,7 +3837,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()]; + MachineBasicBlock *MBB = FuncInfo.MBB; if (MBB != &MF.front()) return false; @@ -3750,11 +3891,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: return "_setjmp"+!TLI.usesUnderscoreSetJmp(); @@ -3763,63 +3904,63 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. - assert(cast(I.getOperand(1)->getType())->getAddressSpace() + assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && - cast(I.getOperand(2)->getType())->getAddressSpace() + cast(I.getArgOperand(1)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast(I.getOperand(4))->getZExtValue(); - bool isVol = cast(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, - I.getOperand(1), 0, I.getOperand(2), 0)); + I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } case Intrinsic::memset: { // Assert for address < 256 since we support only user defined address // spaces. - assert(cast(I.getOperand(1)->getType())->getAddressSpace() + assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast(I.getOperand(4))->getZExtValue(); - bool isVol = cast(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getOperand(1), 0)); + I.getArgOperand(0), 0)); return 0; } case Intrinsic::memmove: { // Assert for address < 256 since we support only user defined address // spaces. 
- assert(cast(I.getOperand(1)->getType())->getAddressSpace() + assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && - cast(I.getOperand(2)->getType())->getAddressSpace() + cast(I.getArgOperand(1)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast(I.getOperand(4))->getZExtValue(); - bool isVol = cast(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast(I.getArgOperand(4))->getZExtValue(); // If the source and destination are known to not be aliases, we can // lower memmove as memcpy. uint64_t Size = -1ULL; if (ConstantSDNode *C = dyn_cast(Op3)) Size = C->getZExtValue(); - if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == + if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == AliasAnalysis::NoAlias) { DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - false, I.getOperand(1), 0, I.getOperand(2), 0)); + false, I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getOperand(1), 0, I.getOperand(2), 0)); + I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } case Intrinsic::dbg_declare: { @@ -3908,7 +4049,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { bool createUndef = false; // FIXME : Why not use getValue() directly ? - SDValue &N = NodeMap[V]; + SDValue N = NodeMap[V]; + if (!N.getNode() && isa(V)) + // Check unused arguments map. + N = UnusedArgNodeMap[V]; if (N.getNode()) { if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { SDV = DAG.getDbgValue(Variable, N.getNode(), @@ -3956,7 +4100,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() && + assert(FuncInfo.MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[1]; @@ -3968,7 +4112,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *CallMBB = FuncInfo.MBB; MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (CallMBB->isLandingPad()) AddCatchInfo(I, &MMI, CallMBB); @@ -3978,13 +4122,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); } // Insert the EHSELECTION instruction. 
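// [Editor's aside, not part of the patch] Why the alias query in the memmove
// case above is enough to downgrade it: memmove must tolerate overlapping
// source and destination, while memcpy may assume disjointness, so a memmove
// whose operands are proved NoAlias is exactly a memcpy. In plain C++ terms:
#include <cstring>
static void moveBytes(void *Dst, const void *Src, size_t N, bool ProvedNoAlias) {
  if (ProvedNoAlias)
    std::memcpy(Dst, Src, N);  // overlap ruled out: the cheaper copy is legal
  else
    std::memmove(Dst, Src, N); // must stay overlap-safe
}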
SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[2]; - Ops[0] = getValue(I.getOperand(1)); + Ops[0] = getValue(I.getArgOperand(0)); Ops[1] = getRoot(); SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); DAG.setRoot(Op.getValue(1)); @@ -3994,7 +4138,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. - GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); + GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, MVT::i32); setValue(&I, Res); @@ -4007,15 +4151,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, MVT::Other, getControlRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2)))); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); return 0; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); return 0; case Intrinsic::eh_dwarf_cfa: { - EVT VT = getValue(I.getOperand(1)).getValueType(); - SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl, + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl, TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), @@ -4031,7 +4174,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - ConstantInt *CI = dyn_cast(I.getOperand(1)); + ConstantInt *CI = dyn_cast(I.getArgOperand(0)); assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); @@ -4040,13 +4183,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_setjmp: { setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, getRoot(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; } @@ -4072,34 +4215,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } EVT DestVT = TLI.getValueType(I.getType()); - const Value *Op1 = I.getOperand(1); + const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), - getValue(I.getOperand(2)), - getValue(I.getOperand(3)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Code); setValue(&I, Res); return 0; } case Intrinsic::sqrt: setValue(&I, DAG.getNode(ISD::FSQRT, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::powi: - setValue(&I, ExpandPowI(dl, getValue(I.getOperand(1)), - getValue(I.getOperand(2)), DAG)); + setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG)); return 0; case Intrinsic::sin: setValue(&I, DAG.getNode(ISD::FSIN, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + 
getValue(I.getArgOperand(0)))); return 0; case Intrinsic::cos: setValue(&I, DAG.getNode(ISD::FCOS, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::log: visitLog(I); @@ -4121,14 +4264,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, - MVT::i16, getValue(I.getOperand(1)))); + MVT::i16, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::convert_from_fp16: setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl, - MVT::f32, getValue(I.getOperand(1)))); + MVT::f32, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::pcmarker: { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); return 0; } @@ -4143,23 +4286,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::cttz: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); return 0; @@ -4173,7 +4316,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::stackrestore: { - Res = getValue(I.getOperand(1)); + Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); return 0; } @@ -4183,8 +4326,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachineFrameInfo *MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(); - SDValue Src = getValue(I.getOperand(1)); // The guard's value. - AllocaInst *Slot = cast(I.getOperand(2)); + SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. + AllocaInst *Slot = cast(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; MFI->setStackProtectorIndex(FI); @@ -4201,14 +4344,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. 
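// [Editor's aside, not part of the patch] Context for the objectsize fallback
// below: llvm.objectsize (__builtin_object_size) must still fold to a constant
// when the size is unknowable. By convention the "don't know" answer is
// (size_t)-1 when a maximum estimate is requested and 0 when a minimum is
// requested, hence the branch on CI->isZero() choosing -1ULL versus 0. At the
// source level (GCC/Clang builtin):
//   char buf[16];
//   __builtin_object_size(buf, 0);  // max estimate; (size_t)-1 if unknown
//   __builtin_object_size(buf, 2);  // min estimate; 0 if unknown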
- ConstantInt *CI = dyn_cast(I.getOperand(2)); + ConstantInt *CI = dyn_cast(I.getArgOperand(1)); assert(CI && "Non-constant type in __builtin_object_size?"); - SDValue Arg = getValue(I.getOperand(0)); + SDValue Arg = getValue(I.getCalledValue()); EVT Ty = Arg.getValueType(); - if (CI->getZExtValue() == 0) + if (CI->isZero()) Res = DAG.getConstant(-1ULL, Ty); else Res = DAG.getConstant(0, Ty); @@ -4221,14 +4364,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::init_trampoline: { - const Function *F = cast(I.getOperand(2)->stripPointerCasts()); + const Function *F = cast(I.getArgOperand(1)->stripPointerCasts()); SDValue Ops[6]; Ops[0] = getRoot(); - Ops[1] = getValue(I.getOperand(1)); - Ops[2] = getValue(I.getOperand(2)); - Ops[3] = getValue(I.getOperand(3)); - Ops[4] = DAG.getSrcValue(I.getOperand(1)); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); Res = DAG.getNode(ISD::TRAMPOLINE, dl, @@ -4241,8 +4384,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::gcroot: if (GFI) { - const Value *Alloca = I.getOperand(1); - const Constant *TypeMap = cast(I.getOperand(2)); + const Value *Alloca = I.getArgOperand(0); + const Constant *TypeMap = cast(I.getArgOperand(1)); FrameIndexSDNode *FI = cast(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); @@ -4274,9 +4417,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[4]; Ops[0] = getRoot(); - Ops[1] = getValue(I.getOperand(1)); - Ops[2] = getValue(I.getOperand(2)); - Ops[3] = getValue(I.getOperand(3)); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); return 0; } @@ -4285,7 +4428,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[6]; Ops[0] = getRoot(); for (int x = 1; x < 6; ++x) - Ops[x] = getValue(I.getOperand(x)); + Ops[x] = getValue(I.getArgOperand(x - 1)); DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); return 0; @@ -4294,12 +4437,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Root = getRoot(); SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), - getValue(I.getOperand(2)).getValueType().getSimpleVT(), + getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), Root, - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - getValue(I.getOperand(3)), - I.getOperand(1)); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), + I.getArgOperand(0)); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -4353,14 +4496,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Args.reserve(CS.arg_size()); // Check whether the function can return without sret-demotion. 
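// [Editor's aside, not part of the patch] "sret demotion", which the checks
// below guard against having to do: when CanLowerReturn says the ABI cannot
// pass the return value back in registers, the call is lowered as if
//   struct Big f(void);
// had been declared
//   void f(struct Big *sret);
// with the caller supplying a stack slot (DemoteStackSlot) and re-loading each
// piece of the result from it afterwards; those reloads are the getLoad calls
// indexed by the Offsets array.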
- SmallVector OutVTs; - SmallVector OutsFlags; + SmallVector Outs; SmallVector Offsets; - getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI, &Offsets); + GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), + Outs, TLI, &Offsets); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - FTy->isVarArg(), OutVTs, OutsFlags, DAG); + FTy->isVarArg(), Outs, FTy->getContext()); SDValue DemoteStackSlot; @@ -4453,7 +4595,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, ComputeValueVTs(TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; - unsigned NumValues = OutVTs.size(); + unsigned NumValues = Outs.size(); SmallVector Values(NumValues); SmallVector Chains(NumValues); @@ -4461,7 +4603,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, + SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, Add, NULL, Offsets[i], false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); @@ -4580,16 +4722,16 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, /// lowered like a normal call. bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) - if (I.getNumOperands() != 4) + if (I.getNumArgOperands() != 3) return false; - const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); + const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || - !I.getOperand(3)->getType()->isIntegerTy() || + !I.getArgOperand(2)->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; - const ConstantInt *Size = dyn_cast(I.getOperand(3)); + const ConstantInt *Size = dyn_cast(I.getArgOperand(2)); // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 @@ -4656,11 +4798,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { void SelectionDAGBuilder::visitCall(const CallInst &I) { + // Handle inline assembly differently. + if (isa(I.getCalledValue())) { + visitInlineAsm(&I); + return; + } + const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { - const TargetIntrinsicInfo *II = TM.getIntrinsicInfo(); - if (II) { + if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { if (unsigned IID = II->getIntrinsicID(F)) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) @@ -4679,51 +4826,51 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!F->hasLocalLinkage() && F->hasName()) { StringRef Name = F->getName(); if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { - if (I.getNumOperands() == 3 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && - I.getType() == I.getOperand(2)->getType()) { - SDValue LHS = getValue(I.getOperand(1)); - SDValue RHS = getValue(I.getOperand(2)); + if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
+ I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.getType() == I.getArgOperand(1)->getType()) { + SDValue LHS = getValue(I.getArgOperand(0)); + SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType()) { - SDValue Tmp = getValue(I.getOperand(1)); + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; @@ -4733,14 +4880,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } } - } else if (isa(I.getOperand(0))) { - visitInlineAsm(&I); - return; } - + SDValue Callee; if (!RenameFn) - Callee = getValue(I.getOperand(0)); + Callee = getValue(I.getCalledValue()); else Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); @@ -4749,210 +4893,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LowerCallTo(&I, Callee, I.isTailCall()); } -/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from -/// this value and returns the result as a ValueVT value. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. 
-SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { - // Assemble the legal parts into the final values. - SmallVector Values(ValueVTs.size()); - SmallVector Parts; - for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - // Copy the legal parts from the registers. - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; - - Parts.resize(NumRegs); - for (unsigned i = 0; i != NumRegs; ++i) { - SDValue P; - if (Flag == 0) { - P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); - } else { - P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); - *Flag = P.getValue(2); - } - - Chain = P.getValue(1); - - // If the source register was virtual and if we know something about it, - // add an assert node. - if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && - RegisterVT.isInteger() && !RegisterVT.isVector()) { - unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); - if (FLI.LiveOutRegInfo.size() > SlotNo) { - FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo]; - - unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); - - // FIXME: We capture more information than the dag can represent. For - // now, just use the tightest assertzext/assertsext possible. - bool isSExt = true; - EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) - isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 - else if (NumZeroBits >= RegSize-1) - isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 - else if (NumSignBits > RegSize-8) - isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 - else if (NumZeroBits >= RegSize-8) - isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 - else if (NumSignBits > RegSize-16) - isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 - else if (NumZeroBits >= RegSize-16) - isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 - else if (NumSignBits > RegSize-32) - isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 - else if (NumZeroBits >= RegSize-32) - isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - - if (FromVT != MVT::Other) - P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, - RegisterVT, P, DAG.getValueType(FromVT)); - } - } - - Parts[i] = P; - } - - Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT); - Part += NumRegs; - Parts.clear(); - } - - return DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Values[0], ValueVTs.size()); -} - -/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the -/// specified value into the registers specified by this object. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. -void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { - // Get the list of the values's legal parts. 
- unsigned NumRegs = Regs.size(); - SmallVector Parts(NumRegs); - for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; - - getCopyToParts(DAG, dl, - Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT); - Part += NumParts; - } - - // Copy the parts into the registers. - SmallVector Chains(NumRegs); - for (unsigned i = 0; i != NumRegs; ++i) { - SDValue Part; - if (Flag == 0) { - Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); - } else { - Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); - *Flag = Part.getValue(1); - } - - Chains[i] = Part.getValue(0); - } - - if (NumRegs == 1 || Flag) - // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is - // flagged to it. That is the CopyToReg nodes and the user are considered - // a single scheduling unit. If we create a TokenFactor and return it as - // chain, then the TokenFactor is both a predecessor (operand) of the - // user as well as a successor (the TF operands are flagged to the user). - // c1, f1 = CopyToReg - // c2, f2 = CopyToReg - // c3 = TokenFactor c1, c2 - // ... - // = op c3, ..., f2 - Chain = Chains[NumRegs-1]; - else - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); -} - -/// AddInlineAsmOperands - Add this value to the specified inlineasm node -/// operand list. This adds the code marker and includes the number of -/// values added into it. -void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector &Ops) const { - unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); - if (HasMatching) - Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); - SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); - Ops.push_back(Res); - - for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]); - EVT RegisterVT = RegVTs[Value]; - for (unsigned i = 0; i != NumRegs; ++i) { - assert(Reg < Regs.size() && "Mismatch in # registers expected"); - Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); - } - } -} - -/// isAllocatableRegister - If the specified register is safe to allocate, -/// i.e. it isn't a stack pointer or some other special register, return the -/// register class for the register. Otherwise, return null. -static const TargetRegisterClass * -isAllocatableRegister(unsigned Reg, MachineFunction &MF, - const TargetLowering &TLI, - const TargetRegisterInfo *TRI) { - EVT FoundVT = MVT::Other; - const TargetRegisterClass *FoundRC = 0; - for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), - E = TRI->regclass_end(); RCI != E; ++RCI) { - EVT ThisVT = MVT::Other; - - const TargetRegisterClass *RC = *RCI; - // If none of the value types for this register class are valid, we - // can't use it. For example, 64-bit reg classes on 32-bit targets. - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (TLI.isTypeLegal(*I)) { - // If we have already found this register in a different register class, - // choose the one with the largest VT specified. For example, on - // PowerPC, we favor f64 register classes over f32. 
- if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { - ThisVT = *I; - break; - } - } - } - - if (ThisVT == MVT::Other) continue; - - // NOTE: This isn't ideal. In particular, this might allocate the - // frame pointer in functions that need it (due to them not being taken - // out of allocation, because a variable sized allocation hasn't been seen - // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } - } - return FoundRC; -} - - namespace llvm { + /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : @@ -5041,8 +4983,56 @@ private: Regs.insert(*Aliases); } }; + } // end llvm namespace. +/// isAllocatableRegister - If the specified register is safe to allocate, +/// i.e. it isn't a stack pointer or some other special register, return the +/// register class for the register. Otherwise, return null. +static const TargetRegisterClass * +isAllocatableRegister(unsigned Reg, MachineFunction &MF, + const TargetLowering &TLI, + const TargetRegisterInfo *TRI) { + EVT FoundVT = MVT::Other; + const TargetRegisterClass *FoundRC = 0; + for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), + E = TRI->regclass_end(); RCI != E; ++RCI) { + EVT ThisVT = MVT::Other; + + const TargetRegisterClass *RC = *RCI; + // If none of the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (TLI.isTypeLegal(*I)) { + // If we have already found this register in a different register class, + // choose the one with the largest VT specified. For example, on + // PowerPC, we favor f64 register classes over f32. + if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { + ThisVT = *I; + break; + } + } + } + + if (ThisVT == MVT::Other) continue; + + // NOTE: This isn't ideal. In particular, this might allocate the + // frame pointer in functions that need it (due to them not being taken + // out of allocation, because a variable sized allocation hasn't been seen + // yet). This is a slight code pessimization, but should still work. + for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), + E = RC->allocation_order_end(MF); I != E; ++I) + if (*I == Reg) { + // We found a matching register class. Keep looking at others in case + // we find one with larger registers that this physreg is also in. + FoundRC = RC; + FoundVT = ThisVT; + break; + } + } + return FoundRC; +} /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. 
We prefer to assign virtual registers, to allow the @@ -5154,7 +5144,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, } } - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; @@ -5172,7 +5162,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, for (; NumRegs; --NumRegs) Regs.push_back(RegInfo.createVirtualRegister(RC)); - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); return; } @@ -5215,7 +5205,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, for (unsigned i = RegStart; i != RegEnd; ++i) Regs.push_back(RegClassRegs[i]); - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(), + OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), OpInfo.ConstraintVT); OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; @@ -5332,7 +5322,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG); + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. @@ -5406,6 +5396,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); + // Remember the AlignStack bit as operand 3. + AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0, + MVT::i1)); + // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. RegsForValue RetValRegs; @@ -5497,7 +5491,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } RegsForValue MatchedRegs; - MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); MatchedRegs.RegVTs.push_back(RegVT); @@ -5535,7 +5528,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], - hasMemory, Ops, DAG); + Ops, DAG); if (Ops.empty()) report_fatal_error("Invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -5570,7 +5563,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal()) + !OpInfo.AssignedRegs.areValueTypesLegal(TLI)) report_fatal_error("Couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -5595,7 +5588,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Finish up input operands. Set the input chain and add the flag last. - AsmNodeOperands[0] = Chain; + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), @@ -5606,7 +5599,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this asm returns a register value, copy the result from that register // and set it as the value of the call. 
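// Sketch, illustration only: after the change above, the fixed leading
// operands of an ISD::INLINEASM node are
//   0  InlineAsm::Op_InputChain    the chain
//   1  InlineAsm::Op_AsmString     the asm string
//   2  InlineAsm::Op_MDNode        the !srcloc metadata
//   3  InlineAsm::Op_IsAlignStack  the new i1 alignstack flag
// so consumers can index them symbolically. A hypothetical reader of the
// flag, assuming only APIs the patch itself uses:

#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool asmNeedsStackRealignment(const SDNode *N) {
  // Operand 3 is a target constant holding the alignstack bit.
  SDValue Op = N->getOperand(InlineAsm::Op_IsAlignStack);
  return cast<ConstantSDNode>(Op)->getZExtValue() != 0;
}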
if (!RetValRegs.Regs.empty()) { - SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, &Flag); // FIXME: Why don't we do this for inline asms with MRVs? @@ -5646,7 +5639,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; - SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, &Flag); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -5672,14 +5665,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { + const TargetData &TD = *TLI.getTargetData(); SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), - DAG.getSrcValue(I.getOperand(0))); + DAG.getSrcValue(I.getOperand(0)), + TD.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } @@ -5687,17 +5682,17 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - DAG.getSrcValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(2)))); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getSrcValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(1)))); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -5715,6 +5710,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, DebugLoc dl) const { // Handle all of the outgoing arguments. SmallVector Outs; + SmallVector OutVals; for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); @@ -5768,13 +5764,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs); + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + i < NumFixedArgs); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) MyFlags.Flags.setOrigAlign(1); Outs.push_back(MyFlags); + OutVals.push_back(Parts[j]); } } } @@ -5803,7 +5801,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, SmallVector InVals; Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. 
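// Sketch, assumed shape rather than patch code: ISD::OutputArg no longer
// carries the SDValue itself; flags and value type stay in Outs while the
// values travel in a parallel OutVals vector with matching indices, which is
// what the revised LowerCall signature below receives. A hypothetical helper
// showing the pairing (OutputArg lives in llvm/Target/TargetCallingConv.h
// after this update):

#include "llvm/Target/TargetCallingConv.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

static void addOutgoingPart(SDValue Part, ISD::ArgFlagsTy Flags, bool isFixed,
                            SmallVectorImpl<ISD::OutputArg> &Outs,
                            SmallVectorImpl<SDValue> &OutVals) {
  // Outs[i] describes OutVals[i]; targets consume both side by side.
  Outs.push_back(ISD::OutputArg(Flags, Part.getValueType(), isFixed));
  OutVals.push_back(Part);
}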
assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -5876,7 +5874,7 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { - SDValue Op = getValue(V); + SDValue Op = getNonRegisterValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); @@ -5894,21 +5892,16 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. const Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDB->DAG; - SDValue OldRoot = DAG.getRoot(); DebugLoc dl = SDB->getCurDebugLoc(); const TargetData *TD = TLI.getTargetData(); SmallVector Ins; // Check whether the function can return without sret-demotion. - SmallVector OutVTs; - SmallVector OutsFlags; - getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI); - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); - - FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(), - OutVTs, OutsFlags, DAG); - if (!FLI.CanLowerReturn) { + SmallVector Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + Outs, TLI); + + if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); @@ -6002,7 +5995,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // Set up the argument values. unsigned i = 0; Idx = 1; - if (!FLI.CanLowerReturn) { + if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector ValueVTs; @@ -6016,7 +6009,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); - FLI.DemoteRegister = SRetReg; + FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); @@ -6032,6 +6025,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SmallVector ValueVTs; ComputeValueVTs(TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); + + // If this argument is unused then remember its value. It is used to generate + // debugging information. 
+ if (I->use_empty() && NumValues) + SDB->setUnusedArgValue(I, InVals[i]); + for (unsigned Value = 0; Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); @@ -6112,17 +6111,20 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (const Constant *C = dyn_cast(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { - RegOut = FuncInfo.CreateRegForValue(C); + RegOut = FuncInfo.CreateRegs(C->getType()); CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; } else { - Reg = FuncInfo.ValueMap[PHIOp]; - if (Reg == 0) { + DenseMap::iterator I = + FuncInfo.ValueMap.find(PHIOp); + if (I != FuncInfo.ValueMap.end()) + Reg = I->second; + else { assert(isa(PHIOp) && FuncInfo.StaticAllocaMap.count(cast(PHIOp)) && "Didn't codegen value into a register!??"); - Reg = FuncInfo.CreateRegForValue(PHIOp); + Reg = FuncInfo.CreateRegs(PHIOp->getType()); CopyValueToVirtualRegister(PHIOp, Reg); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 3fcd4b9..46733d6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -88,6 +88,10 @@ class SelectionDAGBuilder { DebugLoc CurDebugLoc; DenseMap NodeMap; + + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used + /// to preserve debug information for incoming arguments. + DenseMap UnusedArgNodeMap; public: /// PendingLoads - Loads are not emitted to the program immediately. We bunch @@ -342,6 +346,8 @@ public: void visit(unsigned Opcode, const User &I); SDValue getValue(const Value *V); + SDValue getNonRegisterValue(const Value *V); + SDValue getValueImpl(const Value *V); void setValue(const Value *V, SDValue NewN) { SDValue &N = NodeMap[V]; @@ -349,6 +355,12 @@ public: N = NewN; } + void setUnusedArgValue(const Value *V, SDValue NewN) { + SDValue &N = UnusedArgNodeMap[V]; + assert(N.getNode() == 0 && "Already set a value for this node!"); + N = NewN; + } + void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::set &OutputRegs, std::set &InputRegs); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 65b8d4f..08ba548 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -14,7 +14,7 @@ #define DEBUG_TYPE "isel" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" -#include "FunctionLoweringInfo.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" @@ -171,7 +171,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(tm, *FuncInfo)), + CurDAG(new SelectionDAG(tm)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), @@ -244,7 +244,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); CurDAG->init(*MF); - FuncInfo->set(Fn, *MF, EnableFastISel); + FuncInfo->set(Fn, *MF); SDB->init(GFI, *AA); SelectAllBasicBlocks(Fn); @@ -300,7 +300,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for (MachineBasicBlock::const_iterator II = 
MBB->begin(), IE = MBB->end(); II != IE; ++II) { const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); - if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) { + + // Operand 1 of an inline asm instruction indicates whether the asm + // needs stack or not. + if ((II->isInlineAsm() && II->getOperand(1).getImm()) || + (TID.isCall() && !TID.isReturn())) { MFI->setHasCalls(true); goto done; } @@ -312,6 +316,26 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if there is a call to setjmp in the machine function. MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn)); + // Replace forward-declared registers with the registers containing + // the desired value. + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (DenseMap::iterator + I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); + I != E; ++I) { + unsigned From = I->first; + unsigned To = I->second; + // If To is also scheduled to be replaced, find what its ultimate + // replacement is. + for (;;) { + DenseMap::iterator J = + FuncInfo->RegFixups.find(To); + if (J == E) break; + To = J->second; + } + // Replace it. + MRI.replaceRegWith(From, To); + } + // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); @@ -319,10 +343,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } -MachineBasicBlock * -SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB, - const BasicBlock *LLVMBB, - BasicBlock::const_iterator Begin, +void +SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted @@ -337,7 +359,7 @@ SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB, SDB->clear(); // Final step, emit the lowered DAG as machine code. - return CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } namespace { @@ -372,102 +394,6 @@ public: }; } -/// TrivialTruncElim - Eliminate some trivial nops that can result from -/// ShrinkDemandedOps: (trunc (ext n)) -> n. -static bool TrivialTruncElim(SDValue Op, - TargetLowering::TargetLoweringOpt &TLO) { - SDValue N0 = Op.getOperand(0); - EVT VT = Op.getValueType(); - if ((N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::SIGN_EXTEND || - N0.getOpcode() == ISD::ANY_EXTEND) && - N0.getOperand(0).getValueType() == VT) { - return TLO.CombineTo(Op, N0.getOperand(0)); - } - return false; -} - -/// ShrinkDemandedOps - A late transformation pass that shrink expressions -/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts -/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. -void SelectionDAGISel::ShrinkDemandedOps() { - SmallVector Worklist; - SmallPtrSet InWorklist; - - // Add all the dag nodes to the worklist. - Worklist.reserve(CurDAG->allnodes_size()); - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - Worklist.push_back(I); - InWorklist.insert(I); - } - - TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true); - while (!Worklist.empty()) { - SDNode *N = Worklist.pop_back_val(); - InWorklist.erase(N); - - if (N->use_empty() && N != CurDAG->getRoot().getNode()) { - // Deleting this node may make its operands dead, add them to the worklist - // if they aren't already there. 
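// Sketch, standalone illustration of the RegFixups loop above: a fixup maps
// a forward-declared virtual register to its replacement, and a replacement
// may itself be scheduled for replacement, so the chain is followed to its
// end before MRI.replaceRegWith rewrites any uses.

#include "llvm/ADT/DenseMap.h"
using namespace llvm;

static unsigned resolveFixup(const DenseMap<unsigned, unsigned> &Fixups,
                             unsigned Reg) {
  for (;;) {
    DenseMap<unsigned, unsigned>::const_iterator J = Fixups.find(Reg);
    if (J == Fixups.end())
      return Reg;    // end of the chain: the ultimate replacement
    Reg = J->second; // follow one replacement step
  }
}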
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (InWorklist.insert(N->getOperand(i).getNode())) - Worklist.push_back(N->getOperand(i).getNode()); - - CurDAG->DeleteNode(N); - continue; - } - - // Run ShrinkDemandedOp on scalar binary operations. - if (N->getNumValues() != 1 || - !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger()) - continue; - - unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); - APInt Demanded = APInt::getAllOnesValue(BitWidth); - APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, - KnownZero, KnownOne, TLO) && - (N->getOpcode() != ISD::TRUNCATE || - !TrivialTruncElim(SDValue(N, 0), TLO))) - continue; - - // Revisit the node. - assert(!InWorklist.count(N) && "Already in worklist"); - Worklist.push_back(N); - InWorklist.insert(N); - - // Replace the old value with the new one. - DEBUG(errs() << "\nShrinkDemandedOps replacing "; - TLO.Old.getNode()->dump(CurDAG); - errs() << "\nWith: "; - TLO.New.getNode()->dump(CurDAG); - errs() << '\n'); - - if (InWorklist.insert(TLO.New.getNode())) - Worklist.push_back(TLO.New.getNode()); - - SDOPsWorkListRemover DeadNodes(Worklist, InWorklist); - CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); - - if (!TLO.Old.getNode()->use_empty()) continue; - - for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); - i != e; ++i) { - SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); - if (OpNode->hasOneUse()) { - // Add OpNode to the end of the list to revisit. - DeadNodes.RemoveFromWorklist(OpNode); - Worklist.push_back(OpNode); - InWorklist.insert(OpNode); - } - } - - DeadNodes.RemoveFromWorklist(TLO.Old.getNode()); - CurDAG->DeleteNode(TLO.Old.getNode()); - } -} - void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet VisitedNodes; SmallVector Worklist; @@ -522,7 +448,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { } while (!Worklist.empty()); } -MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { +void SelectionDAGISel::CodeGenAndEmitDAG() { std::string GroupName; if (TimePassesIsEnabled) GroupName = "Instruction Selection and Scheduling"; @@ -531,23 +457,19 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) BlockName = MF->getFunction()->getNameStr() + ":" + - BB->getBasicBlock()->getNameStr(); + FuncInfo->MBB->getBasicBlock()->getNameStr(); - DEBUG(dbgs() << "Initial selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); // Run the DAG combiner in pre-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining 1", GroupName); - CurDAG->Combine(Unrestricted, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); CurDAG->Combine(Unrestricted, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. 
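// Sketch, hypothetical phase name: the timing changes in this function all
// apply one idiom. NamedRegionTimer now accepts the enable flag as a
// constructor argument, so each phase is written once inside a scoped block
// instead of being duplicated across an if (TimePassesIsEnabled) / else pair.

#include "llvm/Support/Timer.h"
#include <string>
using namespace llvm;

static void runPhase(bool TimePassesIsEnabled, const std::string &GroupName) {
  // The timer only records when the flag is set; otherwise construction and
  // destruction are cheap no-ops, so the body needs no second copy.
  NamedRegionTimer T("Some Phase", GroupName, TimePassesIsEnabled);
  // ... the phase's work goes here ...
}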
@@ -555,44 +477,36 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { BlockName); bool Changed; - if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization", GroupName); - Changed = CurDAG->LegalizeTypes(); - } else { + { + NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled); Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump()); if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize types", GroupName); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining after legalize types", GroupName, + TimePassesIsEnabled); CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"; + CurDAG->dump()); } - if (TimePassesIsEnabled) { - NamedRegionTimer T("Vector Legalization", GroupName); - Changed = CurDAG->LegalizeVectors(); - } else { + { + NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled); Changed = CurDAG->LegalizeVectors(); } if (Changed) { - if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization 2", GroupName); - CurDAG->LegalizeTypes(); - } else { + { + NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); CurDAG->LegalizeTypes(); } @@ -600,95 +514,79 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { CurDAG->viewGraph("dag-combine-lv input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, + TimePassesIsEnabled); CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"; + CurDAG->dump()); } if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Legalization", GroupName); - CurDAG->Legalize(OptLevel); - } else { + { + NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); CurDAG->Legalize(OptLevel); } - DEBUG(dbgs() << "Legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. 
- if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining 2", GroupName); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump()); - if (OptLevel != CodeGenOpt::None) { - ShrinkDemandedOps(); + if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); - } if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Selection", GroupName); - DoInstructionSelection(); - } else { + { + NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled); DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); // Schedule machine code. ScheduleDAGSDNodes *Scheduler = CreateScheduler(); - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Scheduling", GroupName); - Scheduler->Run(CurDAG, BB, BB->end()); - } else { - Scheduler->Run(CurDAG, BB, BB->end()); + { + NamedRegionTimer T("Instruction Scheduling", GroupName, + TimePassesIsEnabled); + Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); } if (ViewSUnitDAGs) Scheduler->viewGraph(); // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(); - } else { - BB = Scheduler->EmitSchedule(); + { + NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); + + FuncInfo->MBB = Scheduler->EmitSchedule(); + FuncInfo->InsertPt = Scheduler->InsertPos; } // Free the scheduler state. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName); - delete Scheduler; - } else { + { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, + TimePassesIsEnabled); delete Scheduler; } // Free the SelectionDAG state, now that we're finished with it. CurDAG->clear(); - - return BB; } void SelectionDAGISel::DoInstructionSelection() { @@ -750,21 +648,22 @@ void SelectionDAGISel::DoInstructionSelection() { /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. -void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { +void SelectionDAGISel::PrepareEHLandingPad() { // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. - MCSymbol *Label = MF->getMMI().addLandingPad(BB); + MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB); const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); - BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); + BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) + .addSym(Label); // Mark exception register as live in. unsigned Reg = TLI.getExceptionAddressRegister(); - if (Reg) BB->addLiveIn(Reg); + if (Reg) FuncInfo->MBB->addLiveIn(Reg); // Mark exception selector register as live in. 
Reg = TLI.getExceptionSelectorRegister(); - if (Reg) BB->addLiveIn(Reg); + if (Reg) FuncInfo->MBB->addLiveIn(Reg); // FIXME: Hack around an exception handling flaw (PR1508): the personality // function and list of typeids logically belong to the invoke (or, if you @@ -777,7 +676,7 @@ void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { // in exceptions not being caught because no typeids are associated with // the invoke. This may not be the only way things can go wrong, but it // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = BB->getBasicBlock(); + const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock(); const BranchInst *Br = dyn_cast(LLVMBB->getTerminator()); if (Br && Br->isUnconditional()) { // Critical edge? @@ -796,83 +695,100 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (EnableFastISel) - FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap, - FuncInfo->StaticAllocaMap, - FuncInfo->PHINodesToUpdate -#ifndef NDEBUG - , FuncInfo->CatchInfoLost -#endif - ); + FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { const BasicBlock *LLVMBB = &*I; - MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB]; + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); BasicBlock::const_iterator const End = LLVMBB->end(); - BasicBlock::const_iterator BI = Begin; + BasicBlock::const_iterator BI = End; + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); + + // Setup an EH landing-pad block. + if (FuncInfo->MBB->isLandingPad()) + PrepareEHLandingPad(); + // Lower any arguments needed in this block if this is the entry block. if (LLVMBB == &Fn.getEntryBlock()) LowerArguments(LLVMBB); - // Setup an EH landing-pad block. - if (BB->isLandingPad()) - PrepareEHLandingPad(BB); - // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { + FastIS->startNewBlock(); + // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { CurDAG->setRoot(SDB->getControlRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); + + // If we inserted any instructions at the beginning, make a note of + // where they are, so we can be sure to emit subsequent instructions + // after them. + if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) + FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt)); + else + FastIS->setLastLocalValue(0); } - FastIS->startNewBlock(BB); + // Do FastISel on as many instructions as possible. - for (; BI != End; ++BI) { + for (; BI != Begin; --BI) { + const Instruction *Inst = llvm::prior(BI); + + // If we no longer require this instruction, skip it. + if (!Inst->mayWriteToMemory() && + !isa(Inst) && + !isa(Inst) && + !FuncInfo->isExportedInst(Inst)) + continue; + + // Bottom-up: reset the insert pos at the top, after any local-value + // instructions. + FastIS->recomputeInsertPt(); + // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(BI)) + if (FastIS->SelectInstruction(Inst)) continue; // Then handle certain instructions as single-LLVM-Instruction blocks. 
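// Sketch, simplified shape of the loop above rather than patch code: FastISel
// now walks each block bottom-up, so iteration starts one past the end and
// llvm::prior() yields the instruction to visit; side-effect-free, unexported
// instructions can be skipped outright, and the insert point is recomputed
// above the local-value instructions before each selection.

#include "llvm/BasicBlock.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;

static void walkBottomUp(const BasicBlock *BB) {
  BasicBlock::const_iterator Begin = BB->getFirstNonPHI(), BI = BB->end();
  for (; BI != Begin; --BI) {
    const Instruction *Inst = &*llvm::prior(BI); // the instruction before BI
    (void)Inst; // visit Inst here: uses are seen before their definitions
  }
}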
- if (isa(BI)) { + if (isa(Inst)) { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; - BI->dump(); + Inst->dump(); } - if (!BI->getType()->isVoidTy() && !BI->use_empty()) { - unsigned &R = FuncInfo->ValueMap[BI]; + if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { + unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) - R = FuncInfo->CreateRegForValue(BI); + R = FuncInfo->CreateRegs(Inst->getType()); } bool HadTailCall = false; - BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall); + SelectBasicBlock(Inst, BI, HadTailCall); // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { - BI = End; + --BI; break; } - // If the instruction was codegen'd with multiple blocks, - // inform the FastISel object where to resume inserting. - FastIS->setCurrentBlock(BB); continue; } // Otherwise, give up on FastISel for the rest of the block. // For now, be a little lenient about non-branch terminators. - if (!isa(BI) || isa(BI)) { + if (!isa(Inst) || isa(Inst)) { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; - BI->dump(); + Inst->dump(); } if (EnableFastISelAbort) // The "fast" selector couldn't handle something and bailed. @@ -881,17 +797,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } break; } + + FastIS->recomputeInsertPt(); } // Run SelectionDAG instruction selection on the remainder of the block // not handled by FastISel. If FastISel is not run, this is the entire // block. - if (BI != End) { - bool HadTailCall; - BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall); - } + bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); - FinishBasicBlock(BB); + FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); } @@ -899,11 +815,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } void -SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { +SelectionDAGISel::FinishBasicBlock() { DEBUG(dbgs() << "Total amount of phi nodes to update: " - << FuncInfo->PHINodesToUpdate.size() << "\n"); - DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) + << FuncInfo->PHINodesToUpdate.size() << "\n"; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) dbgs() << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); @@ -917,11 +833,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); - if (!BB->isSuccessor(PHI->getParent())) + if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) continue; PHI->addOperand( MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } return; } @@ -930,33 +846,35 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->BitTestCases[i].Parent; + FuncInfo->MBB = SDB->BitTestCases[i].Parent; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - SDB->visitBitTestHeader(SDB->BitTestCases[i], BB); + SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); 
SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], - BB); + FuncInfo->MBB); else SDB->visitBitTestCase(SDB->BitTestCases[i].Default, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], - BB); + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } // Update PHI Nodes @@ -1001,22 +919,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Lower header first, if it wasn't already lowered if (!SDB->JTCases[i].first.Emitted) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, - BB); + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->JTCases[i].second.MBB; + FuncInfo->MBB = SDB->JTCases[i].second.MBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitJumpTable(SDB->JTCases[i].second); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); // Update PHI Nodes for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); @@ -1034,11 +954,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here - if (BB->isSuccessor(PHIBB)) { + if (FuncInfo->MBB->isSuccessor(PHIBB)) { PHI->addOperand (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } } } @@ -1050,10 +970,10 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); - if (BB->isSuccessor(PHI->getParent())) { + if (FuncInfo->MBB->isSuccessor(PHI->getParent())) { PHI->addOperand( MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } } @@ -1061,7 +981,8 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB; + MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Determine the unique successors. SmallVector Succs; @@ -1071,21 +992,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Emit the code. Note that this could result in ThisBB being split, so // we need to check for updates. 
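// Sketch, hypothetical helper distilling the PHI rewiring done throughout
// FinishBasicBlock: a machine PHI stores (value, predecessor) operand pairs,
// so wiring in a new predecessor appends the incoming vreg followed by the
// block it arrives from.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineOperand.h"
#include <cassert>
using namespace llvm;

static void addPHIIncoming(MachineInstr *PHI, unsigned Reg,
                           MachineBasicBlock *Pred) {
  assert(PHI->isPHI() && "Expected a machine PHI node");
  PHI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
  PHI->addOperand(MachineOperand::CreateMBB(Pred));
}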
- SDB->visitSwitchCase(SDB->SwitchCases[i], BB); + SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - ThisBB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); + ThisBB = FuncInfo->MBB; // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. for (unsigned i = 0, e = Succs.size(); i != e; ++i) { - BB = Succs[i]; - // BB may have been removed from the CFG if a branch was constant folded. - if (ThisBB->isSuccessor(BB)) { - for (MachineBasicBlock::iterator Phi = BB->begin(); - Phi != BB->end() && Phi->isPHI(); + FuncInfo->MBB = Succs[i]; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // FuncInfo->MBB may have been removed from the CFG if a branch was + // constant folded. + if (ThisBB->isSuccessor(FuncInfo->MBB)) { + for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin(); + Phi != FuncInfo->MBB->end() && Phi->isPHI(); ++Phi) { // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { @@ -1205,6 +1129,7 @@ SelectInlineAsmMemoryOperands(std::vector &Ops) { Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc + Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3 unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) @@ -1701,7 +1626,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, SDValue(Res, ResNumResults-1)); if ((EmitNodeInfo & OPFL_FlagOutput) != 0) - --ResNumResults; + --ResNumResults; // Move the chain reference if needed. 
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 3786bd1..6cae804 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -278,7 +278,7 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { FlaggedNodes.push_back(N); while (!FlaggedNodes.empty()) { O << DOTGraphTraits - ::getSimpleNodeLabel(FlaggedNodes.back(), DAG); + ::getSimpleNodeLabel(FlaggedNodes.back(), DAG); FlaggedNodes.pop_back(); if (!FlaggedNodes.empty()) O << "\n "; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 44a80d3..4f38669 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -261,6 +262,38 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; + Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; + Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; + Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; + Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; + Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; + Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; + Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; + Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; + Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; + Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; + Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; + Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; + Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; + Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; + Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; + Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; + Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; + Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; + Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; + Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; } ///
InitLibcallCallingConvs - Set default libcall CallingConvs. @@ -546,9 +579,9 @@ TargetLowering::TargetLowering(const TargetMachine &tm, SchedPreferenceInfo = Sched::Latency; JumpBufSize = 0; JumpBufAlignment = 0; - IfCvtBlockSizeLimit = 2; - IfCvtDupBlockSizeLimit = 0; PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + ShouldFoldAtomicFences = false; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); @@ -578,9 +611,9 @@ bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, - unsigned &NumIntermediates, - EVT &RegisterVT, - TargetLowering* TLI) { + unsigned &NumIntermediates, + EVT &RegisterVT, + TargetLowering *TLI) { // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType(); @@ -610,16 +643,12 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, EVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; - if (EVT(DestVT).bitsLT(NewVT)) { - // Value is expanded, e.g. i64 -> i16. + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - } else { - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; - } - return 1; + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; } /// computeRegisterProperties - Once all of the register classes are added, @@ -705,39 +734,39 @@ void TargetLowering::computeRegisterProperties() { for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; - if (!isTypeLegal(VT)) { - MVT IntermediateVT; - EVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - // Determine if there is a legal wider type. - bool IsLegalWiderType = false; - EVT EltVT = VT.getVectorElementType(); - unsigned NElts = VT.getVectorNumElements(); - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - EVT SVT = (MVT::SimpleValueType)nVT; - if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && NElts != 1) { - TransformToType[i] = SVT; - ValueTypeActions.setTypeAction(VT, Promote); - IsLegalWiderType = true; - break; - } + if (isTypeLegal(VT)) continue; + + MVT IntermediateVT; + EVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, + RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + // Determine if there is a legal wider type. + bool IsLegalWiderType = false; + EVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + EVT SVT = (MVT::SimpleValueType)nVT; + if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts && NElts != 1) { + TransformToType[i] = SVT; + ValueTypeActions.setTypeAction(VT, Promote); + IsLegalWiderType = true; + break; } - if (!IsLegalWiderType) { - EVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. The default action is to split. 
- TransformToType[i] = MVT::Other; - ValueTypeActions.setTypeAction(VT, Expand); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, Promote); - } + } + if (!IsLegalWiderType) { + EVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + ValueTypeActions.setTypeAction(VT, Expand); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, Promote); } } } @@ -811,6 +840,65 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, return 1; } +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. +void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr, + SmallVectorImpl &Outs, + const TargetLowering &TLI, + SmallVectorImpl *Offsets) { + SmallVector ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + unsigned Offset = 0; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr & Attribute::SExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr & Attribute::ZExt) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( + PartVT.getTypeForEVT(ReturnType->getContext())); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr & Attribute::InReg) + Flags.setInReg(); + + // Propagate extension type if any + if (attr & Attribute::SExt) + Flags.setSExt(); + else if (attr & Attribute::ZExt) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); + if (Offsets) { + Offsets->push_back(Offset); + Offset += PartSize; + } + } + } +} + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. @@ -1042,7 +1130,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -1076,7 +1164,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; // If the operation can be done in a smaller type, do so. 
-    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
 
     // Output known-0 bits are only known if clear in both the LHS & RHS.
@@ -1101,7 +1189,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     if ((KnownZero2 & NewMask) == NewMask)
       return TLO.CombineTo(Op, Op.getOperand(1));
     // If the operation can be done in a smaller type, do so.
-    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
 
     // If all of the unknown bits are known to be zero on one side or the other
@@ -1498,13 +1586,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     break;
   }
   case ISD::AssertZext: {
-    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
-    APInt InMask = APInt::getLowBitsSet(BitWidth,
-                                        VT.getSizeInBits());
-    if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask,
+    // Demand all the bits of the input that are demanded in the output.
+    // The low bits are obvious; the high bits are demanded because we're
+    // asserting that they're zero here.
+    if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
                              KnownZero, KnownOne, TLO, Depth+1))
       return true;
     assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+    EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+    APInt InMask = APInt::getLowBitsSet(BitWidth,
+                                        VT.getSizeInBits());
     KnownZero |= ~InMask & NewMask;
     break;
   }
@@ -1544,7 +1636,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                              KnownOne2, TLO, Depth+1))
       return true;
     // See if the operation should be performed at a smaller bit width.
-    if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+    if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
       return true;
   }
   // FALL THROUGH
@@ -2346,7 +2438,6 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
 /// vector.  If it is invalid, don't add anything to Ops.
 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                   char ConstraintLetter,
-                                                  bool hasMemory,
                                                   std::vector<SDValue> &Ops,
                                                   SelectionDAG &DAG) const {
   switch (ConstraintLetter) {
@@ -2384,7 +2475,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
       if (ConstraintLetter != 'n') {
         int64_t Offs = GA->getOffset();
         if (C) Offs += C->getZExtValue();
-        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+                                                 C->getDebugLoc(),
                                                  Op.getValueType(), Offs));
         return;
       }
@@ -2507,18 +2599,18 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
 /// 'm' over 'r', for example.
 ///
 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
-                             bool hasMemory, const TargetLowering &TLI,
+                             const TargetLowering &TLI,
                              SDValue Op, SelectionDAG *DAG) {
   assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
   unsigned BestIdx = 0;
   TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
   int BestGenerality = -1;
-
+
   // Loop over the options, keeping track of the most general one.
   for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
     TargetLowering::ConstraintType CType =
       TLI.getConstraintType(OpInfo.Codes[i]);
-
+
     // If this is an 'other' constraint, see if the operand is valid for it.
     // For example, on X86 we might have an 'rI' constraint.
If the operand // is an integer in the range [0..31] we want to use I (saving a load @@ -2527,7 +2619,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector ResultOps; - TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory, + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], ResultOps, *DAG); if (!ResultOps.empty()) { BestType = CType; @@ -2536,6 +2628,11 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, } } + // Things with matching constraints can only be registers, per gcc + // documentation. This mainly affects "g" constraints. + if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) + continue; + // This constraint letter is more general than the previous one, use it. int Generality = getConstraintGenerality(CType); if (Generality > BestGenerality) { @@ -2554,7 +2651,6 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, /// OpInfo.ConstraintCode and OpInfo.ConstraintType. void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, - bool hasMemory, SelectionDAG *DAG) const { assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); @@ -2563,7 +2659,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, OpInfo.ConstraintCode = OpInfo.Codes[0]; OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); } else { - ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG); + ChooseConstraint(OpInfo, *this, Op, DAG); } // 'X' matches anything. diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 5240bef..6ab0cb0 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/GCStrategy.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/IRBuilder.h" using namespace llvm; @@ -158,7 +159,8 @@ namespace { // Create a new invoke instruction. 
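The ShadowStackGC change that begins above and continues just below swaps raw operand arithmetic (op_begin() + 1, getOperand(2)) for CallSite's argument iterators and getArgOperand(N), so the pass stays correct no matter where the callee sits in the call's operand list. A toy illustration of why the accessor form is sturdier; the Call struct here is invented, while the real code uses CallInst/CallSite.

#include <cassert>
#include <vector>

// Hide the callee's slot behind accessors so argument indices stay stable
// even if the underlying operand layout changes.
struct Call {
  std::vector<int> Ops;   // arguments first, callee last (one possible layout)
  int argOperand(unsigned I) const { return Ops[I]; }
  unsigned numArgs() const { return Ops.size() - 1; }
};

int main() {
  Call CI{{10, 20, /*callee*/ -1}};
  assert(CI.argOperand(1) == 20);   // stable regardless of callee position
  assert(CI.numArgs() == 2);
}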
Args.clear(); - Args.append(CI->op_begin() + 1, CI->op_end()); + CallSite CS(CI); + Args.append(CS.arg_begin(), CS.arg_end()); InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, @@ -194,7 +196,7 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { unsigned NumMeta = 0; SmallVector Metadata; for (unsigned I = 0; I != Roots.size(); ++I) { - Constant *C = cast(Roots[I].first->getOperand(2)); + Constant *C = cast(Roots[I].first->getArgOperand(1)); if (!C->isNullValue()) NumMeta = I + 1; Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); @@ -322,16 +324,16 @@ void ShadowStackGC::CollectRoots(Function &F) { assert(Roots.empty() && "Not cleaned up?"); - SmallVector,16> MetaRoots; + SmallVector, 16> MetaRoots; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) if (IntrinsicInst *CI = dyn_cast(II++)) if (Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::gcroot) { - std::pair Pair = std::make_pair( - CI, cast(CI->getOperand(1)->stripPointerCasts())); - if (IsNullValue(CI->getOperand(2))) + std::pair Pair = std::make_pair( + CI, cast(CI->getArgOperand(0)->stripPointerCasts())); + if (IsNullValue(CI->getArgOperand(1))) Roots.push_back(Pair); else MetaRoots.push_back(Pair); diff --git a/lib/CodeGen/SimpleHazardRecognizer.h b/lib/CodeGen/SimpleHazardRecognizer.h deleted file mode 100644 index f69feaf..0000000 --- a/lib/CodeGen/SimpleHazardRecognizer.h +++ /dev/null @@ -1,89 +0,0 @@ -//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SimpleHazardRecognizer class, which -// implements hazard-avoidance heuristics for scheduling, based on the -// scheduling itineraries specified for the target. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H -#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H - -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" - -namespace llvm { - /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses - /// a coarse classification and attempts to avoid that instructions of - /// a given class aren't grouped too densely together. - class SimpleHazardRecognizer : public ScheduleHazardRecognizer { - /// Class - A simple classification for SUnits. - enum Class { - Other, Load, Store - }; - - /// Window - The Class values of the most recently issued - /// instructions. - Class Window[8]; - - /// getClass - Classify the given SUnit. - Class getClass(const SUnit *SU) { - const MachineInstr *MI = SU->getInstr(); - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayLoad()) - return Load; - if (TID.mayStore()) - return Store; - return Other; - } - - /// Step - Rotate the existing entries in Window and insert the - /// given class value in position as the most recent. 
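Since SimpleHazardRecognizer.h is deleted wholesale above, a condensed, compilable summary of what it did may help readers of the removal. This keeps the same window size, recency weighting, and threshold as the deleted code.

#include <algorithm>
#include <cassert>

// Track the classes of the last eight issued instructions; a class that
// clusters too densely (recent slots weighted higher) reports a hazard.
enum Class { Other, Load, Store };

struct HazardWindow {
  Class W[8] = {Other, Other, Other, Other, Other, Other, Other, Other};

  void step(Class C) {                 // oldest entry drops out, C is newest
    std::copy(W + 1, W + 8, W);
    W[7] = C;
  }
  bool hazard(Class C) const {
    if (C == Other)
      return false;
    unsigned Score = 0;
    for (unsigned i = 0; i != 8; ++i)
      if (W[i] == C)
        Score += i + 1;                // recency-weighted
    return Score > 8 * 2;              // same threshold as the old code
  }
};

int main() {
  HazardWindow HW;
  for (int i = 0; i != 4; ++i) HW.step(Load);
  assert(HW.hazard(Load));             // 5+6+7+8 = 26 > 16
}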
- void Step(Class C) { - std::copy(Window+1, array_endof(Window), Window); - Window[array_lengthof(Window)-1] = C; - } - - public: - SimpleHazardRecognizer() : Window() { - Reset(); - } - - virtual HazardType getHazardType(SUnit *SU) { - Class C = getClass(SU); - if (C == Other) - return NoHazard; - unsigned Score = 0; - for (unsigned i = 0; i != array_lengthof(Window); ++i) - if (Window[i] == C) - Score += i + 1; - if (Score > array_lengthof(Window) * 2) - return Hazard; - return NoHazard; - } - - virtual void Reset() { - for (unsigned i = 0; i != array_lengthof(Window); ++i) - Window[i] = Other; - } - - virtual void EmitInstruction(SUnit *SU) { - Step(getClass(SU)); - } - - virtual void AdvanceCycle() { - Step(Other); - } - }; -} - -#endif diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index ed3c243..e69d3e4 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -99,15 +99,23 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This returns true if an interval was modified. /// -bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, - LiveInterval &IntB, +bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI) { + // Bail if there is no dst interval - can happen when merging physical subreg + // operations. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - assert(BLR != IntB.end() && "Live range not found!"); + if (BLR == IntB.end()) return false; VNInfo *BValNo = BLR->valno; // Get the location that B is defined at. Two options: either this value has @@ -119,7 +127,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // AValNo is the value number in A that defines the copy, A3 in the example. SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); - assert(ALR != IntA.end() && "Live range not found!"); + // The live range might not exist after fun with physreg coalescing. + if (ALR == IntA.end()) return false; VNInfo *AValNo = ALR->valno; // If it's re-defined by an early clobber somewhere in the live range, then // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. @@ -145,26 +154,21 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. - unsigned SrcReg = li_->getVNInfoSourceReg(AValNo); - if (!SrcReg) return false; // Not defined by a copy. - - // If the value number is not defined by a copy instruction, ignore it. - - // If the source register comes from an interval other than IntB, we can't - // handle this. - if (SrcReg != IntB.reg) return false; + if (!CP.isCoalescable(AValNo->getCopy())) + return false; // Get the LiveRange in IntB that this value number starts with. 
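The SimpleRegisterCoalescing hunks that follow repeatedly trade an assert for a conservative bail-out: after coalescing involving physical subregisters, a live range may legitimately be absent, and "absent" now means "refuse to coalesce" rather than "internal error". A minimal model of the pattern, with std::map standing in for LiveInterval lookup:

#include <cassert>
#include <map>

using LiveMap = std::map<int, int>;    // slot index -> value number (toy)

static bool tryAdjust(const LiveMap &IntB, int CopyIdx) {
  LiveMap::const_iterator BLR = IntB.find(CopyIdx);
  if (BLR == IntB.end())
    return false;  // was: assert(BLR != IntB.end() && "Live range not found!")
  // ... proceed with BLR->second as BValNo ...
  return true;
}

int main() {
  LiveMap IntB;
  assert(!tryAdjust(IntB, 42));        // missing range: just give up
}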
LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); - assert(ValLR != IntB.end() && "Live range not found!"); + if (ValLR == IntB.end()) + return false; // Make sure that the end of the live range is inside the same block as // CopyMI. MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); - if (!ValLREndInst || - ValLREndInst->getParent() != CopyMI->getParent()) return false; + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + return false; // Okay, we now know that ValLR ends in the same block that the CopyMI // live-range starts. If there are no intervening live ranges between them in @@ -207,6 +211,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // physreg has sub-registers, update their live intervals as well. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; LiveInterval &SRLI = li_->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, SRLI.getNextValue(FillerStart, 0, true, @@ -216,7 +222,6 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // Okay, merge "B1" into the same value number as "B0". if (BValNo != ValLR->valno) { - IntB.addKills(ValLR->valno, BValNo->kills); IntB.MergeValueNumberInto(BValNo, ValLR->valno); } DEBUG({ @@ -230,13 +235,12 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); if (UIdx != -1) { ValLREndInst->getOperand(UIdx).setIsKill(false); - ValLR->valno->removeKill(FillerStart); } // If the copy instruction was killing the destination register before the // merge, find the last use and trim the live range. That will also add the // isKill marker. - if (ALR->valno->isKill(CopyIdx)) + if (ALR->end == CopyIdx) TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); ++numExtends; @@ -304,23 +308,31 @@ TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) { /// /// This returns true if an interval was modified. /// -bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, - LiveInterval &IntB, +bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *CopyMI) { - SlotIndex CopyIdx = - li_->getInstructionIndex(CopyMI).getDefIndex(); - // FIXME: For now, only eliminate the copy by commuting its def when the // source register is a virtual register. We want to guard against cases // where the copy is a back edge copy and commuting the def lengthen the // live interval of the source register to the entire loop. - if (TargetRegisterInfo::isPhysicalRegister(IntA.reg)) + if (CP.isPhys() && CP.isFlipped()) + return false; + + // Bail if there is no dst interval. + if (!li_->hasInterval(CP.getDstReg())) return false; + SlotIndex CopyIdx = + li_->getInstructionIndex(CopyMI).getDefIndex(); + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - assert(BLR != IntB.end() && "Live range not found!"); + if (BLR == IntB.end()) return false; VNInfo *BValNo = BLR->valno; // Get the location that B is defined at. 
Two options: either this value has @@ -342,6 +354,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, AValNo->isUnused() || AValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); + if (!DefMI) + return false; const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isCommutable()) return false; @@ -380,7 +394,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // clobbers from the superreg. if (BHasSubRegs) for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) - if (HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) + if (li_->hasInterval(*SR) && + HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) return false; // If some of the uses of IntA.reg is already coalesced away, return false. @@ -413,7 +428,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, bool BHasPHIKill = BValNo->hasPHIKill(); SmallVector BDeadValNos; - VNInfo::KillSet BKills; std::map BExtend; // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. @@ -424,8 +438,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // C = A // ... // = B - // - // then do not add kills of A to the newly created B interval. bool Extended = BLR->end > ALR->end && ALR->end != ALR->start; if (Extended) BExtend[ALR->end] = BLR->end; @@ -448,34 +460,38 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; - UseMO.setReg(NewReg); + if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + UseMO.substPhysReg(NewReg, *tri_); + else + UseMO.setReg(NewReg); if (UseMI == CopyMI) continue; if (UseMO.isKill()) { if (Extended) UseMO.setIsKill(false); - else - BKills.push_back(UseIdx.getDefIndex()); } unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + if (UseMI->isCopy()) { + if (UseMI->getOperand(0).getReg() != IntB.reg || + UseMI->getOperand(0).getSubReg()) + continue; + } else if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){ + if (DstReg != IntB.reg || DstSubIdx) + continue; + } else continue; - if (DstReg == IntB.reg && DstSubIdx == 0) { - // This copy will become a noop. If it's defining a new val#, - // remove that val# as well. However this live range is being - // extended to the end of the existing live range defined by the copy. - SlotIndex DefIdx = UseIdx.getDefIndex(); - const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); - BHasPHIKill |= DLR->valno->hasPHIKill(); - assert(DLR->valno->def == DefIdx); - BDeadValNos.push_back(DLR->valno); - BExtend[DLR->start] = DLR->end; - JoinedCopies.insert(UseMI); - // If this is a kill but it's going to be removed, the last use - // of the same val# is the new kill. - if (UseMO.isKill()) - BKills.pop_back(); - } + // This copy will become a noop. If it's defining a new val#, + // remove that val# as well. However this live range is being + // extended to the end of the existing live range defined by the copy. 
+ SlotIndex DefIdx = UseIdx.getDefIndex(); + const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); + if (!DLR) + continue; + BHasPHIKill |= DLR->valno->hasPHIKill(); + assert(DLR->valno->def == DefIdx); + BDeadValNos.push_back(DLR->valno); + BExtend[DLR->start] = DLR->end; + JoinedCopies.insert(UseMI); } // We need to insert a new liverange: [ALR.start, LastUse). It may be we can @@ -490,24 +506,21 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, VNInfo *DeadVNI = BDeadValNos[i]; if (BHasSubRegs) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; LiveInterval &SRLI = li_->getInterval(*SR); - const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def); - SRLI.removeValNo(SRLR->valno); + if (const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def)) + SRLI.removeValNo(SRLR->valno); } } IntB.removeValNo(BDeadValNos[i]); } // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition - // is updated. Kills are also updated. + // is updated. VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; ValNo->setCopy(0); - for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) { - if (ValNo->kills[j] != BLR->end) - BKills.push_back(ValNo->kills[j]); - } - ValNo->kills.clear(); for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -517,18 +530,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (EI != BExtend.end()) End = EI->second; IntB.addRange(LiveRange(AI->start, End, ValNo)); - - // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. - if (BHasSubRegs) { - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { - LiveInterval &SRLI = li_->getInterval(*SR); - SRLI.MergeInClobberRange(*li_, AI->start, End, - li_->getVNInfoAllocator()); - } - } } - IntB.addKills(ValNo, BKills); ValNo->setHasPHIKill(BHasPHIKill); DEBUG({ @@ -621,7 +623,11 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, // of last use. LastUse->setIsKill(); removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_); - LR->valno->addKill(LastUseIdx.getDefIndex()); + if (LastUseMI->isCopy()) { + MachineOperand &DefMO = LastUseMI->getOperand(0); + if (DefMO.getReg() == li.reg && !DefMO.getSubReg()) + DefMO.setIsDead(); + } unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && DstReg == li.reg && DstSubIdx == 0) { @@ -663,6 +669,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, ValNo->isUnused() || ValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); + assert(DefMI && "Defining instruction disappeared"); const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isAsCheapAsAMove()) return false; @@ -701,33 +708,20 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, return false; } - SlotIndex DefIdx = CopyIdx.getDefIndex(); - const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx); - DLR->valno->setCopy(0); - // Don't forget to update sub-register intervals. 
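A toy picture of the BExtend bookkeeping used above: when a copy becomes a no-op and its value number is marked dead, the surviving range that starts where the dead range started is extended to the dead range's old end, matching the BExtend[DLR->start] = DLR->end store and the later find() during the merge loop.

#include <cassert>
#include <map>

using Idx = int;   // stands in for SlotIndex

static Idx endFor(const std::map<Idx, Idx> &BExtend, Idx Start, Idx End) {
  std::map<Idx, Idx>::const_iterator I = BExtend.find(Start);
  return I != BExtend.end() ? I->second : End;
}

int main() {
  std::map<Idx, Idx> BExtend;
  BExtend[10] = 30;                      // range starting at 10 now ends at 30
  assert(endFor(BExtend, 10, 20) == 30);
  assert(endFor(BExtend, 40, 50) == 50); // untouched ranges keep their end
}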
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { - for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - const LiveRange *DLR = - li_->getInterval(*SR).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->getCopy() == CopyMI) - DLR->valno->setCopy(0); - } - } + RemoveCopyFlag(DstReg, CopyMI); // If copy kills the source register, find the last use and propagate // kill. bool checkForDeadDef = false; MachineBasicBlock *MBB = CopyMI->getParent(); - if (SrcLR->valno->isKill(DefIdx)) + if (SrcLR->end == CopyIdx.getDefIndex()) if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) { checkForDeadDef = true; } MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); MachineInstr *NewMI = prior(MII); if (checkForDeadDef) { @@ -747,24 +741,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, MachineOperand &MO = CopyMI->getOperand(i); if (MO.isReg() && MO.isImplicit()) NewMI->addOperand(MO); - if (MO.isDef() && li_->hasInterval(MO.getReg())) { - unsigned Reg = MO.getReg(); - const LiveRange *DLR = - li_->getInterval(Reg).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->getCopy() == CopyMI) - DLR->valno->setCopy(0); - // Handle subregs as well - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (const unsigned* SR = tri_->getSubRegisters(Reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - const LiveRange *DLR = - li_->getInterval(*SR).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->getCopy() == CopyMI) - DLR->valno->setCopy(0); - } - } - } + if (MO.isDef()) + RemoveCopyFlag(MO.getReg(), CopyMI); } TransferImplicitOps(CopyMI, NewMI); @@ -783,84 +761,72 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, /// being updated is not zero, make sure to set it to the correct physical /// subregister. void -SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, - unsigned SubIdx) { - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (DstIsPhys && SubIdx) { - // Figure out the real physical register we are updating with. - DstReg = tri_->getSubReg(DstReg, SubIdx); - SubIdx = 0; - } - - // Copy the register use-list before traversing it. We may be adding operands - // and invalidating pointers. - SmallVector, 32> reglist; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg), - E = mri_->reg_end(); I != E; ++I) - reglist.push_back(std::make_pair(&*I, I.getOperandNo())); - - for (unsigned N=0; N != reglist.size(); ++N) { - MachineInstr *UseMI = reglist[N].first; - MachineOperand &O = UseMI->getOperand(reglist[N].second); - unsigned OldSubIdx = O.getSubReg(); +SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { + bool DstIsPhys = CP.isPhys(); + unsigned SrcReg = CP.getSrcReg(); + unsigned DstReg = CP.getDstReg(); + unsigned SubIdx = CP.getSubIdx(); + + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); + MachineInstr *UseMI = I.skipInstruction();) { + // A PhysReg copy that won't be coalesced can perhaps be rematerialized + // instead. 
     if (DstIsPhys) {
-      unsigned UseDstReg = DstReg;
-      if (OldSubIdx)
-        UseDstReg = tri_->getSubReg(DstReg, OldSubIdx);
-
       unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx;
       if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg,
                             CopySrcSubIdx, CopyDstSubIdx) &&
-          CopySrcSubIdx == 0 &&
-          CopyDstSubIdx == 0 &&
-          CopySrcReg != CopyDstReg &&
-          CopySrcReg == SrcReg && CopyDstReg != UseDstReg) {
-        // If the use is a copy and it won't be coalesced away, and its source
-        // is defined by a trivial computation, try to rematerialize it instead.
-        if (!JoinedCopies.count(UseMI) &&
-            ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg,
-                                    CopyDstSubIdx, UseMI))
-          continue;
-      }
+          CopySrcSubIdx == 0 && CopyDstSubIdx == 0 &&
+          CopySrcReg != CopyDstReg && CopySrcReg == SrcReg &&
+          CopyDstReg != DstReg && !JoinedCopies.count(UseMI) &&
+          ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, 0,
+                                  UseMI))
+        continue;
 
-      O.setReg(UseDstReg);
-      O.setSubReg(0);
-      if (OldSubIdx) {
-        // Def and kill of subregister of a virtual register actually defs and
-        // kills the whole register. Add imp-defs and imp-kills as needed.
-        if (O.isDef()) {
-          if(O.isDead())
-            UseMI->addRegisterDead(DstReg, tri_, true);
-          else
-            UseMI->addRegisterDefined(DstReg, tri_);
-        } else if (!O.isUndef() &&
-                   (O.isKill() ||
-                    UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0))))
-          UseMI->addRegisterKilled(DstReg, tri_, true);
-      }
+      if (UseMI->isCopy() &&
+          !UseMI->getOperand(1).getSubReg() &&
+          !UseMI->getOperand(0).getSubReg() &&
+          UseMI->getOperand(1).getReg() == SrcReg &&
+          UseMI->getOperand(0).getReg() != SrcReg &&
+          UseMI->getOperand(0).getReg() != DstReg &&
+          !JoinedCopies.count(UseMI) &&
+          ReMaterializeTrivialDef(li_->getInterval(SrcReg),
+                                  UseMI->getOperand(0).getReg(), 0, UseMI))
+        continue;
+    }
 
-      DEBUG({
-          dbgs() << "\t\tupdated: ";
-          if (!UseMI->isDebugValue())
-            dbgs() << li_->getInstructionIndex(UseMI) << "\t";
-          dbgs() << *UseMI;
-        });
-      continue;
+    SmallVector<unsigned, 8> Ops;
+    bool Reads, Writes;
+    tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+    bool Kills = false, Deads = false;
+
+    // Replace SrcReg with DstReg in all UseMI operands.
+    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+      MachineOperand &MO = UseMI->getOperand(Ops[i]);
+      Kills |= MO.isKill();
+      Deads |= MO.isDead();
+
+      if (DstIsPhys)
+        MO.substPhysReg(DstReg, *tri_);
+      else
+        MO.substVirtReg(DstReg, SubIdx, *tri_);
     }
 
-    // Sub-register indexes go from small to large. e.g.
-    // RAX: 1 -> AL, 2 -> AX, 3 -> EAX
-    // EAX: 1 -> AL, 2 -> AX
-    // So RAX's sub-register 2 is AX, RAX's sub-register 3 is EAX, whose
-    // sub-register 2 is also AX.
-    //
-    // FIXME: Properly compose subreg indices for all targets.
-    //
-    if (SubIdx && OldSubIdx && SubIdx != OldSubIdx)
-      ;
-    else if (SubIdx)
-      O.setSubReg(SubIdx);
-    O.setReg(DstReg);
+    // This instruction is a copy that will be removed.
+    if (JoinedCopies.count(UseMI))
+      continue;
+
+    if (SubIdx) {
+      // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
+      // read-modify-write of DstReg.
+      if (Deads)
+        UseMI->addRegisterDead(DstReg, tri_);
+      else if (!Reads && Writes)
+        UseMI->addRegisterDefined(DstReg, tri_);
+
+      // Kill flags apply to the whole physical register.
+ if (DstIsPhys && Kills) + UseMI->addRegisterKilled(DstReg, tri_); + } DEBUG({ dbgs() << "\t\tupdated: "; @@ -869,15 +835,15 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, dbgs() << *UseMI; }); + // After updating the operand, check if the machine instruction has // become a copy. If so, update its val# information. - if (JoinedCopies.count(UseMI)) + const TargetInstrDesc &TID = UseMI->getDesc(); + if (DstIsPhys || TID.getNumDefs() != 1 || TID.getNumOperands() <= 2) continue; - const TargetInstrDesc &TID = UseMI->getDesc(); unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; - if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 && - tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, + if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx) && CopySrcReg != CopyDstReg && (TargetRegisterInfo::isVirtualRegister(CopyDstReg) || @@ -945,6 +911,27 @@ bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, return removeIntervalIfEmpty(li, li_, tri_); } +void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, + const MachineInstr *CopyMI) { + SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + if (li_->hasInterval(DstReg)) { + LiveInterval &LI = li_->getInterval(DstReg); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->getCopy() == CopyMI) + LR->valno->setCopy(0); + } + if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) + return; + for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { + if (!li_->hasInterval(*AS)) + continue; + LiveInterval &LI = li_->getInterval(*AS); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->getCopy() == CopyMI) + LR->valno->setCopy(0); + } +} + /// PropagateDeadness - Propagate the dead marker to the instruction which /// defines the val#. static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, @@ -978,8 +965,8 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, // Live-in to the function but dead. Remove it from entry live-in set. if (mf_->begin()->isLiveIn(li.reg)) mf_->begin()->removeLiveIn(li.reg); - const LiveRange *LR = li.getLiveRangeContaining(CopyIdx); - removeRange(li, LR->start, LR->end, li_, tri_); + if (const LiveRange *LR = li.getLiveRangeContaining(CopyIdx)) + removeRange(li, LR->start, LR->end, li_, tri_); return removeIntervalIfEmpty(li, li_, tri_); } @@ -1017,147 +1004,12 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, // val#, then propagate the dead marker. PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); ++numDeadValNo; - - if (LR->valno->isKill(RemoveEnd)) - LR->valno->removeKill(RemoveEnd); } removeRange(li, RemoveStart, RemoveEnd, li_, tri_); return removeIntervalIfEmpty(li, li_, tri_); } -/// CanCoalesceWithImpDef - Returns true if the specified copy instruction -/// from an implicit def to another register can be coalesced away. -bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, - LiveInterval &li, - LiveInterval &ImpLi) const{ - if (!CopyMI->killsRegister(ImpLi.reg)) - return false; - // Make sure this is the only use. 
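A compact model of the new RemoveCopyFlag added above: the value number live at the copy's def index remembers which instruction created it, and clearing that link has to cover both the destination register and every alias that has a live interval. The ValNo/Intervals types here are invented stand-ins for LiveInterval and VNInfo.

#include <cassert>
#include <map>
#include <vector>

struct ValNo { const void *Copy = nullptr; };
using Intervals = std::map<unsigned, ValNo>;   // reg -> val# at DefIdx (toy)

static void removeCopyFlag(Intervals &LI, unsigned DstReg,
                           const std::vector<unsigned> &Aliases,
                           const void *CopyMI) {
  Intervals::iterator I = LI.find(DstReg);
  if (I != LI.end() && I->second.Copy == CopyMI)
    I->second.Copy = nullptr;
  for (unsigned A : Aliases) {                 // e.g. AX aliasing EAX
    I = LI.find(A);
    if (I != LI.end() && I->second.Copy == CopyMI)
      I->second.Copy = nullptr;
  }
}

int main() {
  Intervals LI;
  int MI;
  LI[1].Copy = &MI;
  LI[2].Copy = &MI;                            // alias of reg 1
  removeCopyFlag(LI, 1, {2}, &MI);
  assert(LI[1].Copy == nullptr && LI[2].Copy == nullptr);
}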
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(ImpLi.reg), - UE = mri_->use_end(); UI != UE;) { - MachineInstr *UseMI = &*UI; - ++UI; - if (CopyMI == UseMI || JoinedCopies.count(UseMI)) - continue; - return false; - } - return true; -} - - -/// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a -/// a virtual destination register with physical source register. -bool -SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, - MachineBasicBlock *CopyMBB, - LiveInterval &DstInt, - LiveInterval &SrcInt) { - // If the virtual register live interval is long but it has low use desity, - // do not join them, instead mark the physical register as its allocation - // preference. - const TargetRegisterClass *RC = mri_->getRegClass(DstInt.reg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(DstInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(DstInt.reg), - mri_->use_nodbg_end()) * Threshold < Length) - return false; - - // If the virtual register live interval extends into a loop, turn down - // aggressiveness. - SlotIndex CopyIdx = - li_->getInstructionIndex(CopyMI).getDefIndex(); - const MachineLoop *L = loopInfo->getLoopFor(CopyMBB); - if (!L) { - // Let's see if the virtual register live interval extends into the loop. - LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx); - assert(DLR != DstInt.end() && "Live range not found!"); - DLR = DstInt.FindLiveRangeContaining(DLR->end.getNextSlot()); - if (DLR != DstInt.end()) { - CopyMBB = li_->getMBBFromIndex(DLR->start); - L = loopInfo->getLoopFor(CopyMBB); - } - } - - if (!L || Length <= Threshold) - return true; - - SlotIndex UseIdx = CopyIdx.getUseIndex(); - LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); - MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); - if (loopInfo->getLoopFor(SMBB) != L) { - if (!loopInfo->isLoopHeader(CopyMBB)) - return false; - // If vr's live interval extends pass the loop header, do not join. - for (MachineBasicBlock::succ_iterator SI = CopyMBB->succ_begin(), - SE = CopyMBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB == CopyMBB) - continue; - if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB), - li_->getMBBEndIdx(SuccMBB))) - return false; - } - } - return true; -} - -/// isWinToJoinVRWithDstPhysReg - Return true if it's worth while to join a -/// copy from a virtual source register to a physical destination register. -bool -SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, - MachineBasicBlock *CopyMBB, - LiveInterval &DstInt, - LiveInterval &SrcInt) { - // If the virtual register live interval is long but it has low use density, - // do not join them, instead mark the physical register as its allocation - // preference. - const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(SrcInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(SrcInt.reg), - mri_->use_nodbg_end()) * Threshold < Length) - return false; - - if (SrcInt.empty()) - // Must be implicit_def. - return false; - - // If the virtual register live interval is defined or cross a loop, turn - // down aggressiveness. 
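The deleted isWinToJoinVRWithSrcPhysReg/isWinToJoinVRWithDstPhysReg helpers above, like the check that survives in JoinCopy, gate physreg coalescing on use density: a long interval with few uses would pin a physical register for little benefit. The threshold is twice the number of allocatable registers in the class; the test itself distills to a few lines.

#include <cassert>

static bool tooSparseToCoalesce(unsigned LengthInInstrs, unsigned NumUses,
                                unsigned AllocatableRegs) {
  unsigned Threshold = AllocatableRegs * 2;
  return LengthInInstrs > Threshold &&
         NumUses * Threshold < LengthInInstrs;   // low use density
}

int main() {
  assert(tooSparseToCoalesce(200, 3, 8));    // long, rarely used: skip it
  assert(!tooSparseToCoalesce(20, 3, 8));    // short interval: fine
}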
- SlotIndex CopyIdx = - li_->getInstructionIndex(CopyMI).getDefIndex(); - SlotIndex UseIdx = CopyIdx.getUseIndex(); - LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); - assert(SLR != SrcInt.end() && "Live range not found!"); - SLR = SrcInt.FindLiveRangeContaining(SLR->start.getPrevSlot()); - if (SLR == SrcInt.end()) - return true; - MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); - const MachineLoop *L = loopInfo->getLoopFor(SMBB); - - if (!L || Length <= Threshold) - return true; - - if (loopInfo->getLoopFor(CopyMBB) != L) { - if (SMBB != L->getLoopLatch()) - return false; - // If vr's live interval is extended from before the loop latch, do not - // join. - for (MachineBasicBlock::pred_iterator PI = SMBB->pred_begin(), - PE = SMBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *PredMBB = *PI; - if (PredMBB == SMBB) - continue; - if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB), - li_->getMBBEndIdx(PredMBB))) - return false; - } - } - return true; -} /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. @@ -1203,157 +1055,6 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, return true; } -/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual -/// register with a physical register, check if any of the virtual register -/// operand is a sub-register use or def. If so, make sure it won't result -/// in an illegal extract_subreg or insert_subreg instruction. e.g. -/// vr1024 = extract_subreg vr1025, 1 -/// ... -/// vr1024 = mov8rr AH -/// If vr1024 is coalesced with AH, the extract_subreg is now illegal since -/// AH does not have a super-reg whose sub-register 1 is AH. -bool -SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, - unsigned VirtReg, - unsigned PhysReg) { - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - if (O.isDebug()) - continue; - MachineInstr *MI = &*I; - if (MI == CopyMI || JoinedCopies.count(MI)) - continue; - unsigned SubIdx = O.getSubReg(); - if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx)) - return true; - if (MI->isExtractSubreg()) { - SubIdx = MI->getOperand(2).getImm(); - if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx)) - return true; - if (O.isDef()) { - unsigned SrcReg = MI->getOperand(1).getReg(); - const TargetRegisterClass *RC = - TargetRegisterInfo::isPhysicalRegister(SrcReg) - ? tri_->getPhysicalRegisterRegClass(SrcReg) - : mri_->getRegClass(SrcReg); - if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC)) - return true; - } - } - if (MI->isInsertSubreg() || MI->isSubregToReg()) { - SubIdx = MI->getOperand(3).getImm(); - if (VirtReg == MI->getOperand(0).getReg()) { - if (!tri_->getSubReg(PhysReg, SubIdx)) - return true; - } else { - unsigned DstReg = MI->getOperand(0).getReg(); - const TargetRegisterClass *RC = - TargetRegisterInfo::isPhysicalRegister(DstReg) - ? tri_->getPhysicalRegisterRegClass(DstReg) - : mri_->getRegClass(DstReg); - if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC)) - return true; - } - } - } - return false; -} - - -/// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce -/// an extract_subreg where dst is a physical register, e.g. 
-/// cl = EXTRACT_SUBREG reg1024, 1 -bool -SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, - unsigned SrcReg, unsigned SubIdx, - unsigned &RealDstReg) { - const TargetRegisterClass *RC = mri_->getRegClass(SrcReg); - RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC); - if (!RealDstReg) { - DEBUG(dbgs() << "\tIncompatible source regclass: " - << "none of the super-registers of " << tri_->getName(DstReg) - << " are in " << RC->getName() << ".\n"); - return false; - } - - LiveInterval &RHS = li_->getInterval(SrcReg); - // For this type of EXTRACT_SUBREG, conservatively - // check if the live interval of the source register interfere with the - // actual super physical register we are trying to coalesce with. - if (li_->hasInterval(RealDstReg) && - RHS.overlaps(li_->getInterval(RealDstReg))) { - DEBUG({ - dbgs() << "\t\tInterfere with register "; - li_->getInterval(RealDstReg).print(dbgs(), tri_); - }); - return false; // Not coalescable - } - for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR) - // Do not check DstReg or its sub-register. JoinIntervals() will take care - // of that. - if (*SR != DstReg && - !tri_->isSubRegister(DstReg, *SR) && - li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DEBUG({ - dbgs() << "\t\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); - }); - return false; // Not coalescable - } - return true; -} - -/// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce -/// an insert_subreg where src is a physical register, e.g. -/// reg1024 = INSERT_SUBREG reg1024, c1, 0 -bool -SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, - unsigned SrcReg, unsigned SubIdx, - unsigned &RealSrcReg) { - const TargetRegisterClass *RC = mri_->getRegClass(DstReg); - RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC); - if (!RealSrcReg) { - DEBUG(dbgs() << "\tIncompatible destination regclass: " - << "none of the super-registers of " << tri_->getName(SrcReg) - << " are in " << RC->getName() << ".\n"); - return false; - } - - LiveInterval &LHS = li_->getInterval(DstReg); - if (li_->hasInterval(RealSrcReg) && - LHS.overlaps(li_->getInterval(RealSrcReg))) { - DEBUG({ - dbgs() << "\t\tInterfere with register "; - li_->getInterval(RealSrcReg).print(dbgs(), tri_); - }); - return false; // Not coalescable - } - for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR) - // Do not check SrcReg or its sub-register. JoinIntervals() will take care - // of that. - if (*SR != SrcReg && - !tri_->isSubRegister(SrcReg, *SR) && - li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { - DEBUG({ - dbgs() << "\t\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); - }); - return false; // Not coalescable - } - return true; -} - -/// getRegAllocPreference - Return register allocation preference register. -/// -static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF, - MachineRegisterInfo *MRI, - const TargetRegisterInfo *TRI) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return 0; - std::pair Hint = MRI->getRegAllocationHint(Reg); - return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF); -} /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. 
This returns true @@ -1369,354 +1070,97 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); - unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0; - bool isExtSubReg = CopyMI->isExtractSubreg(); - bool isInsSubReg = CopyMI->isInsertSubreg(); - bool isSubRegToReg = CopyMI->isSubregToReg(); - unsigned SubIdx = 0; - if (isExtSubReg) { - DstReg = CopyMI->getOperand(0).getReg(); - DstSubIdx = CopyMI->getOperand(0).getSubReg(); - SrcReg = CopyMI->getOperand(1).getReg(); - SrcSubIdx = CopyMI->getOperand(2).getImm(); - } else if (isInsSubReg || isSubRegToReg) { - DstReg = CopyMI->getOperand(0).getReg(); - DstSubIdx = CopyMI->getOperand(3).getImm(); - SrcReg = CopyMI->getOperand(2).getReg(); - SrcSubIdx = CopyMI->getOperand(2).getSubReg(); - if (SrcSubIdx && SrcSubIdx != DstSubIdx) { - // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - DEBUG(dbgs() << "\tSource of insert_subreg or subreg_to_reg is already " - "coalesced to another register.\n"); - return false; // Not coalescable. - } - } else if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (SrcSubIdx && DstSubIdx && SrcSubIdx != DstSubIdx) { - // e.g. %reg16404:1 = MOV8rr %reg16412:2 - Again = true; - return false; // Not coalescable. - } - } else { - llvm_unreachable("Unrecognized copy instruction!"); + CoalescerPair CP(*tii_, *tri_); + if (!CP.setRegisters(CopyMI)) { + DEBUG(dbgs() << "\tNot coalescable.\n"); + return false; } // If they are already joined we continue. - if (SrcReg == DstReg) { + if (CP.getSrcReg() == CP.getDstReg()) { DEBUG(dbgs() << "\tCopy already coalesced.\n"); return false; // Not coalescable. } - bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - - // If they are both physical registers, we cannot join them. - if (SrcIsPhys && DstIsPhys) { - DEBUG(dbgs() << "\tCan not coalesce physregs.\n"); - return false; // Not coalescable. - } - - // We only join virtual registers with allocatable physical registers. - if (SrcIsPhys && !allocatableRegs_[SrcReg]) { - DEBUG(dbgs() << "\tSrc reg is unallocatable physreg.\n"); - return false; // Not coalescable. - } - if (DstIsPhys && !allocatableRegs_[DstReg]) { - DEBUG(dbgs() << "\tDst reg is unallocatable physreg.\n"); - return false; // Not coalescable. - } - - // We cannot handle dual subreg indices and mismatched classes at the same - // time. - if (SrcSubIdx && DstSubIdx && differingRegisterClasses(SrcReg, DstReg)) { - DEBUG(dbgs() << "\tCannot handle subreg indices and mismatched classes.\n"); - return false; - } + DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg()); - // Check that a physical source register is compatible with dst regclass - if (SrcIsPhys) { - unsigned SrcSubReg = SrcSubIdx ? - tri_->getSubReg(SrcReg, SrcSubIdx) : SrcReg; - const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); - const TargetRegisterClass *DstSubRC = DstRC; - if (DstSubIdx) - DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx); - assert(DstSubRC && "Illegal subregister index"); - if (!DstSubRC->contains(SrcSubReg)) { - DEBUG(dbgs() << "\tIncompatible destination regclass: " - << "none of the super-registers of " - << tri_->getName(SrcSubReg) << " are in " - << DstSubRC->getName() << ".\n"); - return false; // Not coalescable. 
- } - } - - // Check that a physical dst register is compatible with source regclass - if (DstIsPhys) { - unsigned DstSubReg = DstSubIdx ? - tri_->getSubReg(DstReg, DstSubIdx) : DstReg; - const TargetRegisterClass *SrcRC = mri_->getRegClass(SrcReg); - const TargetRegisterClass *SrcSubRC = SrcRC; - if (SrcSubIdx) - SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx); - assert(SrcSubRC && "Illegal subregister index"); - if (!SrcSubRC->contains(DstSubReg)) { - DEBUG(dbgs() << "\tIncompatible source regclass: " - << "none of the super-registers of " - << tri_->getName(DstSubReg) << " are in " - << SrcSubRC->getName() << ".\n"); - (void)DstSubReg; - return false; // Not coalescable. + // Enforce policies. + if (CP.isPhys()) { + DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n"); + // Only coalesce to allocatable physreg. + if (!allocatableRegs_[CP.getDstReg()]) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. } - } - - // Should be non-null only when coalescing to a sub-register class. - bool CrossRC = false; - const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); - const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg); - const TargetRegisterClass *NewRC = NULL; - unsigned RealDstReg = 0; - unsigned RealSrcReg = 0; - if (isExtSubReg || isInsSubReg || isSubRegToReg) { - SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm(); - if (SrcIsPhys && isExtSubReg) { - // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be - // coalesced with AX. - unsigned DstSubIdx = CopyMI->getOperand(0).getSubReg(); - if (DstSubIdx) { - // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - if (DstSubIdx != SubIdx) { - DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); - return false; // Not coalescable. - } - } else - SrcReg = tri_->getSubReg(SrcReg, SubIdx); - SubIdx = 0; - } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) { - // EAX = INSERT_SUBREG EAX, r1024, 0 - unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg(); - if (SrcSubIdx) { - // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - if (SrcSubIdx != SubIdx) { - DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); - return false; // Not coalescable. - } - } else - DstReg = tri_->getSubReg(DstReg, SubIdx); - SubIdx = 0; - } else if ((DstIsPhys && isExtSubReg) || - (SrcIsPhys && (isInsSubReg || isSubRegToReg))) { - if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) { - DEBUG(dbgs() << "\tSrc of extract_subreg already coalesced with reg" - << " of a super-class.\n"); - return false; // Not coalescable. - } - - // FIXME: The following checks are somewhat conservative. Perhaps a better - // way to implement this is to treat this as coalescing a vr with the - // super physical register. - if (isExtSubReg) { - if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg)) - return false; // Not coalescable - } else { - if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg)) - return false; // Not coalescable - } - SubIdx = 0; - } else { - unsigned OldSubIdx = isExtSubReg ? CopyMI->getOperand(0).getSubReg() - : CopyMI->getOperand(2).getSubReg(); - if (OldSubIdx) { - if (OldSubIdx == SubIdx && !differingRegisterClasses(SrcReg, DstReg)) - // r1024<2> = EXTRACT_SUBREG r1025, 2. 
Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - // Also check if the other larger register is of the same register - // class as the would be resulting register. - SubIdx = 0; - else { - DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); - return false; // Not coalescable. - } - } - if (SubIdx) { - if (!DstIsPhys && !SrcIsPhys) { - if (isInsSubReg || isSubRegToReg) { - NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx); - } else // extract_subreg { - NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx); - } - if (!NewRC) { - DEBUG(dbgs() << "\t Conflicting sub-register indices.\n"); - return false; // Not coalescable - } + } else { + DEBUG({ + dbgs() << " with reg%" << CP.getDstReg(); + if (CP.getSubIdx()) + dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx()); + dbgs() << " to " << CP.getNewRC()->getName() << "\n"; + }); - if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { - DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " - << SrcRC->getName() << "/" - << DstRC->getName() << " -> " - << NewRC->getName() << ".\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } - } - } else if (differingRegisterClasses(SrcReg, DstReg)) { - if (DisableCrossClassJoin) - return false; - CrossRC = true; - - // FIXME: What if the result of a EXTRACT_SUBREG is then coalesced - // with another? If it's the resulting destination register, then - // the subidx must be propagated to uses (but only those defined - // by the EXTRACT_SUBREG). If it's being coalesced into another - // register, it should be safe because register is assumed to have - // the register class of the super-register. - - // Process moves where one of the registers have a sub-register index. - MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg); - MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg); - SubIdx = DstMO->getSubReg(); - if (SubIdx) { - if (SrcMO->getSubReg()) - // FIXME: can we handle this? + // Avoid constraining virtual register regclass too much. + if (CP.isCrossClass()) { + if (DisableCrossClassJoin) { + DEBUG(dbgs() << "\tCross-class joins disabled.\n"); return false; - // This is not an insert_subreg but it looks like one. - // e.g. %reg1024:4 = MOV32rr %EAX - isInsSubReg = true; - if (SrcIsPhys) { - if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg)) - return false; // Not coalescable - SubIdx = 0; - } - } else { - SubIdx = SrcMO->getSubReg(); - if (SubIdx) { - // This is not a extract_subreg but it looks like one. - // e.g. %cl = MOV16rr %reg1024:1 - isExtSubReg = true; - if (DstIsPhys) { - if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg)) - return false; // Not coalescable - SubIdx = 0; - } - } - } - - // Now determine the register class of the joined register. - if (!SrcIsPhys && !DstIsPhys) { - if (isExtSubReg) { - NewRC = - SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC; - } else if (isInsSubReg) { - NewRC = - SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC; - } else { - NewRC = getCommonSubClass(SrcRC, DstRC); - } - - if (!NewRC) { - DEBUG(dbgs() << "\tDisjoint regclasses: " - << SrcRC->getName() << ", " - << DstRC->getName() << ".\n"); - return false; // Not coalescable. } - - // If we are joining two virtual registers and the resulting register - // class is more restrictive (fewer register, smaller size). Check if it's - // worth doing the merge. 
- if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { + if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), + mri_->getRegClass(CP.getSrcReg()), + mri_->getRegClass(CP.getDstReg()), + CP.getNewRC())) { DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " - << SrcRC->getName() << "/" - << DstRC->getName() << " -> " - << NewRC->getName() << ".\n"); - // Allow the coalescer to try again in case either side gets coalesced to - // a physical register that's compatible with the other side. e.g. - // r1024 = MOV32to32_ r1025 - // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. + << CP.getNewRC()->getName() << ".\n"); Again = true; // May be possible to coalesce later. return false; } } - } - - // Will it create illegal extract_subreg / insert_subreg? - if (SrcIsPhys && HasIncompatibleSubRegDefUse(CopyMI, DstReg, SrcReg)) - return false; - if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg)) - return false; - - LiveInterval &SrcInt = li_->getInterval(SrcReg); - LiveInterval &DstInt = li_->getInterval(DstReg); - assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg && - "Register mapping is horribly broken!"); - DEBUG({ - dbgs() << "\t\tInspecting "; - if (SrcRC) dbgs() << SrcRC->getName() << ": "; - SrcInt.print(dbgs(), tri_); - dbgs() << "\n\t\t and "; - if (DstRC) dbgs() << DstRC->getName() << ": "; - DstInt.print(dbgs(), tri_); - dbgs() << "\n"; - }); + // When possible, let DstReg be the larger interval. + if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > + li_->getInterval(CP.getDstReg()).ranges.size()) + CP.flip(); + } + + // We need to be careful about coalescing a source physical register with a + // virtual register. Once the coalescing is done, it cannot be broken and + // these are not spillable! If the destination interval uses are far away, + // think twice about coalescing them! + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial() && CP.isPhys()) { + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg()); + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } - // Save a copy of the virtual register live interval. We'll manually - // merge this into the "real" physical register live interval this is - // coalesced with. - OwningPtr SavedLI; - if (RealDstReg) - SavedLI.reset(li_->dupInterval(&SrcInt)); - else if (RealSrcReg) - SavedLI.reset(li_->dupInterval(&DstInt)); - - if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) { - // Check if it is necessary to propagate "isDead" property. - MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false); - bool isDead = mopd->isDead(); - - // We need to be careful about coalescing a source physical register with a - // virtual register. Once the coalescing is done, it cannot be broken and - // these are not spillable! If the destination interval uses are far away, - // think twice about coalescing them! 
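On the "When possible, let DstReg be the larger interval" comment above: merging the smaller range list into the bigger one touches fewer nodes, the same intuition as union-by-size in a disjoint-set structure. A sketch with plain vectors standing in for the range lists that CP.flip() reorders:

#include <cassert>
#include <vector>

static void joinFlipped(std::vector<int> &Dst, std::vector<int> &Src) {
  if (Src.size() > Dst.size())
    Dst.swap(Src);                       // the CP.flip() step
  Dst.insert(Dst.end(), Src.begin(), Src.end());
  Src.clear();                           // source interval is removed later
}

int main() {
  std::vector<int> Dst{1}, Src{2, 3, 4};
  joinFlipped(Dst, Src);
  assert(Dst.size() == 4 && Src.empty());
}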
- if (!isDead && (SrcIsPhys || DstIsPhys)) { - // If the virtual register live interval is long but it has low use - // density, do not join them, instead mark the physical register as its - // allocation preference. - LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt; - LiveInterval &JoinPInt = SrcIsPhys ? SrcInt : DstInt; - unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg; - unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; - - // Don't join with physregs that have a ridiculous number of live - // ranges. The data structure performance is really bad when that - // happens. - if (JoinPInt.ranges.size() > 1000) { - mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); - ++numAborts; - DEBUG(dbgs() - << "\tPhysical register live interval too complicated, abort!\n"); - return false; - } + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold && + std::distance(mri_->use_nodbg_begin(CP.getSrcReg()), + mri_->use_nodbg_end()) * Threshold < Length) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI)) + return true; - const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(JoinVReg), - mri_->use_nodbg_end()) * Threshold < Length) { - // Before giving up coalescing, if definition of source is defined by - // trivial computation, try rematerializing it. - if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) - return true; - - mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - Again = true; // May be possible to coalesce later. - return false; - } + mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg()); + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + Again = true; // May be possible to coalesce later. + return false; } } @@ -1724,32 +1168,24 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. The output "SrcInt" will not have // been modified, so we can use this information below to update aliases. - bool Swapped = false; - // If SrcInt is implicitly defined, it's safe to coalesce. - if (SrcInt.empty()) { - if (!CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) { - // Only coalesce an empty interval (defined by implicit_def) with - // another interval which has a valno defined by the CopyMI and the CopyMI - // is a kill of the implicit def. - DEBUG(dbgs() << "\tNot profitable!\n"); - return false; - } - } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) { + if (!JoinIntervals(CP)) { // Coalescing failed. // If definition of source is defined by trivial computation, try // rematerializing it. - if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && - ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) + if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), + CP.getDstReg(), 0, CopyMI)) return true; // If we can eliminate the copy without merging the live ranges, do so now. 
- if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && - (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || - RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) { - JoinedCopies.insert(CopyMI); - DEBUG(dbgs() << "\tTrivial!\n"); - return true; + if (!CP.isPartial()) { + if (AdjustCopiesBackFrom(CP, CopyMI) || + RemoveCopyByCommutingDef(CP, CopyMI)) { + JoinedCopies.insert(CopyMI); + DEBUG(dbgs() << "\tTrivial!\n"); + return true; + } } // Otherwise, we are unable to join the intervals. @@ -1758,86 +1194,32 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; } - LiveInterval *ResSrcInt = &SrcInt; - LiveInterval *ResDstInt = &DstInt; - if (Swapped) { - std::swap(SrcReg, DstReg); - std::swap(ResSrcInt, ResDstInt); - } - assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && - "LiveInterval::join didn't work right!"); - - // If we're about to merge live ranges into a physical register live interval, - // we have to update any aliased register's live ranges to indicate that they - // have clobbered values for this range. - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { - // If this is a extract_subreg where dst is a physical register, e.g. - // cl = EXTRACT_SUBREG reg1024, 1 - // then create and update the actual physical register allocated to RHS. - if (RealDstReg || RealSrcReg) { - LiveInterval &RealInt = - li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg); - for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(), - E = SavedLI->vni_end(); I != E; ++I) { - const VNInfo *ValNo = *I; - VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->getCopy(), - false, // updated at * - li_->getVNInfoAllocator()); - NewValNo->setFlags(ValNo->getFlags()); // * updated here. - RealInt.addKills(NewValNo, ValNo->kills); - RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo); - } - RealInt.weight += SavedLI->weight; - DstReg = RealDstReg ? RealDstReg : RealSrcReg; - } - - // Update the liveintervals of sub-registers. - for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS) - li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, *ResSrcInt, - li_->getVNInfoAllocator()); - } - - // If this is a EXTRACT_SUBREG, make sure the result of coalescing is the - // larger super-register. - if ((isExtSubReg || isInsSubReg || isSubRegToReg) && - !SrcIsPhys && !DstIsPhys) { - if ((isExtSubReg && !Swapped) || - ((isInsSubReg || isSubRegToReg) && Swapped)) { - ResSrcInt->Copy(*ResDstInt, mri_, li_->getVNInfoAllocator()); - std::swap(SrcReg, DstReg); - std::swap(ResSrcInt, ResDstInt); - } - } - // Coalescing to a virtual register that is of a sub-register class of the // other. Make sure the resulting register is set to the right register class. - if (CrossRC) + if (CP.isCrossClass()) { ++numCrossRCs; - - // This may happen even if it's cross-rc coalescing. e.g. - // %reg1026 = SUBREG_TO_REG 0, %reg1037, 4 - // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to - // be allocate a register from GR64_ABCD. - if (NewRC) - mri_->setRegClass(DstReg, NewRC); + mri_->setRegClass(CP.getDstReg(), CP.getNewRC()); + } // Remember to delete the copy instruction. JoinedCopies.insert(CopyMI); - UpdateRegDefsUses(SrcReg, DstReg, SubIdx); + UpdateRegDefsUses(CP); // If we have extended the live range of a physical register, make sure we // update live-in lists as well. 
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { - const LiveInterval &VRegInterval = li_->getInterval(SrcReg); + if (CP.isPhys()) { SmallVector BlockSeq; - for (LiveInterval::const_iterator I = VRegInterval.begin(), - E = VRegInterval.end(); I != E; ++I ) { + // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the + // ranges for this, and they are preserved. + LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg()); + for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); + I != E; ++I ) { li_->findLiveInMBBs(I->start, I->end, BlockSeq); for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { MachineBasicBlock &block = *BlockSeq[idx]; - if (!block.isLiveIn(DstReg)) - block.addLiveIn(DstReg); + if (!block.isLiveIn(CP.getDstReg())) + block.addLiveIn(CP.getDstReg()); } BlockSeq.clear(); } @@ -1845,32 +1227,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // SrcReg is guaranteed to be the register whose live interval is // being merged. - li_->removeInterval(SrcReg); + li_->removeInterval(CP.getSrcReg()); // Update regalloc hint. - tri_->UpdateRegAllocHint(SrcReg, DstReg, *mf_); - - // Manually deleted the live interval copy. - if (SavedLI) { - SavedLI->clear(); - SavedLI.reset(); - } - - // If resulting interval has a preference that no longer fits because of subreg - // coalescing, just clear the preference. - unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_); - if (Preference && (isExtSubReg || isInsSubReg || isSubRegToReg) && - TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) { - const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg); - if (!RC->contains(Preference)) - mri_->setRegAllocationHint(ResDstInt->reg, 0, 0); - } + tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_); DEBUG({ - dbgs() << "\t\tJoined. Result = "; - ResDstInt->print(dbgs(), tri_); - dbgs() << "\n"; - }); + LiveInterval &DstInt = li_->getInterval(CP.getDstReg()); + dbgs() << "\tJoined. Result = "; + DstInt.print(dbgs(), tri_); + dbgs() << "\n"; + }); ++numJoins; return true; @@ -1927,263 +1294,53 @@ static unsigned ComputeUltimateVN(VNInfo *VNI, return ThisValNoAssignments[VN] = UltimateVN; } -static bool InVector(VNInfo *Val, const SmallVector &V) { - return std::find(V.begin(), V.end(), Val) != V.end(); -} - -static bool isValNoDefMove(const MachineInstr *MI, unsigned DR, unsigned SR, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI) { - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) - ; - else if (MI->isExtractSubreg()) { - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); - } else if (MI->isSubregToReg() || - MI->isInsertSubreg()) { - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(2).getReg(); - } else - return false; - return (SrcReg == SR || TRI->isSuperRegister(SR, SrcReg)) && - (DstReg == DR || TRI->isSuperRegister(DR, DstReg)); -} - -/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of -/// the specified live interval is defined by a copy from the specified -/// register. -bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, - LiveRange *LR, - unsigned Reg) { - unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno); - if (SrcReg == Reg) - return true; - // FIXME: Do isPHIDef and isDefAccurate both need to be tested?
- if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) && - TargetRegisterInfo::isPhysicalRegister(li.reg) && - *tri_->getSuperRegisters(li.reg)) { - // It's a sub-register live interval, we may not have precise information. - // Re-compute it. - MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start); - if (DefMI && isValNoDefMove(DefMI, li.reg, Reg, tii_, tri_)) { - // Cache computed info. - LR->valno->def = LR->start; - LR->valno->setCopy(DefMI); - return true; - } - } - return false; -} - - -/// ValueLiveAt - Return true if the LiveRange pointed to by the given -/// iterator, or any subsequent range with the same value number, -/// is live at the given point. -bool SimpleRegisterCoalescing::ValueLiveAt(LiveInterval::iterator LRItr, - LiveInterval::iterator LREnd, - SlotIndex defPoint) const { - for (const VNInfo *valno = LRItr->valno; - (LRItr != LREnd) && (LRItr->valno == valno); ++LRItr) { - if (LRItr->contains(defPoint)) - return true; - } - - return false; -} - - -/// SimpleJoin - Attempt to joint the specified interval into this one. The -/// caller of this method must guarantee that the RHS only contains a single -/// value number and that the RHS is not defined by a copy from this -/// interval. This returns false if the intervals are not joinable, or it -/// joins them and returns true. -bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ - assert(RHS.containsOneValue()); - - // Some number (potentially more than one) value numbers in the current - // interval may be defined as copies from the RHS. Scan the overlapping - // portions of the LHS and RHS, keeping track of this and looking for - // overlapping live ranges that are NOT defined as copies. If these exist, we - // cannot coalesce. - - LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end(); - LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end(); - - if (LHSIt->start < RHSIt->start) { - LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start); - if (LHSIt != LHS.begin()) --LHSIt; - } else if (RHSIt->start < LHSIt->start) { - RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start); - if (RHSIt != RHS.begin()) --RHSIt; - } - - SmallVector EliminatedLHSVals; - - while (1) { - // Determine if these live intervals overlap. - bool Overlaps = false; - if (LHSIt->start <= RHSIt->start) - Overlaps = LHSIt->end > RHSIt->start; - else - Overlaps = RHSIt->end > LHSIt->start; - - // If the live intervals overlap, there are two interesting cases: if the - // LHS interval is defined by a copy from the RHS, it's ok and we record - // that the LHS value # is the same as the RHS. If it's not, then we cannot - // coalesce these live ranges and we bail out. - if (Overlaps) { - // If we haven't already recorded that this value # is safe, check it. - if (!InVector(LHSIt->valno, EliminatedLHSVals)) { - // If it's re-defined by an early clobber somewhere in the live range, - // then conservatively abort coalescing. - if (LHSIt->valno->hasRedefByEC()) - return false; - // Copy from the RHS? - if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) - return false; // Nope, bail out. - - if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def)) - // Here is an interesting situation: - // BB1: - // vr1025 = copy vr1024 - // .. - // BB2: - // vr1024 = op - // = vr1025 - // Even though vr1025 is copied from vr1024, it's not safe to - // coalesce them since the live range of vr1025 intersects the - // def of vr1024. 
This happens because vr1025 is assigned the - // value of the previous iteration of vr1024. +/// JoinIntervals - Attempt to join these two intervals. On failure, this +/// returns false. +bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) { + LiveInterval &RHS = li_->getInterval(CP.getSrcReg()); + DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; }); + + // If a live interval is a physical register, check for interference with any + // aliases. The interference check implemented here is a bit more conservative + // than the full interference check below. We allow overlapping live ranges + // only when one is a copy of the other. + if (CP.isPhys()) { + for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS) { + if (!li_->hasInterval(*AS)) + continue; + const LiveInterval &LHS = li_->getInterval(*AS); + LiveInterval::const_iterator LI = LHS.begin(); + for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); + RI != RE; ++RI) { + LI = std::lower_bound(LI, LHS.end(), RI->start); + // Does LHS have an overlapping live range starting before RI? + if ((LI != LHS.begin() && LI[-1].end > RI->start) && + (RI->start != RI->valno->def || + !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) { + DEBUG({ + dbgs() << "\t\tInterference from alias: "; + LHS.print(dbgs(), tri_); + dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; + }); return false; - EliminatedLHSVals.push_back(LHSIt->valno); - } - - // We know this entire LHS live range is okay, so skip it now. - if (++LHSIt == LHSEnd) break; - continue; - } + } - if (LHSIt->end < RHSIt->end) { - if (++LHSIt == LHSEnd) break; - } else { - // One interesting case to check here. It's possible that we have - // something like "X3 = Y" which defines a new value number in the LHS, - // and is the last use of this liverange of the RHS. In this case, we - // want to notice this copy (so that it gets coalesced away) even though - // the live ranges don't actually overlap. - if (LHSIt->start == RHSIt->end) { - if (InVector(LHSIt->valno, EliminatedLHSVals)) { - // We already know that this value number is going to be merged in - // if coalescing succeeds. Just skip the liverange. - if (++LHSIt == LHSEnd) break; - } else { - // If it's re-defined by an early clobber somewhere in the live range, - // then conservatively abort coalescing. - if (LHSIt->valno->hasRedefByEC()) + // Check that LHS ranges beginning in this range are copies. + for (; LI != LHS.end() && LI->start < RI->end; ++LI) { + if (LI->start != LI->valno->def || + !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) { + DEBUG({ + dbgs() << "\t\tInterference from alias: "; + LHS.print(dbgs(), tri_); + dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; + }); return false; - // Otherwise, if this is a copy from the RHS, mark it as being merged - // in. - if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) { - if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def)) - // Here is an interesting situation: - // BB1: - // vr1025 = copy vr1024 - // .. - // BB2: - // vr1024 = op - // = vr1025 - // Even though vr1025 is copied from vr1024, it's not safe to - // coalesced them since live range of vr1025 intersects the - // def of vr1024. This happens because vr1025 is assigned the - // value of the previous iteration of vr1024. - return false; - EliminatedLHSVals.push_back(LHSIt->valno); - - // We know this entire LHS live range is okay, so skip it now.
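The replacement JoinIntervals above opens with a physreg alias scan: both range lists are sorted, std::lower_bound jumps to the first possible overlap, and an overlap is tolerated only where it begins at a value defined by the copy being coalesced. A standalone model of that scan over plain structs; the DefIsCoalescableCopy flag stands in for the combination of the valno-def check and CP.isCoalescable on the defining instruction:

    #include <algorithm>
    #include <vector>

    struct Range {
      unsigned Start, End;        // half-open [Start, End) slot indices
      bool DefIsCoalescableCopy;  // range starts at its value's def, and that
                                  // def is the copy being coalesced
    };

    static bool startsBefore(const Range &R, unsigned Pos) {
      return R.Start < Pos;       // comparator for std::lower_bound by Start
    }

    // Return true if RHS may be merged into LHS: every overlap must involve
    // a range defined by the coalescable copy.
    bool compatibleWithAlias(const std::vector<Range> &LHS,
                             const std::vector<Range> &RHS) {
      std::vector<Range>::const_iterator LI = LHS.begin();
      for (std::vector<Range>::const_iterator RI = RHS.begin(),
           RE = RHS.end(); RI != RE; ++RI) {
        LI = std::lower_bound(LI, LHS.end(), RI->Start, startsBefore);
        // An LHS range starting before RI that runs into it?
        if (LI != LHS.begin() && (LI - 1)->End > RI->Start &&
            !RI->DefIsCoalescableCopy)
          return false;
        // LHS ranges beginning inside RI must themselves be copy defs.
        for (; LI != LHS.end() && LI->Start < RI->End; ++LI)
          if (!LI->DefIsCoalescableCopy)
            return false;
      }
      return true;
    }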
- if (++LHSIt == LHSEnd) break; } } } - - if (++RHSIt == RHSEnd) break; - } - } - - // If we got here, we know that the coalescing will be successful and that - // the value numbers in EliminatedLHSVals will all be merged together. Since - // the most common case is that EliminatedLHSVals has a single number, we - // optimize for it: if there is more than one value, we merge them all into - // the lowest numbered one, then handle the interval as if we were merging - // with one value number. - VNInfo *LHSValNo = NULL; - if (EliminatedLHSVals.size() > 1) { - // Loop through all the equal value numbers merging them into the smallest - // one. - VNInfo *Smallest = EliminatedLHSVals[0]; - for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) { - if (EliminatedLHSVals[i]->id < Smallest->id) { - // Merge the current notion of the smallest into the smaller one. - LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]); - Smallest = EliminatedLHSVals[i]; - } else { - // Merge into the smallest. - LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest); - } } - LHSValNo = Smallest; - } else if (EliminatedLHSVals.empty()) { - if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && - *tri_->getSuperRegisters(LHS.reg)) - // Imprecise sub-register information. Can't handle it. - return false; - llvm_unreachable("No copies from the RHS?"); - } else { - LHSValNo = EliminatedLHSVals[0]; - } - - // Okay, now that there is a single LHS value number that we're merging the - // RHS into, update the value number info for the LHS to indicate that the - // value number is defined where the RHS value number was. - const VNInfo *VNI = RHS.getValNumInfo(0); - LHSValNo->def = VNI->def; - LHSValNo->setCopy(VNI->getCopy()); - - // Okay, the final step is to loop over the RHS live intervals, adding them to - // the LHS. - if (VNI->hasPHIKill()) - LHSValNo->setHasPHIKill(true); - LHS.addKills(LHSValNo, VNI->kills); - LHS.MergeRangesInAsValue(RHS, LHSValNo); - - LHS.ComputeJoinedWeight(RHS); - - // Update regalloc hint if both are virtual registers. - if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && - TargetRegisterInfo::isVirtualRegister(RHS.reg)) { - std::pair RHSPref = mri_->getRegAllocationHint(RHS.reg); - std::pair LHSPref = mri_->getRegAllocationHint(LHS.reg); - if (RHSPref != LHSPref) - mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second); } - // Update the liveintervals of sub-registers. - if (TargetRegisterInfo::isPhysicalRegister(LHS.reg)) - for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS) - li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, LHS, - li_->getVNInfoAllocator()); - - return true; -} - -/// JoinIntervals - Attempt to join these two intervals. On failure, this -/// returns false. Otherwise, if one of the intervals being joined is a -/// physreg, this method always canonicalizes LHS to be it. The output -/// "RHS" will not have been modified, so we can use this information -/// below to update aliases. -bool -SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, - bool &Swapped) { // Compute the final value assignment, assuming that the live ranges can be // coalesced. SmallVector LHSValNoAssignments; @@ -2192,203 +1349,87 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, DenseMap RHSValsDefinedFromLHS; SmallVector NewVNInfo; - // If a live interval is a physical register, conservatively check if any - // of its sub-registers is overlapping the live interval of the virtual - // register. 
If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && - *tri_->getSubRegisters(LHS.reg)) { - // If it's coalescing a virtual register to a physical register, estimate - // its live interval length. This is the *cost* of scanning an entire live - // interval. If the cost is low, we'll do an exhaustive check instead. - - // If this is something like this: - // BB1: - // v1024 = op - // ... - // BB2: - // ... - // RAX = v1024 - // - // That is, the live interval of v1024 crosses a bb. Then we can't rely on - // less conservative check. It's possible a sub-register is defined before - // v1024 (or live in) and live out of BB1. - if (RHS.containsOneValue() && - li_->intervalIsInOneMBB(RHS) && - li_->getApproximateInstructionCount(RHS) <= 10) { - // Perform a more exhaustive check for some common cases. - if (li_->conflictsWithSubPhysRegRef(RHS, LHS.reg, true, JoinedCopies)) - return false; - } else { - for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DEBUG({ - dbgs() << "\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); - }); - return false; - } - } - } else if (TargetRegisterInfo::isPhysicalRegister(RHS.reg) && - *tri_->getSubRegisters(RHS.reg)) { - if (LHS.containsOneValue() && - li_->getApproximateInstructionCount(LHS) <= 10) { - // Perform a more exhaustive check for some common cases. - if (li_->conflictsWithSubPhysRegRef(LHS, RHS.reg, false, JoinedCopies)) - return false; - } else { - for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { - DEBUG({ - dbgs() << "\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); - }); - return false; - } - } - } + LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg()); + DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; }); - // Compute ultimate value numbers for the LHS and RHS values. - if (RHS.containsOneValue()) { - // Copies from a liveinterval with a single value are simple to handle and - // very common, handle the special case here. This is important, because - // often RHS is small and LHS is large (e.g. a physreg). - - // Find out if the RHS is defined as a copy from some value in the LHS. - int RHSVal0DefinedFromLHS = -1; - int RHSValID = -1; - VNInfo *RHSValNoInfo = NULL; - VNInfo *RHSValNoInfo0 = RHS.getValNumInfo(0); - unsigned RHSSrcReg = li_->getVNInfoSourceReg(RHSValNoInfo0); - if (RHSSrcReg == 0 || RHSSrcReg != LHS.reg) { - // If RHS is not defined as a copy from the LHS, we can use simpler and - // faster checks to see if the live ranges are coalescable. This joiner - // can't swap the LHS/RHS intervals though. - if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) { - return SimpleJoin(LHS, RHS); - } else { - RHSValNoInfo = RHSValNoInfo0; - } - } else { - // It was defined as a copy from the LHS, find out what value # it is. - RHSValNoInfo = - LHS.getLiveRangeContaining(RHSValNoInfo0->def.getPrevSlot())->valno; - RHSValID = RHSValNoInfo->id; - RHSVal0DefinedFromLHS = RHSValID; - } + // Loop over the value numbers of the LHS, seeing if any are defined from + // the RHS. + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? 
+ continue; - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.resize(LHS.getNumValNums(), NULL); - - // Okay, *all* of the values in LHS that are defined as a copy from RHS - // should now get updated. - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (unsigned LHSSrcReg = li_->getVNInfoSourceReg(VNI)) { - if (LHSSrcReg != RHS.reg) { - // If this is not a copy from the RHS, its value number will be - // unmodified by the coalescing. - NewVNInfo[VN] = VNI; - LHSValNoAssignments[VN] = VN; - } else if (RHSValID == -1) { - // Otherwise, it is a copy from the RHS, and we don't already have a - // value# for it. Keep the current value number, but remember it. - LHSValNoAssignments[VN] = RHSValID = VN; - NewVNInfo[VN] = RHSValNoInfo; - LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0; - } else { - // Otherwise, use the specified value #. - LHSValNoAssignments[VN] = RHSValID; - if (VN == (unsigned)RHSValID) { // Else this val# is dead. - NewVNInfo[VN] = RHSValNoInfo; - LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0; - } - } - } else { - NewVNInfo[VN] = VNI; - LHSValNoAssignments[VN] = VN; - } - } + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; - assert(RHSValID != -1 && "Didn't find value #?"); - RHSValNoAssignments[0] = RHSValID; - if (RHSVal0DefinedFromLHS != -1) { - // This path doesn't go through ComputeUltimateVN so just set - // it to anything. - RHSValsDefinedFromLHS[RHSValNoInfo0] = (VNInfo*)1; - } - } else { - // Loop over the value numbers of the LHS, seeing if any are defined from - // the RHS. - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? - continue; + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + if (!CP.isCoalescable(VNI->getCopy())) + continue; - // DstReg is known to be a register in the LHS interval. If the src is - // from the RHS interval, we can use its value #. - if (li_->getVNInfoSourceReg(VNI) != RHS.reg) - continue; + // Figure out the value # from the RHS. + LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + LHSValsDefinedFromRHS[VNI] = lr->valno; + } - // Figure out the value # from the RHS. - LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - assert(lr && "Cannot find live range"); - LHSValsDefinedFromRHS[VNI] = lr->valno; - } + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? + continue; - // Loop over the value numbers of the RHS, seeing if any are defined from - // the LHS. - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? - continue; + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; - // DstReg is known to be a register in the RHS interval. If the src is - // from the LHS interval, we can use its value #. 
- if (li_->getVNInfoSourceReg(VNI) != LHS.reg) - continue; + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. + if (!CP.isCoalescable(VNI->getCopy())) + continue; - // Figure out the value # from the LHS. - LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - assert(lr && "Cannot find live range"); - RHSValsDefinedFromLHS[VNI] = lr->valno; - } + // Figure out the value # from the LHS. + LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + RHSValsDefinedFromLHS[VNI] = lr->valno; + } - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - ComputeUltimateVN(VNI, NewVNInfo, - LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, - LHSValNoAssignments, RHSValNoAssignments); + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + ComputeUltimateVN(VNI, NewVNInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments); + } + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { + NewVNInfo.push_back(VNI); + RHSValNoAssignments[VN] = NewVNInfo.size()-1; + continue; } - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - // If this value number isn't a copy from the LHS, it's a new number. - if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { - NewVNInfo.push_back(VNI); - RHSValNoAssignments[VN] = NewVNInfo.size()-1; - continue; - } - ComputeUltimateVN(VNI, NewVNInfo, - RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, - RHSValNoAssignments, LHSValNoAssignments); - } + ComputeUltimateVN(VNI, NewVNInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments); } // Armed with the mappings of LHS/RHS values to ultimate values, walk the @@ -2399,15 +1440,17 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, LiveInterval::const_iterator JE = RHS.end(); // Skip ahead until the first place of potential sharing. 
- if (I->start < J->start) { - I = std::upper_bound(I, IE, J->start); - if (I != LHS.begin()) --I; - } else if (J->start < I->start) { - J = std::upper_bound(J, JE, I->start); - if (J != RHS.begin()) --J; + if (I != IE && J != JE) { + if (I->start < J->start) { + I = std::upper_bound(I, IE, J->start); + if (I != LHS.begin()) --I; + } else if (J->start < I->start) { + J = std::upper_bound(J, JE, I->start); + if (J != RHS.begin()) --J; + } } - while (1) { + while (I != IE && J != JE) { // Determine if these two live ranges overlap. bool Overlaps; if (I->start < J->start) { @@ -2429,13 +1472,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, return false; } - if (I->end < J->end) { + if (I->end < J->end) ++I; - if (I == IE) break; - } else { + else ++J; - if (J == JE) break; - } } // Update kill info. Some live ranges are extended due to copy coalescing. @@ -2443,10 +1483,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, E = LHSValsDefinedFromRHS.end(); I != E; ++I) { VNInfo *VNI = I->first; unsigned LHSValID = LHSValNoAssignments[VNI->id]; - NewVNInfo[LHSValID]->removeKill(VNI->def); if (VNI->hasPHIKill()) NewVNInfo[LHSValID]->setHasPHIKill(true); - RHS.addKills(NewVNInfo[LHSValID], VNI->kills); } // Update kill info. Some live ranges are extended due to copy coalescing. @@ -2454,25 +1492,19 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, E = RHSValsDefinedFromLHS.end(); I != E; ++I) { VNInfo *VNI = I->first; unsigned RHSValID = RHSValNoAssignments[VNI->id]; - NewVNInfo[RHSValID]->removeKill(VNI->def); if (VNI->hasPHIKill()) NewVNInfo[RHSValID]->setHasPHIKill(true); - LHS.addKills(NewVNInfo[RHSValID], VNI->kills); } + if (LHSValNoAssignments.empty()) + LHSValNoAssignments.push_back(-1); + if (RHSValNoAssignments.empty()) + RHSValNoAssignments.push_back(-1); + // If we get here, we know that we can coalesce the live ranges. Ask the // intervals to coalesce themselves now. - if ((RHS.ranges.size() > LHS.ranges.size() && - TargetRegisterInfo::isVirtualRegister(LHS.reg)) || - TargetRegisterInfo::isPhysicalRegister(RHS.reg)) { - RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo, - mri_); - Swapped = true; - } else { - LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, - mri_); - Swapped = false; - } + LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, + mri_); return true; } @@ -2513,15 +1545,10 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, // If this isn't a copy nor an extract_subreg, we can't join intervals.
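The CopyCoalesceInMBB hunk that follows drops the old EXTRACT_SUBREG/INSERT_SUBREG cases in favor of the generic COPY instruction, keeping SUBREG_TO_REG as the one special case because its source register lives in operand 2 rather than operand 1. A compact sketch of that operand-position distinction, with a stand-in instruction type rather than llvm::MachineInstr:

    // Toy stand-in: operand 0 is the def; a plain copy reads operand 1,
    // SUBREG_TO_REG reads operand 2 (operand 1 is the subreg index constant).
    struct Instr {
      enum Kind { Copy, SubregToReg, Other } K;
      unsigned Ops[3];
    };

    bool extractCopyOperands(const Instr &MI, unsigned &SrcReg,
                             unsigned &DstReg) {
      switch (MI.K) {
      case Instr::Copy:        DstReg = MI.Ops[0]; SrcReg = MI.Ops[1]; return true;
      case Instr::SubregToReg: DstReg = MI.Ops[0]; SrcReg = MI.Ops[2]; return true;
      default:                 return false; // not a coalescable copy
      }
    }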
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; bool isInsUndef = false; - if (Inst->isExtractSubreg()) { + if (Inst->isCopy()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); - } else if (Inst->isInsertSubreg()) { - DstReg = Inst->getOperand(0).getReg(); - SrcReg = Inst->getOperand(2).getReg(); - if (Inst->getOperand(1).isUndef()) - isInsUndef = true; - } else if (Inst->isInsertSubreg() || Inst->isSubregToReg()) { + } else if (Inst->isSubregToReg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(2).getReg(); } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) @@ -2650,6 +1677,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, E = mri_->use_nodbg_end(); I != E; ++I) { MachineOperand &Use = I.getOperand(); MachineInstr *UseMI = Use.getParent(); + if (UseMI->isIdentityCopy()) + continue; unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && SrcReg == DstReg && SrcSubIdx == DstSubIdx) @@ -2680,7 +1709,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, // Ignore identity copies. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && + if (!MI->isIdentityCopy() && + !(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { MachineOperand &Use = MI->getOperand(i); @@ -2750,10 +1780,9 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // Delete all coalesced copies. bool DoDelete = true; if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - assert((MI->isExtractSubreg() || MI->isInsertSubreg() || - MI->isSubregToReg()) && "Unrecognized copy instruction"); - DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + assert(MI->isCopyLike() && "Unrecognized copy instruction"); + SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) // Do not delete extract_subreg, insert_subreg of physical // registers unless the definition is dead. e.g. // %DO = INSERT_SUBREG %D0, %S0, 1 @@ -2762,7 +1791,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { DoDelete = false; } if (MI->allDefsAreDead()) { - LiveInterval &li = li_->getInterval(DstReg); + LiveInterval &li = li_->getInterval(SrcReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); DoDelete = true; @@ -2812,12 +1841,13 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // If the move will be an identity move delete it bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); - if (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx) { + if (MI->isIdentityCopy() || + (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) { if (li_->hasInterval(SrcReg)) { LiveInterval &RegInt = li_->getInterval(SrcReg); // If def of this move instruction is dead, remove its live range - // from the dstination register's live interval. - if (MI->registerDefIsDead(DstReg)) { + // from the destination register's live interval. + if (MI->allDefsAreDead()) { if (!ShortenDeadCopySrcLiveRange(RegInt, MI)) ShortenDeadCopyLiveRange(RegInt, MI); } @@ -2832,17 +1862,13 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // Check for now unnecessary kill flags. 
if (li_->isNotInMIMap(MI)) continue; - SlotIndex UseIdx = li_->getInstructionIndex(MI).getUseIndex(); + SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isKill()) continue; unsigned reg = MO.getReg(); if (!reg || !li_->hasInterval(reg)) continue; - LiveInterval &LI = li_->getInterval(reg); - const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); - if (!LR || - (!LR->valno->isKill(UseIdx.getDefIndex()) && - LR->valno->def != UseIdx.getDefIndex())) + if (!li_->getInterval(reg).killedAt(DefIdx)) MO.setIsKill(false); } } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 1be04f3..e154da6 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -105,21 +105,12 @@ namespace llvm { /// possible to coalesce this interval, but it may be possible if other /// things get coalesced, then it returns true by reference in 'Again'. bool JoinCopy(CopyRec &TheCopy, bool &Again); - + /// JoinIntervals - Attempt to join these two intervals. On failure, this - /// returns false. Otherwise, if one of the intervals being joined is a - /// physreg, this method always canonicalizes DestInt to be it. The output - /// "SrcInt" will not have been modified, so we can use this information - /// below to update aliases. - bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped); - - /// SimpleJoin - Attempt to join the specified interval into this one. The - /// caller of this method must guarantee that the RHS only contains a single - /// value number and that the RHS is not defined by a copy from this - /// interval. This returns false if the intervals are not joinable, or it - /// joins them and returns true. - bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS); - + /// returns false. The output "SrcInt" will not have been modified, so we can + /// use this information below to update aliases. + bool JoinIntervals(CoalescerPair &CP); + /// Return true if the two specified registers belong to different register /// classes. The registers may be either phys or virt regs. bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; @@ -128,8 +119,7 @@ namespace llvm { /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single /// value number, eliminating a copy. - bool AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB, - MachineInstr *CopyMI); + bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); /// HasOtherReachingDefs - Return true if there are definitions of IntB /// other than BValNo val# that can reach uses of AValno val# of IntA. @@ -140,8 +130,7 @@ namespace llvm { /// If the source value number is defined by a commutable instruction and /// its other operand is coalesced to the copy dest register, see if we /// can transform the copy into a noop by commuting the definition. 
- bool RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB, - MachineInstr *CopyMI); + bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); /// TrimLiveIntervalToLastUse - If there is a last use in the same basic /// block as the copy instruction, trim the live interval to the last use @@ -155,28 +144,6 @@ namespace llvm { bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, unsigned DstSubIdx, MachineInstr *CopyMI); - /// CanCoalesceWithImpDef - Returns true if the specified copy instruction - /// from an implicit def to another register can be coalesced away. - bool CanCoalesceWithImpDef(MachineInstr *CopyMI, - LiveInterval &li, LiveInterval &ImpLi) const; - - /// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an - /// implicit_def and it is being removed. Turn all copies from this value# - /// into implicit_defs. - void TurnCopiesFromValNoToImpDefs(LiveInterval &li, VNInfo *VNI); - - /// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a - /// a virtual destination register with physical source register. - bool isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, - MachineBasicBlock *CopyMBB, - LiveInterval &DstInt, LiveInterval &SrcInt); - - /// isWinToJoinVRWithDstPhysReg - Return true if it's worth while to join a - /// copy from a virtual source register to a physical destination register. - bool isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, - MachineBasicBlock *CopyMBB, - LiveInterval &DstInt, LiveInterval &SrcInt); - /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. bool isWinToJoinCrossClass(unsigned SrcReg, @@ -185,43 +152,12 @@ namespace llvm { const TargetRegisterClass *DstRC, const TargetRegisterClass *NewRC); - /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual - /// register with a physical register, check if any of the virtual register - /// operand is a sub-register use or def. If so, make sure it won't result - /// in an illegal extract_subreg or insert_subreg instruction. - bool HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, - unsigned VirtReg, unsigned PhysReg); - - /// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce - /// an extract_subreg where dst is a physical register, e.g. - /// cl = EXTRACT_SUBREG reg1024, 1 - bool CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned SrcReg, - unsigned SubIdx, unsigned &RealDstReg); - - /// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce - /// an insert_subreg where src is a physical register, e.g. - /// reg1024 = INSERT_SUBREG reg1024, c1, 0 - bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg, - unsigned SubIdx, unsigned &RealDstReg); - - /// ValueLiveAt - Return true if the LiveRange pointed to by the given - /// iterator, or any subsequent range with the same value number, - /// is live at the given point. - bool ValueLiveAt(LiveInterval::iterator LRItr, LiveInterval::iterator LREnd, - SlotIndex defPoint) const; - - /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of - /// the specified live interval is defined by a copy from the specified - /// register. - bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR, - unsigned Reg); - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero.
If DstReg is a /// physical register and the existing subregister number of the def / use /// being updated is not zero, make sure to set it to the correct physical /// subregister. - void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); + void UpdateRegDefsUses(const CoalescerPair &CP); /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. /// Return true if live interval is removed. @@ -238,6 +174,10 @@ namespace llvm { /// it as well. bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); + /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the + /// VNInfo copy flag for DstReg and all aliases. + void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI); + /// lastRegisterUse - Returns the last use of the specific register between /// cycles Start and End or NULL if there are no uses. MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 059e8d6..e90869d 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -46,6 +46,8 @@ namespace { Constant *UnregisterFn; Constant *BuiltinSetjmpFn; Constant *FrameAddrFn; + Constant *StackAddrFn; + Constant *StackRestoreFn; Constant *LSDAAddrFn; Value *PersonalityFn; Constant *SelectorFn; @@ -69,7 +71,7 @@ namespace { void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, SwitchInst *CatchSwitch); - void splitLiveRangesLiveAcrossInvokes(SmallVector &Invokes); + void splitLiveRangesAcrossInvokes(SmallVector &Invokes); bool insertSjLjEHSupport(Function &F); }; } // end anonymous namespace @@ -107,6 +109,8 @@ bool SjLjEHPass::doInitialization(Module &M) { PointerType::getUnqual(FunctionContextTy), (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); + StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); + StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); @@ -175,8 +179,10 @@ static void MarkBlocksLiveIn(BasicBlock *BB, std::set &LiveBBs) { /// we spill into a stack location, guaranteeing that there is nothing live /// across the unwind edge. This process also splits all critical edges /// coming out of invoke's. +/// FIXME: Move this function to a common utility file (Local.cpp?) so +/// both SjLj and LowerInvoke can use it. void SjLjEHPass:: -splitLiveRangesLiveAcrossInvokes(SmallVector &Invokes) { +splitLiveRangesAcrossInvokes(SmallVector &Invokes) { // First step, split all critical edges from invoke instructions. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; @@ -198,16 +204,33 @@ splitLiveRangesLiveAcrossInvokes(SmallVector &Invokes) { ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - // This is always a no-op cast because we're casting AI to AI->getType() so - // src and destination types are identical. BitCast is the only possibility. - CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Normally its is forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. 
However, we're - // replacing it here with the same value it was constructed with to simply - // make NC its user. - NC->setOperand(0, AI); + const Type *Ty = AI->getType(); + // Aggregate types can't be cast, but are legal argument types, so we have + // to handle them differently. We use an extract/insert pair as a + // lightweight method to achieve the same goal. + if (isa(Ty) || isa(Ty) || isa(Ty)) { + Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); + Instruction *NI = InsertValueInst::Create(AI, EI, 0); + NI->insertAfter(EI); + AI->replaceAllUsesWith(NI); + // Set the operand of the instructions back to the AllocaInst. + EI->setOperand(0, AI); + NI->setOperand(0, AI); + } else { + // This is always a no-op cast because we're casting AI to AI->getType() + // so src and destination types are identical. BitCast is the only + // possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Set the operand of the cast instruction back to the AllocaInst. + // Normally it's forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, + // we're replacing it here with the same value it was constructed with. + // We do this because the above replaceAllUsesWith() clobbered the + // operand, but we want this one to remain. + NC->setOperand(0, AI); + } } // Finally, scan the code looking for instructions with bad live ranges. @@ -266,6 +289,9 @@ splitLiveRangesLiveAcrossInvokes(SmallVector &Invokes) { } // If we decided we need a spill, do it. + // FIXME: Spilling this way is overkill, as it forces all uses of + // the value to be reloaded from the stack slot, even those that aren't + // in the unwind blocks. We should be more selective. if (NeedsSpill) { ++NumSpilled; DemoteRegToStack(*Inst, true); @@ -294,22 +320,34 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // If we don't have any invokes or unwinds, there's nothing to do. if (Unwinds.empty() && Invokes.empty()) return false; - // Find the eh.selector.* and eh.exception calls. We'll use the first - // eh.selector to determine the right personality function to use. For - // SJLJ, we always use the same personality for the whole function, - // not on a per-selector basis. + // Find the eh.selector.*, eh.exception and alloca calls. + // + // Remember any allocas() that aren't in the entry block, as the + // jmpbuf saved SP will need to be updated for them. + // + // We'll use the first eh.selector to determine the right personality + // function to use. For SJLJ, we always use the same personality for the + // whole function, not on a per-selector basis. // FIXME: That's a bit ugly. Better way? SmallVector EH_Selectors; SmallVector EH_Exceptions; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + SmallVector JmpbufUpdatePoints; + // Note: Skip the entry block since there's nothing there that interests + // us. eh.selector and eh.exception shouldn't ever be there, and we + // want to disregard any allocas that are there. 
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (CallInst *CI = dyn_cast<CallInst>(I)) { if (CI->getCalledFunction() == SelectorFn) { - if (!PersonalityFn) PersonalityFn = CI->getOperand(2); + if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1); EH_Selectors.push_back(CI); } else if (CI->getCalledFunction() == ExceptionFn) { EH_Exceptions.push_back(CI); + } else if (CI->getCalledFunction() == StackRestoreFn) { + JmpbufUpdatePoints.push_back(CI); } + } else if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + JmpbufUpdatePoints.push_back(AI); } } } @@ -329,7 +367,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // we spill into a stack location, guaranteeing that there is nothing live // across the unwind edge. This process also splits all critical edges // coming out of invoke's. - splitLiveRangesLiveAcrossInvokes(Invokes); + splitLiveRangesAcrossInvokes(Invokes); BasicBlock *EntryBB = F.begin(); // Create an alloca for the incoming jump buffer ptr and the new jump buffer @@ -419,7 +457,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Populate the Function Context // 1. LSDA address // 2. Personality function address - // 3. jmpbuf (save FP and call eh.sjlj.setjmp) + // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp) // LSDA address Idxs[0] = Zero; @@ -440,31 +478,41 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { new StoreInst(PersonalityFn, PersonalityFieldPtr, true, EntryBB->getTerminator()); - // Save the frame pointer. + // Save the frame pointer. Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *FieldPtr + Value *JBufPtr = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, "jbuf_gep", EntryBB->getTerminator()); Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *ElemPtr = - GetElementPtrInst::Create(FieldPtr, Idxs, Idxs+2, "jbuf_fp_gep", + Value *FramePtr = + GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep", EntryBB->getTerminator()); Value *Val = CallInst::Create(FrameAddrFn, ConstantInt::get(Int32Ty, 0), "fp", EntryBB->getTerminator()); - new StoreInst(Val, ElemPtr, true, EntryBB->getTerminator()); - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf + new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); + + // Save the stack pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *StackPtr = + GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep", + EntryBB->getTerminator()); + + Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); + new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); + + // Call the setjmp intrinsic. It fills in the rest of the jmpbuf. Value *SetjmpArg = - CastInst::Create(Instruction::BitCast, FieldPtr, + CastInst::Create(Instruction::BitCast, JBufPtr, Type::getInt8PtrTy(F.getContext()), "", EntryBB->getTerminator()); Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "dispatch", EntryBB->getTerminator()); - // check the return value of the setjmp. non-zero goes to dispatcher + // Check the return value of the setjmp. Non-zero goes to the dispatcher. Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), ICmpInst::ICMP_EQ, DispatchVal, Zero, "notunwind"); @@ -509,6 +557,16 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { Unwinds[i]->eraseFromParent(); } + // Following any allocas not in the entry block, update the saved SP + // in the jmpbuf to the new value.
+ for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { + Instruction *AI = JmpbufUpdatePoints[i]; + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(AI); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); + } + // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. for (unsigned i = 0, e = Returns.size(); i != e; ++i) diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 6110ef5..7a227cf 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -213,9 +213,11 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { - os << getIndex(); + os << entry().getIndex(); if (isPHI()) os << "*"; + else + os << "LudS"[getSlot()]; } // Dump a SlotIndex to stderr. diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index a7b2efe..56bcb28 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -14,18 +14,20 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; namespace { - enum SpillerName { trivial, standard, splitting }; + enum SpillerName { trivial, standard, splitting, inline_ }; } static cl::opt @@ -35,6 +37,7 @@ spillerOpt("spiller", cl::values(clEnumVal(trivial, "trivial spiller"), clEnumVal(standard, "default spiller"), clEnumVal(splitting, "splitting spiller"), + clEnumValN(inline_, "inline", "inline spiller"), clEnumValEnd), cl::init(standard)); @@ -53,8 +56,8 @@ protected: const TargetInstrInfo *tii; const TargetRegisterInfo *tri; VirtRegMap *vrm; - - /// Construct a spiller base. + + /// Construct a spiller base. SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) : mf(mf), lis(lis), vrm(vrm) { @@ -67,7 +70,8 @@ protected: /// Add spill ranges for every use/def of the live interval, inserting loads /// immediately before each use, and stores after each def. No folding or /// remat is attempted. - std::vector trivialSpillEverywhere(LiveInterval *li) { + void trivialSpillEverywhere(LiveInterval *li, + std::vector &newIntervals) { DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && @@ -78,8 +82,6 @@ protected: DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n"); - std::vector added; - const TargetRegisterClass *trc = mri->getRegClass(li->reg); unsigned ss = vrm->assignVirt2StackSlot(li->reg); @@ -96,7 +98,7 @@ protected: do { ++regItr; } while (regItr != mri->reg_end() && (&*regItr == mi)); - + // Collect uses & defs for this instr. SmallVector indices; bool hasUse = false; @@ -116,7 +118,7 @@ protected: vrm->assignVirt2StackSlot(newVReg, ss); LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); newLI->weight = HUGE_VALF; - + // Update the reg operands & kill flags. 
for (unsigned i = 0; i < indices.size(); ++i) { unsigned mopIdx = indices[i]; @@ -136,10 +138,10 @@ protected: MachineInstr *loadInstr(prior(miItr)); SlotIndex loadIndex = lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); + vrm->addSpillSlotUse(ss, loadInstr); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator()); - loadVNI->addKill(endIndex); newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); } @@ -150,17 +152,15 @@ protected: MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); + vrm->addSpillSlotUse(ss, storeInstr); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator()); - storeVNI->addKill(storeIndex); newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); } - added.push_back(newLI); + newIntervals.push_back(newLI); } - - return added; } }; @@ -176,11 +176,12 @@ public: TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) : SpillerBase(mf, lis, vrm) {} - std::vector spill(LiveInterval *li, - SmallVectorImpl &spillIs, - SlotIndex*) { + void spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &, + SlotIndex*) { // Ignore spillIs - we don't use it. - return trivialSpillEverywhere(li); + trivialSpillEverywhere(li, newIntervals); } }; @@ -200,10 +201,13 @@ public: : lis(lis), loopInfo(loopInfo), vrm(vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. - std::vector spill(LiveInterval *li, - SmallVectorImpl &spillIs, - SlotIndex*) { - return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); + void spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &spillIs, + SlotIndex*) { + std::vector added = + lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); + newIntervals.insert(newIntervals.end(), added.begin(), added.end()); } }; @@ -214,7 +218,7 @@ namespace { /// When a call to spill is placed this spiller will first try to break the /// interval up into its component values (one new interval per value). /// If this fails, or if a call is placed to spill a previously split interval -/// then the spiller falls back on the standard spilling mechanism. +/// then the spiller falls back on the standard spilling mechanism. 
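The SplittingSpiller described above is a straightforward strategy-with-fallback: try the cheap per-value split first, defer to the standard spiller otherwise, exactly the shape of the updated spill() override below. A skeletal sketch of the pattern; the names mirror the patch but the bodies are placeholders, not the real implementations:

    #include <vector>

    struct LiveInterval;                    // opaque in this sketch

    class StandardSpiller {
    public:
      virtual ~StandardSpiller() {}
      // Fallback path, always applicable (real body elided).
      virtual void spill(LiveInterval *li, std::vector<LiveInterval*> &out) {
        (void)li; (void)out;  // insert loads before uses, stores after defs
      }
    };

    class SplittingSpiller : public StandardSpiller {
      // Placeholder heuristic; the real pass also refuses already-split
      // intervals.
      bool worthTryingToSplit(LiveInterval *li) const { return li != 0; }
      void tryVNISplit(LiveInterval*, std::vector<LiveInterval*>&) {
        // one new interval per value number (elided)
      }
    public:
      virtual void spill(LiveInterval *li, std::vector<LiveInterval*> &out) {
        if (worthTryingToSplit(li))
          tryVNISplit(li, out);
        else
          StandardSpiller::spill(li, out);
      }
    };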
class SplittingSpiller : public StandardSpiller { public: SplittingSpiller(MachineFunction *mf, LiveIntervals *lis, @@ -226,22 +230,21 @@ public: tri = mf->getTarget().getRegisterInfo(); } - std::vector spill(LiveInterval *li, - SmallVectorImpl &spillIs, - SlotIndex *earliestStart) { - - if (worthTryingToSplit(li)) { - return tryVNISplit(li, earliestStart); - } - // else - return StandardSpiller::spill(li, spillIs, earliestStart); + void spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &spillIs, + SlotIndex *earliestStart) { + if (worthTryingToSplit(li)) + tryVNISplit(li, earliestStart); + else + StandardSpiller::spill(li, newIntervals, spillIs, earliestStart); } private: MachineRegisterInfo *mri; const TargetInstrInfo *tii; - const TargetRegisterInfo *tri; + const TargetRegisterInfo *tri; DenseSet alreadySplit; bool worthTryingToSplit(LiveInterval *li) const { @@ -258,18 +261,18 @@ private: SmallVector vnis; std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis)); - + for (SmallVectorImpl::iterator vniItr = vnis.begin(), vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) { VNInfo *vni = *vniItr; - - // Skip unused VNIs, or VNIs with no kills. - if (vni->isUnused() || vni->kills.empty()) + + // Skip unused VNIs. + if (vni->isUnused()) continue; DEBUG(dbgs() << " Extracted Val #" << vni->id << " as "); LiveInterval *splitInterval = extractVNI(li, vni); - + if (splitInterval != 0) { DEBUG(dbgs() << *splitInterval << "\n"); added.push_back(splitInterval); @@ -281,12 +284,12 @@ private: } else { DEBUG(dbgs() << "0\n"); } - } + } DEBUG(dbgs() << "Original LI: " << *li << "\n"); // If the original interval still contains some live ranges - // add it to added and alreadySplit. + // add it to added and alreadySplit. if (!li->empty()) { added.push_back(li); alreadySplit.insert(li); @@ -302,16 +305,15 @@ private: /// Extract the given value number from the interval. LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const { assert(vni->isDefAccurate() || vni->isPHIDef()); - assert(!vni->kills.empty()); - // Create a new vreg and live interval, copy VNI kills & ranges over. + // Create a new vreg and live interval, copy VNI ranges over. const TargetRegisterClass *trc = mri->getRegClass(li->reg); unsigned newVReg = mri->createVirtualRegister(trc); vrm->grow(); LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator()); - // Start by copying all live ranges in the VN to the new interval. + // Start by copying all live ranges in the VN to the new interval. for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end(); rItr != rEnd; ++rItr) { if (rItr->valno == vni) { @@ -319,7 +321,7 @@ private: } } - // Erase the old VNI & ranges. + // Erase the old VNI & ranges. li->removeValNo(vni); // Collect all current uses of the register belonging to the given VNI. @@ -336,15 +338,13 @@ private: // Insert a copy at the start of the MBB. The range preceding the // copy will be attached to the original LiveInterval.
MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def); - tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc, - DebugLoc()); - MachineInstr *copyMI = defMBB->begin(); - copyMI->addRegisterKilled(li->reg, tri); + MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(), + tii->get(TargetOpcode::COPY), newVReg) + .addReg(li->reg, RegState::Kill); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB), 0, false, lis->getVNInfoAllocator()); phiDefVNI->setIsPHIDef(true); - phiDefVNI->addKill(copyIdx.getDefIndex()); li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI)); LiveRange *oldPHIDefRange = newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB)); @@ -367,8 +367,8 @@ private: newVNI->setIsPHIDef(false); // not a PHI def anymore. newVNI->setIsDefAccurate(true); } else { - // non-PHI def. Rename the def. If it's two-addr that means renaming the use - // and inserting a new copy too. + // non-PHI def. Rename the def. If it's two-addr that means renaming the + // use and inserting a new copy too. MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def); // We'll rename this now, so we can remove it from uses. uses.erase(defInst); @@ -384,38 +384,26 @@ private: twoAddrUseIsUndef = true; } } - + SlotIndex defIdx = lis->getInstructionIndex(defInst); newVNI->def = defIdx.getDefIndex(); if (isTwoAddr && !twoAddrUseIsUndef) { MachineBasicBlock *defMBB = defInst->getParent(); - tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc, - DebugLoc()); - MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst)); + MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(), + tii->get(TargetOpcode::COPY), newVReg) + .addReg(li->reg, RegState::Kill); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - copyMI->addRegisterKilled(li->reg, tri); LiveRange *origUseRange = li->getLiveRangeContaining(newVNI->def.getUseIndex()); - VNInfo *origUseVNI = origUseRange->valno; origUseRange->end = copyIdx.getDefIndex(); - bool updatedKills = false; - for (unsigned k = 0; k < origUseVNI->kills.size(); ++k) { - if (origUseVNI->kills[k] == defIdx.getDefIndex()) { - origUseVNI->kills[k] = copyIdx.getDefIndex(); - updatedKills = true; - break; - } - } - assert(updatedKills && "Failed to update VNI kill list."); VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI, true, lis->getVNInfoAllocator()); - copyVNI->addKill(defIdx.getDefIndex()); LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI); newLI->addRange(copyRange); - } + } } - + for (std::set::iterator usesItr = uses.begin(), usesEnd = uses.end(); usesItr != usesEnd; ++usesItr) { @@ -435,7 +423,7 @@ private: // Check if this instr is two address. unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg); bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx); - + // Rename uses (and defs for two-address instrs). for (unsigned i = 0; i < useInst->getNumOperands(); ++i) { MachineOperand &mo = useInst->getOperand(i); @@ -451,10 +439,9 @@ private: // reg. 
       MachineBasicBlock *useMBB = useInst->getParent();
       MachineBasicBlock::iterator useItr(useInst);
-      tii->copyRegToReg(*useMBB, llvm::next(useItr), li->reg, newVReg, trc, trc,
-                        DebugLoc());
-      MachineInstr *copyMI = llvm::next(useItr);
-      copyMI->addRegisterKilled(newVReg, tri);
+      MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(),
+                                     tii->get(TargetOpcode::COPY), newVReg)
+                               .addReg(li->reg, RegState::Kill);
       SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
       // Change the old two-address defined range & vni to start at
@@ -470,56 +457,44 @@ private:
         VNInfo *copyVNI = newLI->getNextValue(useIdx.getDefIndex(), 0, true,
                                               lis->getVNInfoAllocator());
-        copyVNI->addKill(copyIdx.getDefIndex());
         LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
         newLI->addRange(copyRange);
       }
     }
-
-    // Iterate over any PHI kills - we'll need to insert new copies for them.
-    for (VNInfo::KillSet::iterator
-         killItr = newVNI->kills.begin(), killEnd = newVNI->kills.end();
-         killItr != killEnd; ++killItr) {
-      SlotIndex killIdx(*killItr);
-      if (killItr->isPHI()) {
-        MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
-        LiveRange *oldKillRange =
-          newLI->getLiveRangeContaining(killIdx);
-
-        assert(oldKillRange != 0 && "No kill range?");
-
-        tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(),
-                          li->reg, newVReg, trc, trc,
-                          DebugLoc());
-        MachineInstr *copyMI = prior(killMBB->getFirstTerminator());
-        copyMI->addRegisterKilled(newVReg, tri);
-        SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-        // Save the current end. We may need it to add a new range if the
-        // current range runs off the end of the MBB.
-        SlotIndex newKillRangeEnd = oldKillRange->end;
-        oldKillRange->end = copyIdx.getDefIndex();
+    // Iterate over any PHI kills - we'll need to insert new copies for them.
+    for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end();
+         LRI != LRE; ++LRI) {
+      if (LRI->valno != newVNI || LRI->end.isPHI())
+        continue;
+      SlotIndex killIdx = LRI->end;
+      MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
+      MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(),
+                                     DebugLoc(), tii->get(TargetOpcode::COPY),
+                                     li->reg)
+                               .addReg(newVReg, RegState::Kill);
+      SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-        if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
-          assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
-                 "PHI kill range doesn't reach kill-block end. Not sane.");
-          newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
-                                    newKillRangeEnd, newVNI));
-        }
+      // Save the current end. We may need it to add a new range if the
+      // current range runs off the end of the MBB.
+      SlotIndex newKillRangeEnd = LRI->end;
+      LRI->end = copyIdx.getDefIndex();
-        *killItr = oldKillRange->end;
-        VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
-                                              copyMI, true,
-                                              lis->getVNInfoAllocator());
-        newKillVNI->addKill(lis->getMBBTerminatorGap(killMBB));
-        newKillVNI->setHasPHIKill(true);
-        li->addRange(LiveRange(copyIdx.getDefIndex(),
-                               lis->getMBBEndIdx(killMBB),
-                               newKillVNI));
+      if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
+        assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
+               "PHI kill range doesn't reach kill-block end. Not sane.");
+        newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
+                                  newKillRangeEnd, newVNI));
       }
+      VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
+                                            copyMI, true,
+                                            lis->getVNInfoAllocator());
+      newKillVNI->setHasPHIKill(true);
+      li->addRange(LiveRange(copyIdx.getDefIndex(),
+                             lis->getMBBEndIdx(killMBB),
+                             newKillVNI));
     }
-    newVNI->setHasPHIKill(false);
     return newLI;
@@ -530,6 +505,13 @@ private:
} // end anonymous namespace

+namespace llvm {
+Spiller *createInlineSpiller(MachineFunction*,
+                             LiveIntervals*,
+                             const MachineLoopInfo*,
+                             VirtRegMap*);
+}
+
 llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
                                    const MachineLoopInfo *loopInfo,
                                    VirtRegMap *vrm) {
@@ -538,5 +520,6 @@ llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
   case trivial: return new TrivialSpiller(mf, lis, vrm);
   case standard: return new StandardSpiller(lis, loopInfo, vrm);
   case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm);
+  case inline_: return createInlineSpiller(mf, lis, loopInfo, vrm);
   }
}
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index dda52e8..450447b 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -33,11 +33,19 @@ namespace llvm {
   public:
     virtual ~Spiller() = 0;
-    /// Spill the given live range. The method used will depend on the Spiller
-    /// implementation selected.
-    virtual std::vector<LiveInterval*> spill(LiveInterval *li,
-                                             SmallVectorImpl<LiveInterval*> &spillIs,
-                                             SlotIndex *earliestIndex = 0) = 0;
+    /// spill - Spill the given live interval. The method used will depend on
+    /// the Spiller implementation selected.
+    ///
+    /// @param li            The live interval to be spilled.
+    /// @param spillIs       A list of intervals that are about to be spilled,
+    ///                      and so cannot be used for remat etc.
+    /// @param newIntervals  The newly created intervals will be appended here.
+    /// @param earliestIndex The earliest point for splitting. (OK, it's another
+    ///                      pointer to the allocator guts).
+    virtual void spill(LiveInterval *li,
+                       std::vector<LiveInterval*> &newIntervals,
+                       SmallVectorImpl<LiveInterval*> &spillIs,
+                       SlotIndex *earliestIndex = 0) = 0;
   };
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 8a6a727..ca5c28c 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -136,7 +136,7 @@ bool StackProtector::RequiresStackProtector() const {
 bool StackProtector::InsertStackProtectors() {
   BasicBlock *FailBB = 0;       // The basic block to jump to if check fails.
   AllocaInst *AI = 0;           // Place on stack that stores the stack guard.
-  Constant *StackGuardVar = 0;  // The stack guard variable.
+  Value *StackGuardVar = 0;     // The stack guard variable.
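// (Aside on the change made in the StackProtector hunk below, not itself
// part of the patch: TargetLowering::getStackCookieLocation lets a target
// report a fixed cookie address instead of the __stack_chk_guard global.
// A hypothetical override with x86-Linux-style values might look like:
//
//   bool MyTargetLowering::getStackCookieLocation(unsigned &AddressSpace,
//                                                 unsigned &Offset) const {
//     AddressSpace = 256; // e.g. x86 models the %gs segment as AS 256
//     Offset = 0x14;      // glibc keeps the cookie at %gs:0x14 on i386
//     return true;
//   }
//
// The pass then folds AddressSpace and Offset into an inttoptr constant.)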
for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { BasicBlock *BB = I++; @@ -153,9 +153,17 @@ bool StackProtector::InsertStackProtectors() { // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) // - PointerType *PtrTy = PointerType::getUnqual( - Type::getInt8Ty(RI->getContext())); - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + unsigned AddressSpace, Offset; + if (TLI->getStackCookieLocation(AddressSpace, Offset)) { + Constant *OffsetVal = + ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); + + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, + PointerType::get(PtrTy, AddressSpace)); + } else { + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + } BasicBlock &Entry = F->getEntryBlock(); Instruction *InsPt = &Entry.front(); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 7f3b452..eff3c33 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -508,8 +509,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, // Abort the use is actually a sub-register def. We don't have enough // information to figure out if it is really legal. - if (MO.getSubReg() || MII->isExtractSubreg() || - MII->isInsertSubreg() || MII->isSubregToReg()) + if (MO.getSubReg() || MII->isSubregToReg()) return false; const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); @@ -571,7 +571,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, // Abort the use is actually a sub-register use. We don't have enough // information to figure out if it is really legal. 
- if (MO.getSubReg() || MII->isExtractSubreg()) + if (MO.getSubReg()) return false; const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); @@ -610,8 +610,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, DEBUG(MI->dump()); ++NumLoadElim; } else { - TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC, - MI->getDebugLoc()); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), + DstReg).addReg(Reg); ++NumRegRepl; } @@ -627,8 +627,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, DEBUG(MI->dump()); ++NumStoreElim; } else { - TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC, - MI->getDebugLoc()); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(SrcReg); ++NumRegRepl; } diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 142398c..59315cf 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterCoalescer.h" @@ -695,9 +696,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, // Insert copy from curr.second to a temporary at // the Phi defining curr.second MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second); - TII->copyRegToReg(*PI->getParent(), PI, t, - curr.second, RC, RC, DebugLoc()); - + BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY), + t).addReg(curr.second); DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t << "\n"); @@ -712,8 +712,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, } // Insert copy from map[curr.first] to curr.second - TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, - map[curr.first], RC, RC, DebugLoc()); + BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), curr.second).addReg(map[curr.first]); map[curr.first] = curr.second; DEBUG(dbgs() << "Inserted copy from " << curr.first << " to " << curr.second << "\n"); @@ -761,8 +761,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, // Insert a copy from dest to a new temporary t at the end of b unsigned t = MF->getRegInfo().createVirtualRegister(RC); - TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t, - curr.second, RC, RC, DebugLoc()); + BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), t).addReg(curr.second); map[curr.second] = t; MachineBasicBlock::iterator TI = MBB->getFirstTerminator(); @@ -830,9 +830,6 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg()); VNInfo* FirstVN = *Int.vni_begin(); FirstVN->setHasPHIKill(false); - if (I->getOperand(i).isKill()) - FirstVN->addKill(LI.getInstructionIndex(I).getUseIndex()); - LiveRange LR (LI.getMBBStartIdx(I->getParent()), LI.getInstructionIndex(I).getUseIndex().getNextSlot(), FirstVN); @@ -959,9 +956,8 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { } else { // Insert a last-minute copy if a conflict was detected. 
const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first); - TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(), - I->first, SI->first, RC, RC, DebugLoc()); + BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), I->first).addReg(SI->first); LI.renumber(); diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index f2e2a76..075db80 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/Target/TargetInstrInfo.h" @@ -559,11 +560,9 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); - TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first, - CopyInfos[i].second, RC,RC, DebugLoc()); - MachineInstr *CopyMI = prior(Loc); - Copies.push_back(CopyMI); + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); } NumInstrDups += TailBB->size() - 1; // subtract one for removed branch @@ -618,11 +617,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); - TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first, - CopyInfos[i].second, RC, RC, DebugLoc()); - MachineInstr *CopyMI = prior(Loc); - Copies.push_back(CopyMI); + Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first) + .addReg(CopyInfos[i].second)); } } else { // No PHIs to worry about, just splice the instructions over. diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 0ad6619..cdacb98 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallVector.h" @@ -21,11 +22,34 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PostRAHazardRecognizer.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything +/// after it, replacing it with an unconditional branch to NewDest. +void +TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const { + MachineBasicBlock *MBB = Tail->getParent(); + + // Remove all the old successors of MBB from the CFG. 
+  while (!MBB->succ_empty())
+    MBB->removeSuccessor(MBB->succ_begin());
+
+  // Remove all the dead instructions from the end of MBB.
+  MBB->erase(Tail, MBB->end());
+
+  // If NewDest isn't immediately after MBB, insert a branch to it.
+  if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
+    InsertBranch(*MBB, NewDest, 0, SmallVector<MachineOperand, 0>(),
+                 Tail->getDebugLoc());
+  MBB->addSuccessor(NewDest);
+}
+
 // commuteInstruction - The default implementation of this method just exchanges
 // the two operands returned by findCommutedOpIndices.
 MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
@@ -136,17 +160,9 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
                                         unsigned DestReg,
                                         unsigned SubIdx,
                                         const MachineInstr *Orig,
-                                        const TargetRegisterInfo *TRI) const {
+                                        const TargetRegisterInfo &TRI) const {
   MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
-  MachineOperand &MO = MI->getOperand(0);
-  if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
-    MO.setReg(DestReg);
-    MO.setSubReg(SubIdx);
-  } else if (SubIdx) {
-    MO.setReg(TRI->getSubReg(DestReg, SubIdx));
-  } else {
-    MO.setReg(DestReg);
-  }
+  MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI);
   MBB.insert(I, MI);
 }
@@ -175,6 +191,47 @@ TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
   return FnSize;
 }

+// If the COPY instruction in MI can be folded to a stack operation, return
+// the register class to use.
+static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI,
+                                              unsigned FoldIdx) {
+  assert(MI->isCopy() && "MI must be a COPY instruction");
+  if (MI->getNumOperands() != 2)
+    return 0;
+  assert(FoldIdx<2 && "FoldIdx refers to a nonexistent operand");
+
+  const MachineOperand &FoldOp = MI->getOperand(FoldIdx);
+  const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx);
+
+  if (FoldOp.getSubReg() || LiveOp.getSubReg())
+    return 0;
+
+  unsigned FoldReg = FoldOp.getReg();
+  unsigned LiveReg = LiveOp.getReg();
+
+  assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
+         "Cannot fold physregs");
+
+  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+  const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
+
+  if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+    return RC->contains(LiveOp.getReg()) ? RC : 0;
+
+  const TargetRegisterClass *LiveRC = MRI.getRegClass(LiveReg);
+  if (RC == LiveRC || RC->hasSubClass(LiveRC))
+    return RC;
+
+  // FIXME: Allow folding when register classes are memory compatible.
+  return 0;
+}
+
+bool TargetInstrInfoImpl::
+canFoldMemoryOperand(const MachineInstr *MI,
+                     const SmallVectorImpl<unsigned> &Ops) const {
+  return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]);
+}
+
 /// foldMemoryOperand - Attempt to fold a load or store of the specified stack
 /// slot into the specified machine instruction for the specified operand(s).
 /// If this is possible, a new instruction is returned with the specified
@@ -182,10 +239,9 @@ TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
 /// removing the old instruction and adding the new one in the instruction
 /// stream.
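// (Illustrative caller, not quoted from this patch: under the new
// iterator-based signature the folded instruction is created and already
// inserted before MI, so a typical call site folds and then erases the
// original. OpIdx and FI are placeholders.)
//
//   SmallVector<unsigned, 1> Ops;
//   Ops.push_back(OpIdx);                         // operand(s) to fold
//   if (MachineInstr *FoldedMI = TII->foldMemoryOperand(MI, Ops, FI)) {
//     MI->eraseFromParent();                      // old instruction is dead
//     MI = FoldedMI;                              // new one is in the block
//   }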
MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, - MachineInstr* MI, +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl &Ops, - int FrameIndex) const { + int FI) const { unsigned Flags = 0; for (unsigned i = 0, e = Ops.size(); i != e; ++i) if (MI->getOperand(Ops[i]).isDef()) @@ -193,34 +249,56 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, else Flags |= MachineMemOperand::MOLoad; + MachineBasicBlock *MBB = MI->getParent(); + assert(MBB && "foldMemoryOperand needs an inserted instruction"); + MachineFunction &MF = *MBB->getParent(); + // Ask the target to do the actual folding. - MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex); - if (!NewMI) return 0; + if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + // Add a memory operand, foldMemoryOperandImpl doesn't do that. + assert((!(Flags & MachineMemOperand::MOStore) || + NewMI->getDesc().mayStore()) && + "Folded a def to a non-store!"); + assert((!(Flags & MachineMemOperand::MOLoad) || + NewMI->getDesc().mayLoad()) && + "Folded a use to a non-load!"); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FI) != -1); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), + Flags, /*Offset=*/0, + MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + NewMI->addMemOperand(MF, MMO); - assert((!(Flags & MachineMemOperand::MOStore) || - NewMI->getDesc().mayStore()) && - "Folded a def to a non-store!"); - assert((!(Flags & MachineMemOperand::MOLoad) || - NewMI->getDesc().mayLoad()) && - "Folded a use to a non-load!"); - const MachineFrameInfo &MFI = *MF.getFrameInfo(); - assert(MFI.getObjectOffset(FrameIndex) != -1); - MachineMemOperand *MMO = - MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex), - Flags, /*Offset=*/0, - MFI.getObjectSize(FrameIndex), - MFI.getObjectAlignment(FrameIndex)); - NewMI->addMemOperand(MF, MMO); + // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI. + return MBB->insert(MI, NewMI); + } - return NewMI; + // Straight COPY may fold as load/store. + if (!MI->isCopy() || Ops.size() != 1) + return 0; + + const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); + if (!RC) + return 0; + + const MachineOperand &MO = MI->getOperand(1-Ops[0]); + MachineBasicBlock::iterator Pos = MI; + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + + if (Flags == MachineMemOperand::MOStore) + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); + else + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); + return --Pos; } /// foldMemoryOperand - Same as the previous version except it allows folding /// of any load and store from / to any address, not just from a specific /// stack slot. MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, - MachineInstr* MI, +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl &Ops, MachineInstr* LoadMI) const { assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!"); @@ -228,11 +306,15 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, for (unsigned i = 0, e = Ops.size(); i != e; ++i) assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); #endif + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); // Ask the target to do the actual folding. 
MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); if (!NewMI) return 0; + NewMI = MBB.insert(MI, NewMI); + // Copy the memoperands from the load to the folded instruction. NewMI->setMemRefs(LoadMI->memoperands_begin(), LoadMI->memoperands_end()); @@ -240,11 +322,9 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, return NewMI; } -bool -TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * - MI, - AliasAnalysis * - AA) const { +bool TargetInstrInfo:: +isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, + AliasAnalysis *AA) const { const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetMachine &TM = MF.getTarget(); @@ -324,3 +404,31 @@ TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * // Everything checked out. return true; } + +/// isSchedulingBoundary - Test if the given instruction should be +/// considered a scheduling boundary. This primarily includes labels +/// and terminators. +bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const{ + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isLabel()) + return true; + + // Don't attempt to schedule around any instruction that defines + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. + const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); + if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore())) + return true; + + return false; +} + +// Default implementation of CreateTargetPostRAHazardRecognizer. 
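// (Sketch of how a post-RA scheduler is expected to drive this hook; the
// sequence is illustrative, not quoted from this patch:
//
//   ScheduleHazardRecognizer *HR =
//       TII->CreateTargetPostRAHazardRecognizer(ItinData);
//   if (HR->getHazardType(SU) == ScheduleHazardRecognizer::NoHazard)
//     HR->EmitInstruction(SU); // issue SU in the current cycle
//   else
//     HR->AdvanceCycle();      // stall until the hazard clears
// )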
+ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { + return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II); +} diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 71ad3fb..a80cfc4 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -825,32 +825,32 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, TargetLoweringObjectFile::Initialize(Ctx, TM); TextSection = getContext().getCOFFSection(".text", - MCSectionCOFF::IMAGE_SCN_CNT_CODE | - MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_CNT_CODE | + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getText()); DataSection = getContext().getCOFFSection(".data", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); ReadOnlySection = getContext().getCOFFSection(".rdata", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); StaticCtorSection = getContext().getCOFFSection(".ctors", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); StaticDtorSection = getContext().getCOFFSection(".dtors", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); // FIXME: We're emitting LSDA info into a readonly section on COFF, even @@ -859,76 +859,76 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, // adjusted or this should be a data section. LSDASection = getContext().getCOFFSection(".gcc_except_table", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); EHFrameSection = getContext().getCOFFSection(".eh_frame", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); // Debug info. 
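// (Aside, not part of the patch: the characteristics previously lived as
// enumerators on MCSectionCOFF; they now come from the shared
// llvm/Support/COFF.h namespace so MC and the object writers agree on one
// encoding. A generic call site, with a hypothetical section name:
//
//   const MCSection *S =
//       Ctx.getCOFFSection(".debug_example",
//                          COFF::IMAGE_SCN_MEM_DISCARDABLE |
//                          COFF::IMAGE_SCN_MEM_READ,
//                          SectionKind::getMetadata());
// )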
DwarfAbbrevSection = getContext().getCOFFSection(".debug_abbrev", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfInfoSection = getContext().getCOFFSection(".debug_info", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfLineSection = getContext().getCOFFSection(".debug_line", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfFrameSection = getContext().getCOFFSection(".debug_frame", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfPubNamesSection = getContext().getCOFFSection(".debug_pubnames", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfPubTypesSection = getContext().getCOFFSection(".debug_pubtypes", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfStrSection = getContext().getCOFFSection(".debug_str", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfLocSection = getContext().getCOFFSection(".debug_loc", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfARangesSection = getContext().getCOFFSection(".debug_aranges", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfRangesSection = getContext().getCOFFSection(".debug_ranges", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfMacroInfoSection = getContext().getCOFFSection(".debug_macinfo", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DrectveSection = getContext().getCOFFSection(".drectve", - MCSectionCOFF::IMAGE_SCN_LNK_INFO, + COFF::IMAGE_SCN_LNK_INFO, SectionKind::getMetadata()); } @@ -936,27 +936,27 @@ static unsigned getCOFFSectionFlags(SectionKind K) { unsigned Flags = 0; - if (!K.isMetadata()) + if (K.isMetadata()) Flags |= - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE; + COFF::IMAGE_SCN_MEM_DISCARDABLE; else if (K.isText()) Flags |= - MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE | - MCSectionCOFF::IMAGE_SCN_CNT_CODE; + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_CNT_CODE; else if (K.isBSS ()) Flags |= - MCSectionCOFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE; + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; else if (K.isReadOnly()) Flags |= - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - 
MCSectionCOFF::IMAGE_SCN_MEM_READ; + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ; else if (K.isWriteable()) Flags |= - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE; + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; return Flags; } @@ -995,10 +995,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, unsigned Characteristics = getCOFFSectionFlags(Kind); - Characteristics |= MCSectionCOFF::IMAGE_SCN_LNK_COMDAT; + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; return getContext().getCOFFSection(Name.str(), Characteristics, - MCSectionCOFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind); + COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind); } if (Kind.isText()) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 3d10dc1..5649143 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -381,7 +382,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, DstReg = 0; unsigned SrcSubIdx, DstSubIdx; if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (MI.isExtractSubreg()) { + if (MI.isCopy()) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); } else if (MI.isInsertSubreg()) { @@ -897,6 +898,108 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } } } + + // If this is an instruction with a load folded into it, try unfolding + // the load, e.g. avoid this: + // movq %rdx, %rcx + // addq (%rax), %rcx + // in favor of this: + // movq (%rax), %rcx + // addq %rdx, %rcx + // because it's preferable to schedule a load than a register copy. + if (TID.mayLoad() && !regBKilled) { + // Determine if a load can be unfolded. + unsigned LoadRegIndex; + unsigned NewOpc = + TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + /*UnfoldLoad=*/true, + /*UnfoldStore=*/false, + &LoadRegIndex); + if (NewOpc != 0) { + const TargetInstrDesc &UnfoldTID = TII->get(NewOpc); + if (UnfoldTID.getNumDefs() == 1) { + MachineFunction &MF = *mbbi->getParent(); + + // Unfold the load. + DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + const TargetRegisterClass *RC = + UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI); + unsigned Reg = MRI->createVirtualRegister(RC); + SmallVector NewMIs; + if (!TII->unfoldMemoryOperand(MF, mi, Reg, + /*UnfoldLoad=*/true,/*UnfoldStore=*/false, + NewMIs)) { + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + return false; + } + assert(NewMIs.size() == 2 && + "Unfolded a load into multiple instructions!"); + // The load was previously folded, so this is the only use. + NewMIs[1]->addRegisterKilled(Reg, TRI); + + // Tentatively insert the instructions into the block so that they + // look "normal" to the transformation logic. + mbbi->insert(mi, NewMIs[0]); + mbbi->insert(mi, NewMIs[1]); + + DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] + << "2addr: NEW INST: " << *NewMIs[1]); + + // Transform the instruction, now that it no longer has a load. 
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); + unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); + MachineBasicBlock::iterator NewMI = NewMIs[1]; + bool TransformSuccess = + TryInstructionTransform(NewMI, mi, mbbi, + NewSrcIdx, NewDstIdx, Dist); + if (TransformSuccess || + NewMIs[1]->getOperand(NewSrcIdx).isKill()) { + // Success, or at least we made an improvement. Keep the unfolded + // instructions and discard the original. + if (LV) { + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() != 0 && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isUse()) { + if (MO.isKill()) { + if (NewMIs[0]->killsRegister(MO.getReg())) + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + else { + assert(NewMIs[1]->killsRegister(MO.getReg()) && + "Kill missing after load unfold!"); + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + } + } + } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + if (NewMIs[1]->registerDefIsDead(MO.getReg())) + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); + else { + assert(NewMIs[0]->registerDefIsDead(MO.getReg()) && + "Dead flag missing after load unfold!"); + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]); + } + } + } + } + LV->addVirtualRegisterKilled(Reg, NewMIs[1]); + } + mi->eraseFromParent(); + mi = NewMIs[1]; + if (TransformSuccess) + return true; + } else { + // Transforming didn't eliminate the tie and didn't lead to an + // improvement. Clean up the unfolded instructions and keep the + // original. + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + NewMIs[0]->eraseFromParent(); + NewMIs[1]->eraseFromParent(); + } + } + } + } + return false; } @@ -1047,14 +1150,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI); + TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); ReMatRegs.set(regB); ++NumReMats; } else { - bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc, - mi->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Unable to issue a copy instruction!\n"); + BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), + regA).addReg(regB); } MachineBasicBlock::iterator prevMI = prior(mi); @@ -1104,12 +1205,30 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } } } - + + // Schedule the source copy / remat inserted to form two-address + // instruction. FIXME: Does it matter the distance map may not be + // accurate after it's scheduled? + TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); + MadeChange = true; DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. + if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } + // Clear TiedOperands here instead of at the top of the loop // since most instructions do not have tied operands. 
TiedOperands.clear(); @@ -1136,14 +1255,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { static void UpdateRegSequenceSrcs(unsigned SrcReg, unsigned DstReg, unsigned SubIdx, - MachineRegisterInfo *MRI) { + MachineRegisterInfo *MRI, + const TargetRegisterInfo &TRI) { for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), RE = MRI->reg_end(); RI != RE; ) { MachineOperand &MO = RI.getOperand(); ++RI; - MO.setReg(DstReg); - assert(MO.getSubReg() == 0); - MO.setSubReg(SubIdx); + MO.substVirtReg(DstReg, SubIdx, TRI); } } @@ -1165,55 +1283,102 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector &Srcs, if (!Seen.insert(SrcReg)) continue; - // If there are no other uses than extract_subreg which feed into + // Check that the instructions are all in the same basic block. + MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg); + MachineInstr *DstDefMI = MRI->getVRegDef(DstReg); + if (SrcDefMI->getParent() != DstDefMI->getParent()) + continue; + + // If there are no other uses than copies which feed into // the reg_sequence, then we might be able to coalesce them. bool CanCoalesce = true; - SmallVector SubIndices; + SmallVector SrcSubIndices, DstSubIndices; for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; - if (!UseMI->isExtractSubreg() || - UseMI->getOperand(0).getReg() != DstReg) { + if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) { CanCoalesce = false; break; } - SubIndices.push_back(UseMI->getOperand(2).getImm()); + SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg()); + DstSubIndices.push_back(UseMI->getOperand(0).getSubReg()); } - if (!CanCoalesce || SubIndices.size() < 2) + if (!CanCoalesce || SrcSubIndices.size() < 2) continue; - std::sort(SubIndices.begin(), SubIndices.end()); - unsigned NewSubIdx = 0; - if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices, - NewSubIdx)) { - bool Proceed = true; - if (NewSubIdx) - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ) { - MachineOperand &MO = RI.getOperand(); - ++RI; - // FIXME: If the sub-registers do not combine to the whole - // super-register, i.e. NewSubIdx != 0, and any of the use has a - // sub-register index, then abort the coalescing attempt. - if (MO.getSubReg()) { - Proceed = false; - break; - } - MO.setReg(DstReg); - MO.setSubReg(NewSubIdx); - } - if (Proceed) - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ) { - MachineOperand &MO = RI.getOperand(); - ++RI; - MO.setReg(DstReg); - if (NewSubIdx) - MO.setSubReg(NewSubIdx); - } + // Check that the source subregisters can be combined. + std::sort(SrcSubIndices.begin(), SrcSubIndices.end()); + unsigned NewSrcSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices, + NewSrcSubIdx)) + continue; + + // Check that the destination subregisters can also be combined. + std::sort(DstSubIndices.begin(), DstSubIndices.end()); + unsigned NewDstSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices, + NewDstSubIdx)) + continue; + + // If neither source nor destination can be combined to the full register, + // just give up. This could be improved if it ever matters. 
+ if (NewSrcSubIdx != 0 && NewDstSubIdx != 0) + continue; + + // Now that we know that all the uses are extract_subregs and that those + // subregs can somehow be combined, scan all the extract_subregs again to + // make sure the subregs are in the right order and can be composed. + MachineInstr *SomeMI = 0; + CanCoalesce = true; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + assert(UseMI->isCopy()); + unsigned DstSubIdx = UseMI->getOperand(0).getSubReg(); + unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg(); + assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination"); + if ((NewDstSubIdx == 0 && + TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) || + (NewSrcSubIdx == 0 && + TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) { + CanCoalesce = false; + break; + } + // Keep track of one of the uses. + SomeMI = UseMI; + } + if (!CanCoalesce) + continue; + + // Insert a copy to replace the original. + MachineBasicBlock::iterator InsertLoc = SomeMI; + MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, + SomeMI->getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, NewDstSubIdx) + .addReg(SrcReg, 0, NewSrcSubIdx); + + // Remove all the old extract instructions. + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ) { + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI == CopyMI) + continue; + assert(UseMI->isCopy()); + // Move any kills to the new copy or extract instruction. + if (UseMI->getOperand(1).isKill()) { + CopyMI->getOperand(1).setIsKill(); + if (LV) + // Update live variables + LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI); } + UseMI->eraseFromParent(); + } } } @@ -1268,15 +1433,13 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { } IsImpDef = false; - // Remember EXTRACT_SUBREG sources. These might be candidate for - // coalescing. - if (DefMI->isExtractSubreg()) + // Remember COPY sources. These might be candidate for coalescing. + if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) RealSrcs.push_back(DefMI->getOperand(1).getReg()); - if (!Seen.insert(SrcReg) || - MI->getParent() != DefMI->getParent() || - !MI->getOperand(i).isKill() || - HasOtherRegSequenceUses(SrcReg, MI, MRI)) { + bool isKill = MI->getOperand(i).isKill(); + if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() || + !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) { // REG_SEQUENCE cannot have duplicated operands, add a copy. // Also add an copy if the source is live-in the block. We don't want // to end up with a partial-redef of a livein, e.g. @@ -1292,30 +1455,23 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { // If the REG_SEQUENCE doesn't kill its source, keeping live variables // correctly up to date becomes very difficult. Insert a copy. 
// - const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - unsigned NewReg = MRI->createVirtualRegister(RC); MachineBasicBlock::iterator InsertLoc = MI; - bool Emitted = - TII->copyRegToReg(*MI->getParent(), InsertLoc, NewReg, SrcReg, RC, RC, - MI->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Unable to issue a copy instruction!\n"); - MI->getOperand(i).setReg(NewReg); - if (MI->getOperand(i).isKill()) { - MachineBasicBlock::iterator CopyMI = prior(InsertLoc); - MachineOperand *KillMO = CopyMI->findRegisterUseOperand(SrcReg); - KillMO->setIsKill(); - if (LV) - // Update live variables - LV->replaceKillInstruction(SrcReg, MI, &*CopyMI); - } + MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, + MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm()) + .addReg(SrcReg, getKillRegState(isKill)); + MI->getOperand(i).setReg(0); + if (LV && isKill) + LV->replaceKillInstruction(SrcReg, MI, CopyMI); + DEBUG(dbgs() << "Inserted: " << *CopyMI); } } for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); + if (!SrcReg) continue; unsigned SubIdx = MI->getOperand(i+1).getImm(); - UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI); + UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI); } if (IsImpDef) { @@ -1328,8 +1484,11 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MI->eraseFromParent(); } - // Try coalescing some EXTRACT_SUBREG instructions. - CoalesceExtSubRegs(RealSrcs, DstReg); + // Try coalescing some EXTRACT_SUBREG instructions. This can create + // INSERT_SUBREG instructions that must have flags added by + // LiveIntervalAnalysis, so only run it when LiveVariables is available. + if (LV) + CoalesceExtSubRegs(RealSrcs, DstReg); } RegSequences.clear(); diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 871d836..57a1500 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -667,8 +667,7 @@ static void ReMaterialize(MachineBasicBlock &MBB, assert(TID.getNumDefs() == 1 && "Don't know how to remat instructions that define > 1 values!"); #endif - TII->reMaterialize(MBB, MII, DestReg, - ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI); + TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); MachineInstr *NewMI = prior(MII); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -769,7 +768,7 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end(); I != E; ++I) { unsigned Reg = I->first; - const TargetRegisterClass* RC = TRI->getPhysicalRegisterRegClass(Reg); + const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg); // FIXME: A temporary workaround. We can't reuse available value if it's // not safe to move the def of the virtual register's class. e.g. // X86::RFP* register classes. Do not add it as a live-in. 
@@ -1022,7 +1021,7 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, for (unsigned i = 0, e = Kills.size(); i != e; ++i) { unsigned Kill = Kills[i]; if (!Defs[Kill] && !Uses[Kill] && - TRI->getPhysicalRegisterRegClass(Kill) == RC) + RC->contains(Kill)) return Kill; } for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { @@ -1410,25 +1409,25 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII, if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { assert(NewMIs.size() == 1); MachineInstr *NewMI = NewMIs.back(); + MBB->insert(MII, NewMI); NewMIs.clear(); int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); assert(Idx != -1); SmallVector Ops; Ops.push_back(Idx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS); + MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS); + NewMI->eraseFromParent(); if (FoldedMI) { VRM->addSpillSlotUse(SS, FoldedMI); if (!VRM->hasPhys(UnfoldVR)) VRM->assignVirt2Phys(UnfoldVR, UnfoldPR); VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - MII = MBB->insert(MII, FoldedMI); + MII = FoldedMI; InvalidateKills(MI, TRI, RegKills, KillOps); VRM->RemoveMachineInstrFromMaps(&MI); MBB->erase(&MI); - MF.DeleteMachineInstr(NewMI); return true; } - MF.DeleteMachineInstr(NewMI); } } @@ -1480,7 +1479,6 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, if (MII == MBB->begin() || !MII->killsRegister(SrcReg)) return false; - MachineFunction &MF = *MBB->getParent(); MachineInstr &MI = *MII; MachineBasicBlock::iterator DefMII = prior(MII); MachineInstr *DefMI = DefMII; @@ -1511,11 +1509,12 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true); if (!CommutedMI) return false; + MBB->insert(MII, CommutedMI); SmallVector Ops; Ops.push_back(NewDstIdx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS); + MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS); // Not needed since foldMemoryOperand returns new MI. - MF.DeleteMachineInstr(CommutedMI); + CommutedMI->eraseFromParent(); if (!FoldedMI) return false; @@ -1528,7 +1527,7 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, MachineInstr *StoreMI = MII; VRM->addSpillSlotUse(SS, StoreMI); VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - MII = MBB->insert(MII, FoldedMI); // Update MII to backtrack. + MII = FoldedMI; // Update MII to backtrack. // Delete all 3 old instructions. InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); @@ -1704,7 +1703,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) { std::vector &EmSpills = VRM->getEmergencySpills(MI); for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) { unsigned PhysReg = EmSpills[i]; - const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(PhysReg); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg); assert(RC && "Unable to determine register class!"); int SS = VRM->getEmergencySpillSlot(RC); if (UsedSS.count(SS)) @@ -1759,7 +1758,6 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, bool DoReMat = VRM->isReMaterialized(VirtReg); int SSorRMId = DoReMat ? 
VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); if (InReg == Phys) { // If the value is already available in the expected register, save @@ -1793,20 +1791,16 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, MachineBasicBlock::iterator InsertLoc = ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, *MBB->getParent()); - - TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC, - MI->getDebugLoc()); + MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), Phys) + .addReg(InReg, RegState::Kill); // This invalidates Phys. Spills.ClobberPhysReg(Phys); // Remember it's available. Spills.addAvailable(SSorRMId, Phys); - // Mark is killed. - MachineInstr *CopyMI = prior(InsertLoc); CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); - KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); DEBUG(dbgs() << '\t' << *CopyMI); @@ -2013,7 +2007,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // = EXTRACT_SUBREG fi#1 // fi#1 is available in EDI, but it cannot be reused because it's not in // the right register file. - if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) { + if (PhysReg && !AvoidReload && SubIdx) { const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); if (!RC->contains(PhysReg)) PhysReg = 0; @@ -2034,6 +2028,18 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, CanReuse = !ReusedOperands.isClobbered(PhysReg) && Spills.canClobberPhysReg(PhysReg); } + // If this is an asm, and PhysReg is used elsewhere as an earlyclobber + // operand, we can't also use it as an input. (Outputs always come + // before inputs, so we can stop looking at i.) 
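// (Illustrative example of the hazard handled below, not from the patch;
// GCC-style inline asm:
//
//   asm("..." : "=&r"(out) : "r"(in));
//
// The "&" marks the output earlyclobber: it may be written before the
// inputs are read, so the physreg chosen for 'out' must not also be reused
// to satisfy the reload of 'in'; the scan that follows clears CanReuse in
// that case.)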
+ if (MI.isInlineAsm()) { + for (unsigned k=0; kgetPhys(VirtReg); assert(DesignatedReg && "Must map virtreg to physreg!"); @@ -2136,7 +2144,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, continue; } - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); MRI->setPhysRegUsed(DesignatedReg); ReusedOperands.markClobbered(DesignatedReg); @@ -2144,11 +2151,9 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, MachineBasicBlock::iterator InsertLoc = ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, SSorRMId, TII, MF); - - TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC, - MI.getDebugLoc()); - - MachineInstr *CopyMI = prior(InsertLoc); + MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY), + DesignatedReg).addReg(PhysReg); CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); UpdateKills(*CopyMI, TRI, RegKills, KillOps); @@ -2269,27 +2274,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { DEBUG(dbgs() << "Promoted Load To Copy: " << MI); if (DestReg != InReg) { - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC, - MI.getDebugLoc()); MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); - unsigned SubIdx = DefMO->getSubReg(); + MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DestReg, RegState::Define, DefMO->getSubReg()) + .addReg(InReg, RegState::Kill); // Revisit the copy so we make sure to notice the effects of the // operation on the destreg (either needing to RA it if it's // virtual or needing to clobber any values if it's physical). - NextMII = &MI; - --NextMII; // backtrack to the copy. + NextMII = CopyMI; NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); - // Propagate the sub-register index over. - if (SubIdx) { - DefMO = NextMII->findRegisterDefOperand(DestReg); - DefMO->setSubReg(SubIdx); - } - - // Mark is killed. - MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg); - KillOpnd->setIsKill(); - BackTracked = true; } else { DEBUG(dbgs() << "Removing now-noop copy: " << MI); @@ -2430,6 +2424,24 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Also check if it's copying from an "undef", if so, we can't // eliminate this or else the undef marker is lost and it will // confuses the scavenger. This is extremely rare. + if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() && + MI.getNumOperands() == 2) { + ++NumDCE; + DEBUG(dbgs() << "Removing now-noop copy: " << MI); + SmallVector KillRegs; + InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); + if (MO.isDead() && !KillRegs.empty()) { + // Source register or an implicit super/sub-register use is killed. + assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg())); + // Last def is now dead. + TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); + } + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + Erased = true; + Spills.disallowClobberPhysReg(VirtReg); + goto ProcessNextInst; + } unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst && SrcSR == DstSR && @@ -2519,6 +2531,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Check to see if this is a noop copy. If so, eliminate the // instruction before considering the dest reg to be changed. 
+ if (MI.isIdentityCopy()) { + ++NumDCE; + DEBUG(dbgs() << "Removing now-noop copy: " << MI); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + Erased = true; + UpdateKills(*LastStore, TRI, RegKills, KillOps); + goto ProcessNextInst; + } { unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp index 5e558ca..c8488b2 100644 --- a/lib/CompilerDriver/Tool.cpp +++ b/lib/CompilerDriver/Tool.cpp @@ -85,7 +85,8 @@ StrVector Tool::SortArgs(ArgsVector& Args) const { StrVector Out; // HACK: this won't be needed when we'll migrate away from CommandLine. - std::stable_sort(Args.begin(), Args.end(), &CompareFirst<unsigned, std::string>); + std::stable_sort(Args.begin(), Args.end(), + &CompareFirst<unsigned, std::string>); for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) { Out.push_back(B->second); } diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 0748b54..59ebe6e 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -591,7 +591,7 @@ void Interpreter::popStackAndReturnValueToCaller(const Type *RetTy, ECStack.pop_back(); if (ECStack.empty()) { // Finished main. Put result into exit code... - if (RetTy && RetTy->isIntegerTy()) { // Nonvoid return type? + if (RetTy && !RetTy->isVoidTy()) { // Nonvoid return type? ExitValue = Result; // Capture the exit value of the program } else { memset(&ExitValue.Untyped, 0, sizeof(ExitValue.Untyped)); diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index 26a53b5..57d1260 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -266,7 +266,7 @@ GenericValue Interpreter::callExternalFunction(Function *F, RawFn = (RawFunc)(intptr_t) sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName()); if (!RawFn) - RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F); + RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F); if (RawFn != 0) RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later } else { diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 546d2b2..67bd3ed 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -626,10 +626,7 @@ void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); - // JIT the function - isAlreadyCodeGenerating = true; - jitstate->getPM(locked).run(*F); - isAlreadyCodeGenerating = false; + jitTheFunction(F, locked); // If the function referred to another function that had not yet been // read from bitcode, and we are jitting non-lazily, emit it now. @@ -640,10 +637,7 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { assert(!PF->hasAvailableExternallyLinkage() && "Externally-defined function should not be in pending list."); - // JIT the function - isAlreadyCodeGenerating = true; - jitstate->getPM(locked).run(*PF); - isAlreadyCodeGenerating = false; + jitTheFunction(PF, locked); // Now that the function has been jitted, ask the JITEmitter to rewrite // the stub with real address of the function.
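The hunks below add address-of-label support to the JIT: JITEmitter records the emitted address of each machine basic block whose address is taken, and JIT::getPointerToBasicBlock() maps an LLVM BasicBlock back to that address, compiling the parent function first if needed. A minimal caller-side sketch of the new API (illustrative only; resolveBlockAddress is a hypothetical helper, not part of this patch):

// Sketch: resolving a block address through the JIT (2.7-era C++ API).
#include "llvm/BasicBlock.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"

// With this patch, getPointerToBasicBlock() compiles BB's parent function,
// which fills the JIT's BasicBlockAddressMap as address-taken blocks are
// emitted, and then returns the recorded address; asking for a block whose
// address was never taken trips the assert in JIT::getPointerToBasicBlock().
static void *resolveBlockAddress(llvm::ExecutionEngine &EE,
                                 llvm::BasicBlock *BB) {
  return EE.getPointerToBasicBlock(BB);
}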
@@ -651,6 +645,15 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { } } +void JIT::jitTheFunction(Function *F, const MutexGuard &locked) { + isAlreadyCodeGenerating = true; + jitstate->getPM(locked).run(*F); + isAlreadyCodeGenerating = false; + + // clear basic block addresses after this function is done + getBasicBlockAddressMap(locked).clear(); +} + /// getPointerToFunction - This method is used to get the address of the /// specified function, compiling it if neccesary. /// @@ -687,6 +690,41 @@ void *JIT::getPointerToFunction(Function *F) { return Addr; } +void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { + MutexGuard locked(lock); + + BasicBlockAddressMapTy::iterator I = + getBasicBlockAddressMap(locked).find(BB); + if (I == getBasicBlockAddressMap(locked).end()) { + getBasicBlockAddressMap(locked)[BB] = Addr; + } else { + // ignore repeats: some BBs can be split into few MBBs? + } +} + +void JIT::clearPointerToBasicBlock(const BasicBlock *BB) { + MutexGuard locked(lock); + getBasicBlockAddressMap(locked).erase(BB); +} + +void *JIT::getPointerToBasicBlock(BasicBlock *BB) { + // make sure it's function is compiled by JIT + (void)getPointerToFunction(BB->getParent()); + + // resolve basic block address + MutexGuard locked(lock); + + BasicBlockAddressMapTy::iterator I = + getBasicBlockAddressMap(locked).find(BB); + if (I != getBasicBlockAddressMap(locked).end()) { + return I->second; + } else { + assert(0 && "JIT does not have BB address for address-of-label, was" + " it eliminated by optimizer?"); + return 0; + } +} + /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index edae719..1d1763e 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -51,6 +51,10 @@ public: class JIT : public ExecutionEngine { + /// types + typedef ValueMap<const BasicBlock *, void *> + BasicBlockAddressMapTy; + /// data TargetMachine &TM; // The current target we are compiling to TargetJITInfo &TJI; // The JITInfo for the target we are compiling to JITCodeEmitter *JCE; // JCE object @@ -67,6 +71,12 @@ class JIT : public ExecutionEngine { JITState *jitstate; + /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their + /// actualized version, only filled for basic blocks that have their address + /// taken. + BasicBlockAddressMapTy BasicBlockAddressMap; + + JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, bool AllocateGVsWithCode); @@ -90,9 +100,9 @@ public: CodeGenOpt::Level OptLevel = CodeGenOpt::Default, bool GVsWithCode = true, - CodeModel::Model CMM = CodeModel::Default) { + CodeModel::Model CMM = CodeModel::Default) { return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode, - CMM); + CMM); } virtual void addModule(Module *M); @@ -127,10 +137,15 @@ public: /// void *getPointerToFunction(Function *F); - void *getPointerToBasicBlock(BasicBlock *BB) { - assert(0 && "JIT does not support address-of-label yet!"); - return 0; - } + /// addPointerToBasicBlock - Adds address of the specific basic block. + void addPointerToBasicBlock(const BasicBlock *BB, void *Addr); + + /// clearPointerToBasicBlock - Removes address of specific basic block.
+ void clearPointerToBasicBlock(const BasicBlock *BB); + + /// getPointerToBasicBlock - This returns the address of the specified basic + /// block, assuming function is compiled. + void *getPointerToBasicBlock(BasicBlock *BB); /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the @@ -197,11 +212,18 @@ public: const JITEvent_EmittedFunctionDetails &Details); void NotifyFreeingMachineCode(void *OldPtr); + BasicBlockAddressMapTy & + getBasicBlockAddressMap(const MutexGuard &) { + return BasicBlockAddressMap; + } + + private: static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM, TargetMachine &tm); void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked); void updateFunctionStub(Function *F); + void jitTheFunction(Function *F, const MutexGuard &locked); protected: diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index e3855b2..28d79da 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -435,6 +435,9 @@ namespace { if (MBBLocations.size() <= (unsigned)MBB->getNumber()) MBBLocations.resize((MBB->getNumber()+1)*2); MBBLocations[MBB->getNumber()] = getCurrentPCValue(); + if (MBB->hasAddressTaken()) + TheJIT->addPointerToBasicBlock(MBB->getBasicBlock(), + (void*)getCurrentPCValue()); DEBUG(dbgs() << "JIT: Emitting BB" << MBB->getNumber() << " at [" << (void*) getCurrentPCValue() << "]\n"); } @@ -442,7 +445,7 @@ namespace { virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const; virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const; - virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const { + virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const{ assert(MBBLocations.size() > (unsigned)MBB->getNumber() && MBBLocations[MBB->getNumber()] && "MBB not emitted!"); return MBBLocations[MBB->getNumber()]; @@ -1310,6 +1313,11 @@ void JITEmitter::retryWithMoreMemory(MachineFunction &F) { deallocateMemForFunction(F.getFunction()); // Try again with at least twice as much free space. 
SizeEstimate = (uintptr_t)(2 * (BufferEnd - BufferBegin)); + + for (MachineFunction::iterator MBB = F.begin(), E = F.end(); MBB != E; ++MBB){ + if (MBB->hasAddressTaken()) + TheJIT->clearPointerToBasicBlock(MBB->getBasicBlock()); + } } /// deallocateMemForFunction - Deallocate all memory for the specified diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp index 2c22550..1be2bec 100644 --- a/lib/Linker/LinkItems.cpp +++ b/lib/Linker/LinkItems.cpp @@ -160,27 +160,26 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) { // Check for a file of name "-", which means "read standard input" if (File.str() == "-") { std::auto_ptr<Module> M; - MemoryBuffer *Buffer = MemoryBuffer::getSTDIN(); - if (!Buffer->getBufferSize()) { - delete Buffer; - Error = "standard input is empty"; - } else { - M.reset(ParseBitcodeFile(Buffer, Context, &Error)); - delete Buffer; - if (M.get()) - if (!LinkInModule(M.get(), &Error)) - return false; + if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN(&Error)) { + if (!Buffer->getBufferSize()) { + delete Buffer; + Error = "standard input is empty"; + } else { + M.reset(ParseBitcodeFile(Buffer, Context, &Error)); + delete Buffer; + if (M.get()) + if (!LinkInModule(M.get(), &Error)) + return false; + } } return error("Cannot link stdin: " + Error); } - // Make sure we can at least read the file - if (!File.canRead()) + // Determine what variety of file it is. + std::string Magic; + if (!File.getMagicNumber(Magic, 64)) return error("Cannot find linker input '" + File.str() + "'"); - // If its an archive, try to link it in - std::string Magic; - File.getMagicNumber(Magic, 64); switch (sys::IdentifyFileType(Magic.c_str(), 64)) { default: llvm_unreachable("Bad file type identification"); case sys::Unknown_FileType: diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 5e8a3b6..fc4f3c6 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_library(LLVMMC MCLoggingStreamer.cpp MCMachOStreamer.cpp MCNullStreamer.cpp + MCObjectStreamer.cpp MCObjectWriter.cpp MCSection.cpp MCSectionCOFF.cpp @@ -23,5 +24,7 @@ add_llvm_library(LLVMMC MCSymbol.cpp MCValue.cpp MachObjectWriter.cpp + WinCOFFStreamer.cpp + WinCOFFObjectWriter.cpp TargetAsmBackend.cpp ) diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 57b2bcc..e272b60 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -275,19 +275,20 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_Global: // .globl/.global OS << MAI.getGlobalDirective(); break; - case MCSA_Hidden: OS << ".hidden "; break; - case MCSA_IndirectSymbol: OS << ".indirect_symbol "; break; - case MCSA_Internal: OS << ".internal "; break; - case MCSA_LazyReference: OS << ".lazy_reference "; break; - case MCSA_Local: OS << ".local "; break; - case MCSA_NoDeadStrip: OS << ".no_dead_strip "; break; - case MCSA_PrivateExtern: OS << ".private_extern "; break; - case MCSA_Protected: OS << ".protected "; break; - case MCSA_Reference: OS << ".reference "; break; - case MCSA_Weak: OS << ".weak "; break; - case MCSA_WeakDefinition: OS << ".weak_definition "; break; + case MCSA_Hidden: OS << "\t.hidden\t"; break; + case MCSA_IndirectSymbol: OS << "\t.indirect_symbol\t"; break; + case MCSA_Internal: OS << "\t.internal\t"; break; + case MCSA_LazyReference: OS << "\t.lazy_reference\t"; break; + case MCSA_Local: OS << "\t.local\t"; break; + case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break; + case MCSA_PrivateExtern: OS << "\t.private_extern\t"; break; + case
MCSA_Protected: OS << "\t.protected\t"; break; + case MCSA_Reference: OS << "\t.reference\t"; break; + case MCSA_Weak: OS << "\t.weak\t"; break; + case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; break; // .weak_reference case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break; + case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break; } OS << *Symbol; @@ -693,7 +694,6 @@ void MCAsmStreamer::EmitRawText(StringRef String) { } void MCAsmStreamer::Finish() { - OS.flush(); } MCStreamer *llvm::createAsmStreamer(MCContext &Context, diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 5936656..7d84554 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -308,24 +308,23 @@ static bool isScatteredFixupFullyResolved(const MCAssembler &Asm, return !B_Base && BaseSymbol == A_Base; } -bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const { +bool MCAssembler::isSymbolLinkerVisible(const MCSymbol &Symbol) const { // Non-temporary labels should always be visible to the linker. - if (!SD->getSymbol().isTemporary()) + if (!Symbol.isTemporary()) return true; // Absolute temporary labels are never visible. - if (!SD->getFragment()) + if (!Symbol.isInSection()) return false; // Otherwise, check if the section requires symbols even for temporary labels. - return getBackend().doesSectionRequireSymbols( - SD->getFragment()->getParent()->getSection()); + return getBackend().doesSectionRequireSymbols(Symbol.getSection()); } const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout, const MCSymbolData *SD) const { // Linker visible symbols define atoms. - if (isSymbolLinkerVisible(SD)) + if (isSymbolLinkerVisible(SD->getSymbol())) return SD; // Absolute and undefined symbols have no defining atom. @@ -685,12 +684,8 @@ void MCAssembler::Finish() { for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { // Create dummy fragments to eliminate any empty sections, this simplifies // layout. - if (it->getFragmentList().empty()) { - unsigned ValueSize = 1; - if (getBackend().isVirtualSection(it->getSection())) - ValueSize = 1; + if (it->getFragmentList().empty()) new MCFillFragment(0, 1, 0, it); - } it->setOrdinal(SectionIndex++); } @@ -759,7 +754,6 @@ void MCAssembler::Finish() { // Write the object file. Writer->WriteObject(*this, Layout); - OS.flush(); stats::ObjectBytes += OS.tell() - StartOffset; } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 53ffc94..1137064 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -27,6 +27,10 @@ MCContext::MCContext(const MCAsmInfo &mai) : MAI(mai), NextUniqueID(0) { MachOUniquingMap = 0; ELFUniquingMap = 0; COFFUniquingMap = 0; + + SecureLogFile = getenv("AS_SECURE_LOG_FILE"); + SecureLog = 0; + SecureLogUsed = false; } MCContext::~MCContext() { @@ -37,6 +41,9 @@ MCContext::~MCContext() { delete (MachOUniqueMapTy*)MachOUniquingMap; delete (ELFUniqueMapTy*)ELFUniquingMap; delete (COFFUniqueMapTy*)COFFUniquingMap; + + // If the stream for the .secure_log_unique directive was created free it. 
+ delete (raw_ostream*)SecureLog; } //===----------------------------------------------------------------------===// @@ -90,14 +97,14 @@ MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) { return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + Twine(LocalLabelVal) + "\2" + - Twine(NextInstance(LocalLabelVal))); + Twine(NextInstance(LocalLabelVal))); } MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal, int bORf) { return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) + Twine(LocalLabelVal) + "\2" + - Twine(GetInstance(LocalLabelVal) + bORf)); + Twine(GetInstance(LocalLabelVal) + bORf)); } MCSymbol *MCContext::LookupSymbol(StringRef Name) const { diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index c000dd7..343f334 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -40,7 +40,7 @@ void MCExpr::print(raw_ostream &OS) const { const MCSymbol &Sym = SRE.getSymbol(); if (SRE.getKind() == MCSymbolRefExpr::VK_ARM_HI16 || - SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16) + SRE.getKind() == MCSymbolRefExpr::VK_ARM_LO16) OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); // Parenthesize names that start with $ so that they don't look like // absolute names. @@ -51,8 +51,8 @@ void MCExpr::print(raw_ostream &OS) const { OS << Sym; if (SRE.getKind() != MCSymbolRefExpr::VK_None && - SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 && - SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16) + SRE.getKind() != MCSymbolRefExpr::VK_ARM_HI16 && + SRE.getKind() != MCSymbolRefExpr::VK_ARM_LO16) OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); return; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 27e4e98..44bc267 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -14,6 +14,7 @@ #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCMachOSymbolFlags.h" @@ -25,21 +26,13 @@ using namespace llvm; namespace { -class MCMachOStreamer : public MCStreamer { - -private: - MCAssembler Assembler; - MCSectionData *CurSectionData; - - /// Track the current atom for each section. - DenseMap<const MCSectionData*, MCSymbolData*> CurrentAtomMap; - +class MCMachOStreamer : public MCObjectStreamer { private: MCFragment *getCurrentFragment() const { - assert(CurSectionData && "No current section!"); + assert(getCurrentSectionData() && "No current section!"); - if (!CurSectionData->empty()) - return &CurSectionData->getFragmentList().back(); + if (!getCurrentSectionData()->empty()) + return &getCurrentSectionData()->getFragmentList().back(); return 0; } @@ -49,28 +42,17 @@ private: MCDataFragment *getOrCreateDataFragment() const { MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment()); if (!F) - F = createDataFragment(); + F = new MCDataFragment(getCurrentSectionData()); return F; } - /// Create a new data fragment in the current section.
- MCDataFragment *createDataFragment() const { - MCDataFragment *DF = new MCDataFragment(CurSectionData); - DF->setAtom(CurrentAtomMap.lookup(CurSectionData)); - return DF; - } - void EmitInstToFragment(const MCInst &Inst); void EmitInstToData(const MCInst &Inst); public: MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, - raw_ostream &_OS, MCCodeEmitter *_Emitter) - : MCStreamer(Context), Assembler(Context, TAB, *_Emitter, _OS), - CurSectionData(0) {} - ~MCMachOStreamer() {} - - MCAssembler &getAssembler() { return Assembler; } + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} const MCExpr *AddValueSymbols(const MCExpr *Value) { switch (Value->getKind()) { @@ -86,7 +68,7 @@ public: } case MCExpr::SymbolRef: - Assembler.getOrCreateSymbolData( + getAssembler().getOrCreateSymbolData( cast<MCSymbolRefExpr>(Value)->getSymbol()); break; @@ -101,7 +83,6 @@ public: /// @name MCStreamer Interface /// @{ - virtual void SwitchSection(const MCSection *Section); virtual void EmitLabel(MCSymbol *Symbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); @@ -152,6 +133,7 @@ public: } virtual void EmitInstruction(const MCInst &Inst); + virtual void Finish(); /// @} @@ -159,38 +141,25 @@ public: } // end anonymous namespace. -void MCMachOStreamer::SwitchSection(const MCSection *Section) { - assert(Section && "Cannot switch to a null section!"); - - // If already in this section, then this is a noop. - if (Section == CurSection) return; - - CurSection = Section; - CurSectionData = &Assembler.getOrCreateSectionData(*Section); -} - void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(CurSection && "Cannot emit before setting section!"); - MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + Symbol->setSection(*CurSection); - // Update the current atom map, if necessary. - bool MustCreateFragment = false; - if (Assembler.isSymbolLinkerVisible(&SD)) { - CurrentAtomMap[CurSectionData] = &SD; + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - // We have to create a new fragment, fragments cannot span atoms. - MustCreateFragment = true; - } + // We have to create a new fragment if this is an atom defining symbol, + // fragments cannot span atoms. + if (getAssembler().isSymbolLinkerVisible(SD.getSymbol())) + new MCDataFragment(getCurrentSectionData()); // FIXME: This is wasteful, we don't necessarily need to create a data // fragment. Instead, we should mark the symbol as pointing into the data // fragment if it exists, otherwise we should just queue the label and set its // fragment pointer when we emit the next fragment. - MCDataFragment *F = - MustCreateFragment ? createDataFragment() : getOrCreateDataFragment(); + MCDataFragment *F = getOrCreateDataFragment(); assert(!SD.getFragment() && "Unexpected fragment on symbol data!"); SD.setFragment(F); SD.setOffset(F->getContents().size()); @@ -203,14 +172,12 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { // FIXME: Cleanup this code, these bits should be emitted based on semantic // properties, not on the order of definition, etc.
SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask); - - Symbol->setSection(*CurSection); } void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { switch (Flag) { case MCAF_SubsectionsViaSymbols: - Assembler.setSubsectionsViaSymbols(true); + getAssembler().setSubsectionsViaSymbols(true); return; } @@ -219,7 +186,7 @@ void MCMachOStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { void MCMachOStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { // FIXME: Lift context changes into super class. - Assembler.getOrCreateSymbolData(*Symbol); + getAssembler().getOrCreateSymbolData(*Symbol); Symbol->setVariableValue(AddValueSymbols(Value)); } @@ -232,15 +199,15 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, // important for matching the string table that 'as' generates. IndirectSymbolData ISD; ISD.Symbol = Symbol; - ISD.SectionData = CurSectionData; - Assembler.getIndirectSymbols().push_back(ISD); + ISD.SectionData = getCurrentSectionData(); + getAssembler().getIndirectSymbols().push_back(ISD); return; } // Adding a symbol attribute always introduces the symbol, note that an // important side effect of calling getOrCreateSymbolData here is to register // the symbol with the assembler. - MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); // The implementation of symbol attributes is designed to match 'as', but it // leaves much to desired. It doesn't really make sense to arbitrarily add and @@ -306,6 +273,10 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, // it has to be in a coalesced section, but this isn't enforced. SD.setFlags(SD.getFlags() | SF_WeakDefinition); break; + + case MCSA_WeakDefAutoPrivate: + SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference); + break; } } @@ -313,7 +284,8 @@ void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { // Encode the 'desc' value into the lowest implementation defined bits. assert(DescValue == (DescValue & SF_DescFlagsMask) && "Invalid .desc value!"); - Assembler.getOrCreateSymbolData(*Symbol).setFlags(DescValue&SF_DescFlagsMask); + getAssembler().getOrCreateSymbolData(*Symbol).setFlags( + DescValue & SF_DescFlagsMask); } void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, @@ -321,14 +293,14 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself. assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); SD.setExternal(true); SD.setCommon(Size, ByteAlignment); } void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, unsigned Size, unsigned ByteAlignment) { - MCSectionData &SectData = Assembler.getOrCreateSectionData(*Section); + MCSectionData &SectData = getAssembler().getOrCreateSectionData(*Section); // The symbol may not be present, which only creates the section. if (!Symbol) @@ -338,7 +310,7 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol); + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); // Emit an align fragment if necessary. 
if (ByteAlignment != 1) @@ -346,8 +318,6 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); SD.setFragment(F); - if (Assembler.isSymbolLinkerVisible(&SD)) - F->setAtom(&SD); Symbol->setSection(*Section); @@ -391,13 +361,12 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment, unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; - MCFragment *F = new MCAlignFragment(ByteAlignment, Value, ValueSize, - MaxBytesToEmit, CurSectionData); - F->setAtom(CurrentAtomMap.lookup(CurSectionData)); + new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit, + getCurrentSectionData()); // Update the maximum alignment on the current section if necessary. - if (ByteAlignment > CurSectionData->getAlignment()) - CurSectionData->setAlignment(ByteAlignment); + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); } void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment, @@ -405,24 +374,21 @@ if (MaxBytesToEmit == 0) MaxBytesToEmit = ByteAlignment; MCAlignFragment *F = new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit, - CurSectionData); + getCurrentSectionData()); F->setEmitNops(true); - F->setAtom(CurrentAtomMap.lookup(CurSectionData)); // Update the maximum alignment on the current section if necessary. - if (ByteAlignment > CurSectionData->getAlignment()) - CurSectionData->setAlignment(ByteAlignment); + if (ByteAlignment > getCurrentSectionData()->getAlignment()) + getCurrentSectionData()->setAlignment(ByteAlignment); } void MCMachOStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - MCFragment *F = new MCOrgFragment(*Offset, Value, CurSectionData); - F->setAtom(CurrentAtomMap.lookup(CurSectionData)); + new MCOrgFragment(*Offset, Value, getCurrentSectionData()); } void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) { - MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData); - IF->setAtom(CurrentAtomMap.lookup(CurSectionData)); + MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData()); // Add the fixups and data. // @@ -431,7 +397,7 @@ void MCMachOStreamer::EmitInstToFragment(const MCInst &Inst) { SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); - Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); VecOS.flush(); IF->getCode() = Code; @@ -444,7 +410,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) { SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); - Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups); + getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, Fixups); VecOS.flush(); // Add the fixups and data. @@ -461,21 +427,21 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { if (Inst.getOperand(i).isExpr()) AddValueSymbols(Inst.getOperand(i).getExpr()); - CurSectionData->setHasInstructions(true); + getCurrentSectionData()->setHasInstructions(true); // If this instruction doesn't need relaxation, just emit it as data. - if (!Assembler.getBackend().MayNeedRelaxation(Inst)) { + if (!getAssembler().getBackend().MayNeedRelaxation(Inst)) { EmitInstToData(Inst); return; } // Otherwise, if we are relaxing everything, relax the instruction as much as // possible and emit it as data.
- if (Assembler.getRelaxAll()) { + if (getAssembler().getRelaxAll()) { MCInst Relaxed; - Assembler.getBackend().RelaxInstruction(Inst, Relaxed); - while (Assembler.getBackend().MayNeedRelaxation(Relaxed)) - Assembler.getBackend().RelaxInstruction(Relaxed, Relaxed); + getAssembler().getBackend().RelaxInstruction(Inst, Relaxed); + while (getAssembler().getBackend().MayNeedRelaxation(Relaxed)) + getAssembler().getBackend().RelaxInstruction(Relaxed, Relaxed); EmitInstToData(Relaxed); return; } @@ -485,7 +451,36 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { } void MCMachOStreamer::Finish() { - Assembler.Finish(); + // We have to set the fragment atom associations so we can relax properly for + // Mach-O. + + // First, scan the symbol table to build a lookup table from fragments to + // defining symbols. + DenseMap<const MCFragment*, MCSymbolData*> DefiningSymbolMap; + for (MCAssembler::symbol_iterator it = getAssembler().symbol_begin(), + ie = getAssembler().symbol_end(); it != ie; ++it) { + if (getAssembler().isSymbolLinkerVisible(it->getSymbol()) && + it->getFragment()) { + // An atom defining symbol should never be internal to a fragment. + assert(it->getOffset() == 0 && "Invalid offset in atom defining symbol!"); + DefiningSymbolMap[it->getFragment()] = it; + } + } + + // Set the fragment atom associations by tracking the last seen atom defining + // symbol. + for (MCAssembler::iterator it = getAssembler().begin(), + ie = getAssembler().end(); it != ie; ++it) { + MCSymbolData *CurrentAtom = 0; + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) { + if (MCSymbolData *SD = DefiningSymbolMap.lookup(it2)) + CurrentAtom = SD; + it2->setAtom(CurrentAtom); + } + } + + this->MCObjectStreamer::Finish(); } MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp new file mode 100644 index 0000000..d3f7f77 --- /dev/null +++ b/lib/MC/MCObjectStreamer.cpp @@ -0,0 +1,39 @@ +//===- lib/MC/MCObjectStreamer.cpp - Object File MCStreamer Interface -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectStreamer.h" + +#include "llvm/MC/MCAssembler.h" +using namespace llvm; + +MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &_OS, MCCodeEmitter *_Emitter) + : MCStreamer(Context), Assembler(new MCAssembler(Context, TAB, + *_Emitter, _OS)), + CurSectionData(0) +{ +} + +MCObjectStreamer::~MCObjectStreamer() { + delete Assembler; +} + +void MCObjectStreamer::SwitchSection(const MCSection *Section) { + assert(Section && "Cannot switch to a null section!"); + + // If already in this section, then this is a noop.
+ if (Section == CurSection) return; + + CurSection = Section; + CurSectionData = &getAssembler().getOrCreateSectionData(*Section); +} + +void MCObjectStreamer::Finish() { + getAssembler().Finish(); +} diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 1cbe09a..465d983 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -23,7 +23,6 @@ using namespace llvm; AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { CurBuf = NULL; CurPtr = NULL; - TokStart = 0; } AsmLexer::~AsmLexer() { @@ -40,10 +39,6 @@ void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { TokStart = 0; } -SMLoc AsmLexer::getLoc() const { - return SMLoc::getFromPointer(TokStart); -} - /// ReturnError - Set the error to the specified string at the specified /// location. This is defined to always return AsmToken::Error. AsmToken AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { @@ -229,7 +224,7 @@ StringRef AsmLexer::LexUntilEndOfStatement() { TokStart = CurPtr; while (!isAtStartOfComment(*CurPtr) && // Start of line comment. - *CurPtr != ';' && // End of statement marker. + *CurPtr != ';' && // End of statement marker. *CurPtr != '\n' && *CurPtr != '\r' && (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 4523eab..793f3c7 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -18,34 +18,85 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetAsmParser.h" using namespace llvm; +namespace { + +/// \brief Generic implementations of directive handling, etc. which is shared +/// (or the default, at least) for all assembler parser. +class GenericAsmParser : public MCAsmParserExtension { +public: + GenericAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. + this->MCAsmParserExtension::Initialize(Parser); + + // Debugging directives. 
+ Parser.AddDirectiveHandler(this, ".file", MCAsmParser::DirectiveHandler( + &GenericAsmParser::ParseDirectiveFile)); + Parser.AddDirectiveHandler(this, ".line", MCAsmParser::DirectiveHandler( + &GenericAsmParser::ParseDirectiveLine)); + Parser.AddDirectiveHandler(this, ".loc", MCAsmParser::DirectiveHandler( + &GenericAsmParser::ParseDirectiveLoc)); + } + + bool ParseDirectiveFile(StringRef, SMLoc DirectiveLoc); // ".file" + bool ParseDirectiveLine(StringRef, SMLoc DirectiveLoc); // ".line" + bool ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc); // ".loc" +}; + +} + +namespace llvm { + +extern MCAsmParserExtension *createDarwinAsmParser(); +extern MCAsmParserExtension *createELFAsmParser(); + +} enum { DEFAULT_ADDRSPACE = 0 }; -AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, - const MCAsmInfo &_MAI) - : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), TargetParser(0), - CurBuffer(0) { +AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx, + MCStreamer &_Out, const MCAsmInfo &_MAI) + : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM), + GenericParser(new GenericAsmParser), PlatformParser(0), + TargetParser(0), CurBuffer(0) { Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); - - // Debugging directives. - AddDirectiveHandler(".file", &AsmParser::ParseDirectiveFile); - AddDirectiveHandler(".line", &AsmParser::ParseDirectiveLine); - AddDirectiveHandler(".loc", &AsmParser::ParseDirectiveLoc); -} + // Initialize the generic parser. + GenericParser->Initialize(*this); + // Initialize the platform / file format parser. + // + // FIXME: This is a hack, we need to (majorly) cleanup how these objects are + // created. + if (_MAI.hasSubsectionsViaSymbols()) { + PlatformParser = createDarwinAsmParser(); + PlatformParser->Initialize(*this); + } else { + PlatformParser = createELFAsmParser(); + PlatformParser->Initialize(*this); + } +} AsmParser::~AsmParser() { + delete PlatformParser; + delete GenericParser; +} + +void AsmParser::setTargetParser(TargetAsmParser &P) { + assert(!TargetParser && "Target parser is already initialized!"); + TargetParser = &P; + TargetParser->Initialize(*this); } void AsmParser::Warning(SMLoc L, const Twine &Msg) { @@ -57,11 +108,6 @@ bool AsmParser::Error(SMLoc L, const Twine &Msg) { return true; } -bool AsmParser::TokError(const char *Msg) { - PrintMessage(Lexer.getLoc(), Msg, "error"); - return true; -} - void AsmParser::PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const { SrcMgr.PrintMessage(Loc, Msg, Type); @@ -163,11 +209,6 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { return false; } -MCSymbol *AsmParser::CreateSymbol(StringRef Name) { - // FIXME: Inline into callers. - return Ctx.GetOrCreateSymbol(Name); -} - /// ParsePrimaryExpr - Parse a primary expression and return it. /// primaryexpr ::= (parenexpr /// primaryexpr ::= symbol @@ -188,7 +229,7 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { case AsmToken::Identifier: { // This is a symbol reference. std::pair<StringRef, StringRef> Split = getTok().getIdentifier().split('@'); - MCSymbol *Sym = CreateSymbol(Split.first); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first); // Mark the symbol as used in an expression. Sym->setUsedInExpr(true); @@ -454,8 +495,8 @@ bool AsmParser::ParseStatement() { IDVal = getTok().getString(); Lex(); // Consume the integer token to be used as an identifier token.
if (Lexer.getKind() != AsmToken::Colon) { - if (!TheCondState.Ignore) - return TokError("unexpected token at start of statement"); + if (!TheCondState.Ignore) + return TokError("unexpected token at start of statement"); } } } @@ -498,7 +539,7 @@ bool AsmParser::ParseStatement() { // implicitly marked as external. MCSymbol *Sym; if (LocalLabelVal == -1) - Sym = CreateSymbol(IDVal); + Sym = getContext().GetOrCreateSymbol(IDVal); else Sym = Ctx.CreateDirectionalLocalSymbol(LocalLabelVal); if (!Sym->isUndefined() || Sym->isVariable()) @@ -530,158 +571,6 @@ bool AsmParser::ParseStatement() { // Otherwise, we have a normal instruction or directive. if (IDVal[0] == '.') { - // FIXME: This should be driven based on a hash lookup and callback. - if (IDVal == ".section") - return ParseDirectiveDarwinSection(); - if (IDVal == ".text") - // FIXME: This changes behavior based on the -static flag to the - // assembler. - return ParseDirectiveSectionSwitch("__TEXT", "__text", - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); - if (IDVal == ".const") - return ParseDirectiveSectionSwitch("__TEXT", "__const"); - if (IDVal == ".static_const") - return ParseDirectiveSectionSwitch("__TEXT", "__static_const"); - if (IDVal == ".cstring") - return ParseDirectiveSectionSwitch("__TEXT","__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".literal4") - return ParseDirectiveSectionSwitch("__TEXT", "__literal4", - MCSectionMachO::S_4BYTE_LITERALS, - 4); - if (IDVal == ".literal8") - return ParseDirectiveSectionSwitch("__TEXT", "__literal8", - MCSectionMachO::S_8BYTE_LITERALS, - 8); - if (IDVal == ".literal16") - return ParseDirectiveSectionSwitch("__TEXT","__literal16", - MCSectionMachO::S_16BYTE_LITERALS, - 16); - if (IDVal == ".constructor") - return ParseDirectiveSectionSwitch("__TEXT","__constructor"); - if (IDVal == ".destructor") - return ParseDirectiveSectionSwitch("__TEXT","__destructor"); - if (IDVal == ".fvmlib_init0") - return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init0"); - if (IDVal == ".fvmlib_init1") - return ParseDirectiveSectionSwitch("__TEXT","__fvmlib_init1"); - - // FIXME: The assembler manual claims that this has the self modify code - // flag, at least on x86-32, but that does not appear to be correct. - if (IDVal == ".symbol_stub") - return ParseDirectiveSectionSwitch("__TEXT","__symbol_stub", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - // FIXME: Different on PPC and ARM. - 0, 16); - // FIXME: PowerPC only? - if (IDVal == ".picsymbol_stub") - return ParseDirectiveSectionSwitch("__TEXT","__picsymbol_stub", - MCSectionMachO::S_SYMBOL_STUBS | - MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, - 0, 26); - if (IDVal == ".data") - return ParseDirectiveSectionSwitch("__DATA", "__data"); - if (IDVal == ".static_data") - return ParseDirectiveSectionSwitch("__DATA", "__static_data"); - - // FIXME: The section names of these two are misspelled in the assembler - // manual. 
- if (IDVal == ".non_lazy_symbol_pointer") - return ParseDirectiveSectionSwitch("__DATA", "__nl_symbol_ptr", - MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, - 4); - if (IDVal == ".lazy_symbol_pointer") - return ParseDirectiveSectionSwitch("__DATA", "__la_symbol_ptr", - MCSectionMachO::S_LAZY_SYMBOL_POINTERS, - 4); - - if (IDVal == ".dyld") - return ParseDirectiveSectionSwitch("__DATA", "__dyld"); - if (IDVal == ".mod_init_func") - return ParseDirectiveSectionSwitch("__DATA", "__mod_init_func", - MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, - 4); - if (IDVal == ".mod_term_func") - return ParseDirectiveSectionSwitch("__DATA", "__mod_term_func", - MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, - 4); - if (IDVal == ".const_data") - return ParseDirectiveSectionSwitch("__DATA", "__const"); - - - if (IDVal == ".objc_class") - return ParseDirectiveSectionSwitch("__OBJC", "__class", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_meta_class") - return ParseDirectiveSectionSwitch("__OBJC", "__meta_class", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cat_cls_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__cat_cls_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cat_inst_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__cat_inst_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_protocol") - return ParseDirectiveSectionSwitch("__OBJC", "__protocol", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_string_object") - return ParseDirectiveSectionSwitch("__OBJC", "__string_object", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cls_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__cls_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_inst_meth") - return ParseDirectiveSectionSwitch("__OBJC", "__inst_meth", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_cls_refs") - return ParseDirectiveSectionSwitch("__OBJC", "__cls_refs", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP | - MCSectionMachO::S_LITERAL_POINTERS, - 4); - if (IDVal == ".objc_message_refs") - return ParseDirectiveSectionSwitch("__OBJC", "__message_refs", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP | - MCSectionMachO::S_LITERAL_POINTERS, - 4); - if (IDVal == ".objc_symbols") - return ParseDirectiveSectionSwitch("__OBJC", "__symbols", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_category") - return ParseDirectiveSectionSwitch("__OBJC", "__category", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_class_vars") - return ParseDirectiveSectionSwitch("__OBJC", "__class_vars", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_instance_vars") - return ParseDirectiveSectionSwitch("__OBJC", "__instance_vars", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_module_info") - return ParseDirectiveSectionSwitch("__OBJC", "__module_info", - MCSectionMachO::S_ATTR_NO_DEAD_STRIP); - if (IDVal == ".objc_class_names") - return ParseDirectiveSectionSwitch("__TEXT", "__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".objc_meth_var_types") - return ParseDirectiveSectionSwitch("__TEXT", "__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".objc_meth_var_names") - return ParseDirectiveSectionSwitch("__TEXT", "__cstring", - MCSectionMachO::S_CSTRING_LITERALS); - if (IDVal == ".objc_selector_strs") - return ParseDirectiveSectionSwitch("__OBJC", "__selector_strs", - MCSectionMachO::S_CSTRING_LITERALS); - - if (IDVal == ".tdata") - return 
ParseDirectiveSectionSwitch("__DATA", "__thread_data", - MCSectionMachO::S_THREAD_LOCAL_REGULAR); - if (IDVal == ".tlv") - return ParseDirectiveSectionSwitch("__DATA", "__thread_vars", - MCSectionMachO::S_THREAD_LOCAL_VARIABLES); - if (IDVal == ".thread_init_func") - return ParseDirectiveSectionSwitch("__DATA", "__thread_init", - MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); - // Assembler features if (IDVal == ".set") return ParseDirectiveSet(); @@ -756,36 +645,25 @@ bool AsmParser::ParseStatement() { return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition); if (IDVal == ".weak_reference") return ParseDirectiveSymbolAttribute(MCSA_WeakReference); + if (IDVal == ".weak_def_can_be_hidden") + return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate); if (IDVal == ".comm") return ParseDirectiveComm(/*IsLocal=*/false); if (IDVal == ".lcomm") return ParseDirectiveComm(/*IsLocal=*/true); - if (IDVal == ".zerofill") - return ParseDirectiveDarwinZerofill(); - if (IDVal == ".desc") - return ParseDirectiveDarwinSymbolDesc(); - if (IDVal == ".lsym") - return ParseDirectiveDarwinLsym(); - if (IDVal == ".tbss") - return ParseDirectiveDarwinTBSS(); - - if (IDVal == ".subsections_via_symbols") - return ParseDirectiveDarwinSubsectionsViaSymbols(); + if (IDVal == ".abort") return ParseDirectiveAbort(); if (IDVal == ".include") return ParseDirectiveInclude(); - if (IDVal == ".dump") - return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsDump=*/true); - if (IDVal == ".load") - return ParseDirectiveDarwinDumpOrLoad(IDLoc, /*IsLoad=*/false); - - // Look up the handler in the handler table, - bool(AsmParser::*Handler)(StringRef, SMLoc) = DirectiveMap[IDVal]; - if (Handler) - return (this->*Handler)(IDVal, IDLoc); - + + // Look up the handler in the handler table. + std::pair Handler = + DirectiveMap.lookup(IDVal); + if (Handler.first) + return (Handler.first->*Handler.second)(IDVal, IDLoc); + // Target hook for parsing target specific directives. if (!getTargetParser().ParseDirective(ID)) return false; @@ -839,7 +717,6 @@ bool AsmParser::ParseAssignment(const StringRef &Name) { SMLoc EqualLoc = Lexer.getLoc(); const MCExpr *Value; - SMLoc StartLoc = Lexer.getLoc(); if (ParseExpression(Value)) return true; @@ -867,7 +744,7 @@ bool AsmParser::ParseAssignment(const StringRef &Name) { return Error(EqualLoc, "invalid reassignment of non-absolute variable '" + Name + "'"); } else - Sym = CreateSymbol(Name); + Sym = getContext().GetOrCreateSymbol(Name); // FIXME: Handle '.'. @@ -902,90 +779,15 @@ bool AsmParser::ParseDirectiveSet() { if (ParseIdentifier(Name)) return TokError("expected identifier after '.set' directive"); - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.set'"); Lex(); return ParseAssignment(Name); } -/// ParseDirectiveSection: -/// ::= .section identifier (',' identifier)* -/// FIXME: This should actually parse out the segment, section, attributes and -/// sizeof_stub fields. -bool AsmParser::ParseDirectiveDarwinSection() { - SMLoc Loc = Lexer.getLoc(); - - StringRef SectionName; - if (ParseIdentifier(SectionName)) - return Error(Loc, "expected identifier after '.section' directive"); - - // Verify there is a following comma. - if (!Lexer.is(AsmToken::Comma)) - return TokError("unexpected token in '.section' directive"); - - std::string SectionSpec = SectionName; - SectionSpec += ","; - - // Add all the tokens until the end of the line, ParseSectionSpecifier will - // handle this. 
- StringRef EOL = Lexer.LexUntilEndOfStatement(); - SectionSpec.append(EOL.begin(), EOL.end()); - - Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.section' directive"); - Lex(); - - - StringRef Segment, Section; - unsigned TAA, StubSize; - std::string ErrorStr = - MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, - TAA, StubSize); - - if (!ErrorStr.empty()) - return Error(Loc, ErrorStr.c_str()); - - // FIXME: Arch specific. - bool isText = Segment == "__TEXT"; // FIXME: Hack. - Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); - return false; -} - -/// ParseDirectiveSectionSwitch - -bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment, - const char *Section, - unsigned TAA, unsigned Align, - unsigned StubSize) { - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in section switching directive"); - Lex(); - - // FIXME: Arch specific. - bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. - Out.SwitchSection(Ctx.getMachOSection(Segment, Section, TAA, StubSize, - isText ? SectionKind::getText() - : SectionKind::getDataRel())); - - // Set the implicit alignment, if any. - // - // FIXME: This isn't really what 'as' does; I think it just uses the implicit - // alignment on the section (e.g., if one manually inserts bytes into the - // section, then just issueing the section switch directive will not realign - // the section. However, this is arguably more reasonable behavior, and there - // is no good reason for someone to intentionally emit incorrectly sized - // values into the implicitly aligned sections. - if (Align) - Out.EmitValueToAlignment(Align, 0, 1, 0); - - return false; -} - bool AsmParser::ParseEscapedString(std::string &Data) { - assert(Lexer.is(AsmToken::String) && "Unexpected current token!"); + assert(getLexer().is(AsmToken::String) && "Unexpected current token!"); Data = ""; StringRef Str = getTok().getStringContents(); @@ -1045,25 +847,25 @@ bool AsmParser::ParseEscapedString(std::string &Data) { /// ParseDirectiveAscii: /// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ] bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { - if (Lexer.isNot(AsmToken::String)) + if (getLexer().isNot(AsmToken::String)) return TokError("expected string in '.ascii' or '.asciz' directive"); - + std::string Data; if (ParseEscapedString(Data)) return true; - - Out.EmitBytes(Data, DEFAULT_ADDRSPACE); + + getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE); if (ZeroTerminated) - Out.EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE); - + getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE); + Lex(); - - if (Lexer.is(AsmToken::EndOfStatement)) + + if (getLexer().is(AsmToken::EndOfStatement)) break; - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.ascii' or '.asciz' directive"); Lex(); } @@ -1076,24 +878,24 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) { /// ParseDirectiveValue /// ::= (.byte | .short | ... 
) [ expression (, expression)* ] bool AsmParser::ParseDirectiveValue(unsigned Size) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { const MCExpr *Value; - SMLoc ATTRIBUTE_UNUSED StartLoc = Lexer.getLoc(); + SMLoc ATTRIBUTE_UNUSED StartLoc = getLexer().getLoc(); if (ParseExpression(Value)) return true; // Special case constant expressions to match code generator. if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) - Out.EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE); + getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE); else - Out.EmitValue(Value, Size, DEFAULT_ADDRSPACE); + getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE); - if (Lexer.is(AsmToken::EndOfStatement)) + if (getLexer().is(AsmToken::EndOfStatement)) break; // FIXME: Improve diagnostic. - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); } } @@ -1111,18 +913,15 @@ bool AsmParser::ParseDirectiveSpace() { return true; int64_t FillExpr = 0; - bool HasFillExpr = false; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.space' directive"); Lex(); if (ParseAbsoluteExpression(FillExpr)) return true; - HasFillExpr = true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.space' directive"); } @@ -1132,7 +931,7 @@ bool AsmParser::ParseDirectiveSpace() { return TokError("invalid number of bytes in '.space' directive"); // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. - Out.EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); + getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); return false; } @@ -1144,7 +943,7 @@ bool AsmParser::ParseDirectiveFill() { if (ParseAbsoluteExpression(NumValues)) return true; - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.fill' directive"); Lex(); @@ -1152,7 +951,7 @@ bool AsmParser::ParseDirectiveFill() { if (ParseAbsoluteExpression(FillSize)) return true; - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.fill' directive"); Lex(); @@ -1160,7 +959,7 @@ bool AsmParser::ParseDirectiveFill() { if (ParseAbsoluteExpression(FillExpr)) return true; - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.fill' directive"); Lex(); @@ -1169,7 +968,7 @@ bool AsmParser::ParseDirectiveFill() { return TokError("invalid '.fill' size, expected 1, 2, 4, or 8"); for (uint64_t i = 0, e = NumValues; i != e; ++i) - Out.EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE); + getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE); return false; } @@ -1178,21 +977,20 @@ bool AsmParser::ParseDirectiveFill() { /// ParseDirectiveOrg /// ::= .org expression [ , expression ] bool AsmParser::ParseDirectiveOrg() { const MCExpr *Offset; - SMLoc StartLoc = Lexer.getLoc(); if (ParseExpression(Offset)) return true; // Parse optional fill expression.
int64_t FillExpr = 0; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.org' directive"); Lex(); if (ParseAbsoluteExpression(FillExpr)) return true; - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.org' directive"); } @@ -1200,7 +998,7 @@ bool AsmParser::ParseDirectiveOrg() { // FIXME: Only limited forms of relocatable expressions are accepted here, it // has to be relative to the current section. - Out.EmitValueToOffset(Offset, FillExpr); + getStreamer().EmitValueToOffset(Offset, FillExpr); return false; } @@ -1208,7 +1006,7 @@ bool AsmParser::ParseDirectiveOrg() { /// ParseDirectiveAlign /// ::= {.align, ...} expression [ , expression [ , expression ]] bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { - SMLoc AlignmentLoc = Lexer.getLoc(); + SMLoc AlignmentLoc = getLexer().getLoc(); int64_t Alignment; if (ParseAbsoluteExpression(Alignment)) return true; @@ -1217,30 +1015,30 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { bool HasFillExpr = false; int64_t FillExpr = 0; int64_t MaxBytesToFill = 0; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); // The fill expression can be omitted while specifying a maximum number of // alignment bytes, e.g: // .align 3,,4 - if (Lexer.isNot(AsmToken::Comma)) { + if (getLexer().isNot(AsmToken::Comma)) { HasFillExpr = true; if (ParseAbsoluteExpression(FillExpr)) return true; } - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); - MaxBytesLoc = Lexer.getLoc(); + MaxBytesLoc = getLexer().getLoc(); if (ParseAbsoluteExpression(MaxBytesToFill)) return true; - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); } } @@ -1282,14 +1080,14 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { // FIXME: This should be using a target hook. bool UseCodeAlign = false; if (const MCSectionMachO *S = dyn_cast<MCSectionMachO>( - Out.getCurrentSection())) + getStreamer().getCurrentSection())) UseCodeAlign = S->hasAttribute(MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) && ValueSize == 1 && UseCodeAlign) { - Out.EmitCodeAlignment(Alignment, MaxBytesToFill); + getStreamer().EmitCodeAlignment(Alignment, MaxBytesToFill); } else { // FIXME: Target specific behavior about how the "extra" bytes are filled. - Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); + getStreamer().EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill); } return false; @@ -1298,21 +1096,21 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { /// ParseDirectiveSymbolAttribute /// ::= { ".globl", ".weak", ...
} [ identifier ( , identifier )* ] bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { StringRef Name; if (ParseIdentifier(Name)) return TokError("expected identifier in directive"); - MCSymbol *Sym = CreateSymbol(Name); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - Out.EmitSymbolAttribute(Sym, Attr); + getStreamer().EmitSymbolAttribute(Sym, Attr); - if (Lexer.is(AsmToken::EndOfStatement)) + if (getLexer().is(AsmToken::EndOfStatement)) break; - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); } @@ -1330,20 +1128,20 @@ bool AsmParser::ParseDirectiveELFType() { return TokError("expected identifier in directive"); // Handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(Name); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.type' directive"); Lex(); - if (Lexer.isNot(AsmToken::At)) + if (getLexer().isNot(AsmToken::At)) return TokError("expected '@' before type"); Lex(); StringRef Type; SMLoc TypeLoc; - TypeLoc = Lexer.getLoc(); + TypeLoc = getLexer().getLoc(); if (ParseIdentifier(Type)) return TokError("expected symbol type in directive"); @@ -1358,42 +1156,12 @@ bool AsmParser::ParseDirectiveELFType() { if (Attr == MCSA_Invalid) return Error(TypeLoc, "unsupported attribute in '.type' directive"); - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.type' directive"); Lex(); - Out.EmitSymbolAttribute(Sym, Attr); - - return false; -} - -/// ParseDirectiveDarwinSymbolDesc -/// ::= .desc identifier , expression -bool AsmParser::ParseDirectiveDarwinSymbolDesc() { - StringRef Name; - if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); - - // Handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(Name); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.desc' directive"); - Lex(); - - SMLoc DescLoc = Lexer.getLoc(); - int64_t DescValue; - if (ParseAbsoluteExpression(DescValue)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.desc' directive"); - - Lex(); - - // Set the n_desc field of this Symbol to this DescValue - Out.EmitSymbolDesc(Sym, DescValue); + getStreamer().EmitSymbolAttribute(Sym, Attr); return false; } @@ -1401,28 +1169,28 @@ bool AsmParser::ParseDirectiveDarwinSymbolDesc() { /// ParseDirectiveComm /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] bool AsmParser::ParseDirectiveComm(bool IsLocal) { - SMLoc IDLoc = Lexer.getLoc(); + SMLoc IDLoc = getLexer().getLoc(); StringRef Name; if (ParseIdentifier(Name)) return TokError("expected identifier in directive"); // Handle the identifier as the key symbol. 
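// The patch replaces the parser-local CreateSymbol with
// getContext().GetOrCreateSymbol, so every spelling of a name resolves to one
// interned symbol. A get-or-create sketch over a plain map (the real
// MCContext also manages temporary-label naming and allocation):
#include <map>
#include <string>

struct Symbol {
  std::string Name;
  bool Defined;
};

static Symbol *getOrCreateSymbol(std::map<std::string, Symbol> &Table,
                                 const std::string &Name) {
  auto It = Table.find(Name);
  if (It != Table.end())
    return &It->second;               // same name -> same Symbol*
  It = Table.emplace(Name, Symbol{Name, false}).first;
  return &It->second;
}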
- MCSymbol *Sym = CreateSymbol(Name); + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - if (Lexer.isNot(AsmToken::Comma)) + if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in directive"); Lex(); int64_t Size; - SMLoc SizeLoc = Lexer.getLoc(); + SMLoc SizeLoc = getLexer().getLoc(); if (ParseAbsoluteExpression(Size)) return true; int64_t Pow2Alignment = 0; SMLoc Pow2AlignmentLoc; - if (Lexer.is(AsmToken::Comma)) { + if (getLexer().is(AsmToken::Comma)) { Lex(); - Pow2AlignmentLoc = Lexer.getLoc(); + Pow2AlignmentLoc = getLexer().getLoc(); if (ParseAbsoluteExpression(Pow2Alignment)) return true; @@ -1434,7 +1202,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { } } - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.comm' or '.lcomm' directive"); Lex(); @@ -1458,168 +1226,14 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { // '.lcomm' is equivalent to '.zerofill'. // Create the Symbol as a common or local common with Size and Pow2Alignment if (IsLocal) { - Out.EmitZerofill(Ctx.getMachOSection("__DATA", "__bss", - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS()), - Sym, Size, 1 << Pow2Alignment); + getStreamer().EmitZerofill(Ctx.getMachOSection( + "__DATA", "__bss", MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS()), + Sym, Size, 1 << Pow2Alignment); return false; } - Out.EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); - return false; -} - -/// ParseDirectiveDarwinZerofill -/// ::= .zerofill segname , sectname [, identifier , size_expression [ -/// , align_expression ]] -bool AsmParser::ParseDirectiveDarwinZerofill() { - StringRef Segment; - if (ParseIdentifier(Segment)) - return TokError("expected segment name after '.zerofill' directive"); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - StringRef Section; - if (ParseIdentifier(Section)) - return TokError("expected section name after comma in '.zerofill' " - "directive"); - - // If this is the end of the line all that was wanted was to create the - // the section but with no symbol. - if (Lexer.is(AsmToken::EndOfStatement)) { - // Create the zerofill section but no symbol - Out.EmitZerofill(Ctx.getMachOSection(Segment, Section, - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS())); - return false; - } - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - SMLoc IDLoc = Lexer.getLoc(); - StringRef IDStr; - if (ParseIdentifier(IDStr)) - return TokError("expected identifier in directive"); - - // handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(IDStr); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - int64_t Size; - SMLoc SizeLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Size)) - return true; - - int64_t Pow2Alignment = 0; - SMLoc Pow2AlignmentLoc; - if (Lexer.is(AsmToken::Comma)) { - Lex(); - Pow2AlignmentLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Pow2Alignment)) - return true; - } - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.zerofill' directive"); - - Lex(); - - if (Size < 0) - return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " - "than zero"); - - // NOTE: The alignment in the directive is a power of 2 value, the assembler - // may internally end up wanting an alignment in bytes. - // FIXME: Diagnose overflow. 
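// '.comm'-style directives carry alignment as a power of two, while the
// streamer wants bytes (the 1 << Pow2Alignment above). A sketch of that
// conversion plus the sign checks the parser performs; the upper bound here
// is illustrative, since the patch itself only carries a FIXME about
// diagnosing overflow.
#include <cstdint>

static bool checkCommonOperands(int64_t Size, int64_t Pow2Align,
                                uint64_t &ByteAlign) {
  if (Size < 0)
    return true;                      // "directive size, can't be less than zero"
  if (Pow2Align < 0 || Pow2Align > 30)
    return true;                      // reject negatives and obvious overflow
  ByteAlign = uint64_t(1) << Pow2Align;
  return false;
}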
- if (Pow2Alignment < 0) - return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, " - "can't be less than zero"); - - if (!Sym->isUndefined()) - return Error(IDLoc, "invalid symbol redefinition"); - - // Create the zerofill Symbol with Size and Pow2Alignment - // - // FIXME: Arch specific. - Out.EmitZerofill(Ctx.getMachOSection(Segment, Section, - MCSectionMachO::S_ZEROFILL, 0, - SectionKind::getBSS()), - Sym, Size, 1 << Pow2Alignment); - - return false; -} - -/// ParseDirectiveDarwinTBSS -/// ::= .tbss identifier, size, align -bool AsmParser::ParseDirectiveDarwinTBSS() { - SMLoc IDLoc = Lexer.getLoc(); - StringRef Name; - if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); - - // Handle the identifier as the key symbol. - MCSymbol *Sym = CreateSymbol(Name); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in directive"); - Lex(); - - int64_t Size; - SMLoc SizeLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Size)) - return true; - - int64_t Pow2Alignment = 0; - SMLoc Pow2AlignmentLoc; - if (Lexer.is(AsmToken::Comma)) { - Lex(); - Pow2AlignmentLoc = Lexer.getLoc(); - if (ParseAbsoluteExpression(Pow2Alignment)) - return true; - } - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.tbss' directive"); - - Lex(); - - if (Size < 0) - return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than" - "zero"); - - // FIXME: Diagnose overflow. - if (Pow2Alignment < 0) - return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less" - "than zero"); - - if (!Sym->isUndefined()) - return Error(IDLoc, "invalid symbol redefinition"); - - Out.EmitTBSSSymbol(Ctx.getMachOSection("__DATA", "__thread_bss", - MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, - 0, SectionKind::getThreadBSS()), - Sym, Size, 1 << Pow2Alignment); - - return false; -} - -/// ParseDirectiveDarwinSubsectionsViaSymbols -/// ::= .subsections_via_symbols -bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() { - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.subsections_via_symbols' directive"); - - Lex(); - - Out.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); - + getStreamer().EmitCommonSymbol(Sym, Size, 1 << Pow2Alignment); return false; } @@ -1627,11 +1241,11 @@ bool AsmParser::ParseDirectiveDarwinSubsectionsViaSymbols() { /// ::= .abort [ "abort_string" ] bool AsmParser::ParseDirectiveAbort() { // FIXME: Use loc from directive. - SMLoc Loc = Lexer.getLoc(); + SMLoc Loc = getLexer().getLoc(); StringRef Str = ""; - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::String)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::String)) return TokError("expected string in '.abort' directive"); Str = getTok().getString(); @@ -1639,7 +1253,7 @@ bool AsmParser::ParseDirectiveAbort() { Lex(); } - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.abort' directive"); Lex(); @@ -1653,48 +1267,17 @@ bool AsmParser::ParseDirectiveAbort() { return false; } -/// ParseDirectiveLsym -/// ::= .lsym identifier , expression -bool AsmParser::ParseDirectiveDarwinLsym() { - StringRef Name; - if (ParseIdentifier(Name)) - return TokError("expected identifier in directive"); - - // Handle the identifier as the key symbol. 
- MCSymbol *Sym = CreateSymbol(Name); - - if (Lexer.isNot(AsmToken::Comma)) - return TokError("unexpected token in '.lsym' directive"); - Lex(); - - const MCExpr *Value; - SMLoc StartLoc = Lexer.getLoc(); - if (ParseExpression(Value)) - return true; - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.lsym' directive"); - - Lex(); - - // We don't currently support this directive. - // - // FIXME: Diagnostic location! - (void) Sym; - return TokError("directive '.lsym' is unsupported"); -} - /// ParseDirectiveInclude /// ::= .include "filename" bool AsmParser::ParseDirectiveInclude() { - if (Lexer.isNot(AsmToken::String)) + if (getLexer().isNot(AsmToken::String)) return TokError("expected string in '.include' directive"); std::string Filename = getTok().getString(); - SMLoc IncludeLoc = Lexer.getLoc(); + SMLoc IncludeLoc = getLexer().getLoc(); Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.include' directive"); // Strip the quotes. @@ -1712,29 +1295,6 @@ bool AsmParser::ParseDirectiveInclude() { return false; } -/// ParseDirectiveDarwinDumpOrLoad -/// ::= ( .dump | .load ) "filename" -bool AsmParser::ParseDirectiveDarwinDumpOrLoad(SMLoc IDLoc, bool IsDump) { - if (Lexer.isNot(AsmToken::String)) - return TokError("expected string in '.dump' or '.load' directive"); - - Lex(); - - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.dump' or '.load' directive"); - - Lex(); - - // FIXME: If/when .dump and .load are implemented they will be done in the - // the assembly parser and not have any need for an MCStreamer API. - if (IsDump) - Warning(IDLoc, "ignoring directive .dump for now"); - else - Warning(IDLoc, "ignoring directive .load for now"); - - return false; -} - /// ParseDirectiveIf /// ::= .if expression bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { @@ -1748,7 +1308,7 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { if (ParseAbsoluteExpression(ExprValue)) return true; - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.if' directive"); Lex(); @@ -1781,7 +1341,7 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { if (ParseAbsoluteExpression(ExprValue)) return true; - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.elseif' directive"); Lex(); @@ -1795,7 +1355,7 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { /// ParseDirectiveElse /// ::= .else bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.else' directive"); Lex(); @@ -1819,7 +1379,7 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { /// ParseDirectiveEndIf /// ::= .endif bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.endif' directive"); Lex(); @@ -1838,40 +1398,40 @@ bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { /// ParseDirectiveFile /// ::= .file [number] string -bool AsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { +bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { // FIXME: I'm not sure what this is. 
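// The '.if'/'.elseif'/'.else'/'.endif' handlers above drive a small stack of
// conditional states; a sketch of the stack discipline covering .if/.else/
// .endif, with a hypothetical CondState (the real parser's TheCondState also
// tracks an "ignore" flag so inactive branches can skip nested regions):
#include <vector>

struct CondState {
  enum Kind { If, Else };
  Kind TheCond;
  bool Active;                         // is this branch being assembled?
};

struct CondStack {
  std::vector<CondState> Stack;

  void enterIf(bool CondTrue) { Stack.push_back({CondState::If, CondTrue}); }

  bool enterElse() {                   // returns true on error
    if (Stack.empty() || Stack.back().TheCond == CondState::Else)
      return true;                     // ".else without matching .if"
    Stack.back().TheCond = CondState::Else;
    Stack.back().Active = !Stack.back().Active;
    return false;
  }

  bool exitEndIf() {
    if (Stack.empty())
      return true;                     // ".endif without matching .if"
    Stack.pop_back();
    return false;
  }
};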
int64_t FileNumber = -1; - if (Lexer.is(AsmToken::Integer)) { + if (getLexer().is(AsmToken::Integer)) { FileNumber = getTok().getIntVal(); Lex(); - + if (FileNumber < 1) return TokError("file number less than one"); } - if (Lexer.isNot(AsmToken::String)) + if (getLexer().isNot(AsmToken::String)) return TokError("unexpected token in '.file' directive"); - + StringRef Filename = getTok().getString(); Filename = Filename.substr(1, Filename.size()-2); Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.file' directive"); if (FileNumber == -1) - Out.EmitFileDirective(Filename); + getStreamer().EmitFileDirective(Filename); else - Out.EmitDwarfFileDirective(FileNumber, Filename); - + getStreamer().EmitDwarfFileDirective(FileNumber, Filename); + return false; } /// ParseDirectiveLine /// ::= .line [number] -bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Integer)) +bool GenericAsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) return TokError("unexpected token in '.line' directive"); int64_t LineNumber = getTok().getIntVal(); @@ -1881,8 +1441,8 @@ bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { // FIXME: Do something with the .line. } - if (Lexer.isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.file' directive"); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.line' directive"); return false; } @@ -1890,8 +1450,8 @@ bool AsmParser::ParseDirectiveLine(StringRef, SMLoc DirectiveLoc) { /// ParseDirectiveLoc /// ::= .loc number [number [number]] -bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { - if (Lexer.isNot(AsmToken::Integer)) +bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { + if (getLexer().isNot(AsmToken::Integer)) return TokError("unexpected token in '.loc' directive"); // FIXME: What are these fields? @@ -1900,16 +1460,16 @@ bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { // FIXME: Validate file. 
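// '.file [number] string' peeks for an optional leading integer and then
// strips the string token's quotes by hand via substr(1, size-2). The same
// step as a guarded sketch over plain std::string (the guard is belt and
// braces; the lexer only produces well-formed string tokens):
#include <string>

static std::string stripQuotes(const std::string &Tok) {
  if (Tok.size() >= 2 && Tok.front() == '"' && Tok.back() == '"')
    return Tok.substr(1, Tok.size() - 2);   // stripQuotes("\"foo.s\"") == "foo.s"
  return Tok;                               // already bare
}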
Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Integer)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) return TokError("unexpected token in '.loc' directive"); int64_t Param2 = getTok().getIntVal(); (void) Param2; Lex(); - if (Lexer.isNot(AsmToken::EndOfStatement)) { - if (Lexer.isNot(AsmToken::Integer)) + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Integer)) return TokError("unexpected token in '.loc' directive"); int64_t Param3 = getTok().getIntVal(); @@ -1920,7 +1480,7 @@ bool AsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { } } - if (Lexer.isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.file' directive"); return false; diff --git a/lib/MC/MCParser/CMakeLists.txt b/lib/MC/MCParser/CMakeLists.txt index a5c0818..25a7bf4 100644 --- a/lib/MC/MCParser/CMakeLists.txt +++ b/lib/MC/MCParser/CMakeLists.txt @@ -1,7 +1,10 @@ add_llvm_library(LLVMMCParser AsmLexer.cpp AsmParser.cpp + DarwinAsmParser.cpp + ELFAsmParser.cpp MCAsmLexer.cpp MCAsmParser.cpp + MCAsmParserExtension.cpp TargetAsmParser.cpp ) diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp new file mode 100644 index 0000000..7d8639e --- /dev/null +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -0,0 +1,758 @@ +//===- DarwinAsmParser.cpp - Darwin (Mach-O) Assembly Parser --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +using namespace llvm; + +namespace { + +/// \brief Implementation of directive handling which is shared across all +/// Darwin targets. +class DarwinAsmParser : public MCAsmParserExtension { + bool ParseSectionSwitch(const char *Segment, const char *Section, + unsigned TAA = 0, unsigned ImplicitAlign = 0, + unsigned StubSize = 0); + +public: + DarwinAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. 
+ this->MCAsmParserExtension::Initialize(Parser); + + Parser.AddDirectiveHandler(this, ".desc", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveDesc)); + Parser.AddDirectiveHandler(this, ".lsym", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveLsym)); + Parser.AddDirectiveHandler(this, ".subsections_via_symbols", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols)); + Parser.AddDirectiveHandler(this, ".dump", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveDumpOrLoad)); + Parser.AddDirectiveHandler(this, ".load", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveDumpOrLoad)); + Parser.AddDirectiveHandler(this, ".section", MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSection)); + Parser.AddDirectiveHandler(this, ".secure_log_unique", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSecureLogUnique)); + Parser.AddDirectiveHandler(this, ".secure_log_reset", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveSecureLogReset)); + Parser.AddDirectiveHandler(this, ".tbss", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveTBSS)); + Parser.AddDirectiveHandler(this, ".zerofill", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseDirectiveZerofill)); + + // Special section directives. + Parser.AddDirectiveHandler(this, ".const", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveConst)); + Parser.AddDirectiveHandler(this, ".const_data", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveConstData)); + Parser.AddDirectiveHandler(this, ".constructor", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveConstructor)); + Parser.AddDirectiveHandler(this, ".cstring", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveCString)); + Parser.AddDirectiveHandler(this, ".data", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveData)); + Parser.AddDirectiveHandler(this, ".destructor", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveDestructor)); + Parser.AddDirectiveHandler(this, ".dyld", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveDyld)); + Parser.AddDirectiveHandler(this, ".fvmlib_init0", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveFVMLibInit0)); + Parser.AddDirectiveHandler(this, ".fvmlib_init1", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveFVMLibInit1)); + Parser.AddDirectiveHandler(this, ".lazy_symbol_pointer", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLazySymbolPointers)); + Parser.AddDirectiveHandler(this, ".literal16", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLiteral16)); + Parser.AddDirectiveHandler(this, ".literal4", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLiteral4)); + Parser.AddDirectiveHandler(this, ".literal8", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveLiteral8)); + Parser.AddDirectiveHandler(this, ".mod_init_func", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveModInitFunc)); + Parser.AddDirectiveHandler(this, ".mod_term_func", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveModTermFunc)); + Parser.AddDirectiveHandler(this, ".non_lazy_symbol_pointer", + MCAsmParser::DirectiveHandler( + 
&DarwinAsmParser::ParseSectionDirectiveNonLazySymbolPointers)); + Parser.AddDirectiveHandler(this, ".objc_cat_cls_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCCatClsMeth)); + Parser.AddDirectiveHandler(this, ".objc_cat_inst_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCCatInstMeth)); + Parser.AddDirectiveHandler(this, ".objc_category", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCCategory)); + Parser.AddDirectiveHandler(this, ".objc_class", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClass)); + Parser.AddDirectiveHandler(this, ".objc_class_names", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClassNames)); + Parser.AddDirectiveHandler(this, ".objc_class_vars", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClassVars)); + Parser.AddDirectiveHandler(this, ".objc_cls_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClsMeth)); + Parser.AddDirectiveHandler(this, ".objc_cls_refs", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCClsRefs)); + Parser.AddDirectiveHandler(this, ".objc_inst_meth", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCInstMeth)); + Parser.AddDirectiveHandler(this, ".objc_instance_vars", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCInstanceVars)); + Parser.AddDirectiveHandler(this, ".objc_message_refs", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMessageRefs)); + Parser.AddDirectiveHandler(this, ".objc_meta_class", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMetaClass)); + Parser.AddDirectiveHandler(this, ".objc_meth_var_names", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMethVarNames)); + Parser.AddDirectiveHandler(this, ".objc_meth_var_types", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCMethVarTypes)); + Parser.AddDirectiveHandler(this, ".objc_module_info", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCModuleInfo)); + Parser.AddDirectiveHandler(this, ".objc_protocol", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCProtocol)); + Parser.AddDirectiveHandler(this, ".objc_selector_strs", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCSelectorStrs)); + Parser.AddDirectiveHandler(this, ".objc_string_object", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCStringObject)); + Parser.AddDirectiveHandler(this, ".objc_symbols", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveObjCSymbols)); + Parser.AddDirectiveHandler(this, ".picsymbol_stub", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectivePICSymbolStub)); + Parser.AddDirectiveHandler(this, ".static_const", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveStaticConst)); + Parser.AddDirectiveHandler(this, ".static_data", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveStaticData)); + Parser.AddDirectiveHandler(this, ".symbol_stub", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveSymbolStub)); + Parser.AddDirectiveHandler(this, ".tdata", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveTData)); + 
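// Each AddDirectiveHandler call in this table binds a directive spelling to a
// member function on the extension; a condensed sketch of that dispatch
// pattern with a hypothetical Handler type (the real DirectiveHandler typedef
// lives on MCAsmParser):
#include <map>
#include <string>

class Extension {
public:
  using Handler = bool (Extension::*)(const std::string &);

  void addDirectiveHandler(const std::string &Directive, Handler H) {
    Handlers[Directive] = H;
  }

  bool dispatch(const std::string &Directive) {
    auto It = Handlers.find(Directive);
    if (It == Handlers.end())
      return true;                     // unknown directive
    return (this->*(It->second))(Directive);
  }

  bool parseSectionDirectiveText(const std::string &) {
    return false;                      // would switch to __TEXT,__text
  }

private:
  std::map<std::string, Handler> Handlers;
};

// Registration then mirrors the table above:
//   E.addDirectiveHandler(".text", &Extension::parseSectionDirectiveText);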
Parser.AddDirectiveHandler(this, ".text", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveText)); + Parser.AddDirectiveHandler(this, ".thread_init_func", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveThreadInitFunc)); + Parser.AddDirectiveHandler(this, ".tlv", + MCAsmParser::DirectiveHandler( + &DarwinAsmParser::ParseSectionDirectiveTLV)); + } + + bool ParseDirectiveDesc(StringRef, SMLoc); + bool ParseDirectiveDumpOrLoad(StringRef, SMLoc); + bool ParseDirectiveLsym(StringRef, SMLoc); + bool ParseDirectiveSection(); + bool ParseDirectiveSecureLogReset(StringRef, SMLoc); + bool ParseDirectiveSecureLogUnique(StringRef, SMLoc); + bool ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc); + bool ParseDirectiveTBSS(StringRef, SMLoc); + bool ParseDirectiveZerofill(StringRef, SMLoc); + + // Named Section Directive + bool ParseSectionDirectiveConst(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__const"); + } + bool ParseSectionDirectiveStaticConst(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__static_const"); + } + bool ParseSectionDirectiveCString(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveLiteral4(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__literal4", + MCSectionMachO::S_4BYTE_LITERALS, 4); + } + bool ParseSectionDirectiveLiteral8(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__literal8", + MCSectionMachO::S_8BYTE_LITERALS, 8); + } + bool ParseSectionDirectiveLiteral16(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__literal16", + MCSectionMachO::S_16BYTE_LITERALS, 16); + } + bool ParseSectionDirectiveConstructor(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__constructor"); + } + bool ParseSectionDirectiveDestructor(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__destructor"); + } + bool ParseSectionDirectiveFVMLibInit0(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__fvmlib_init0"); + } + bool ParseSectionDirectiveFVMLibInit1(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__fvmlib_init1"); + } + bool ParseSectionDirectiveSymbolStub(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__symbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, + // FIXME: Different on PPC and ARM. 
+ 0, 16); + } + bool ParseSectionDirectivePICSymbolStub(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT","__picsymbol_stub", + MCSectionMachO::S_SYMBOL_STUBS | + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS, 0, 26); + } + bool ParseSectionDirectiveData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__data"); + } + bool ParseSectionDirectiveStaticData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__static_data"); + } + bool ParseSectionDirectiveNonLazySymbolPointers(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__nl_symbol_ptr", + MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS, 4); + } + bool ParseSectionDirectiveLazySymbolPointers(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__la_symbol_ptr", + MCSectionMachO::S_LAZY_SYMBOL_POINTERS, 4); + } + bool ParseSectionDirectiveDyld(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__dyld"); + } + bool ParseSectionDirectiveModInitFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__mod_init_func", + MCSectionMachO::S_MOD_INIT_FUNC_POINTERS, 4); + } + bool ParseSectionDirectiveModTermFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__mod_term_func", + MCSectionMachO::S_MOD_TERM_FUNC_POINTERS, 4); + } + bool ParseSectionDirectiveConstData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__const"); + } + bool ParseSectionDirectiveObjCClass(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCMetaClass(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__meta_class", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCatClsMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cat_cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCatInstMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cat_inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCProtocol(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__protocol", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCStringObject(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__string_object", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClsMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cls_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCInstMeth(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__inst_meth", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClsRefs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__cls_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, 4); + } + bool ParseSectionDirectiveObjCMessageRefs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__message_refs", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP | + MCSectionMachO::S_LITERAL_POINTERS, 4); + } + bool ParseSectionDirectiveObjCSymbols(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__symbols", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCCategory(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__category", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClassVars(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__class_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool 
ParseSectionDirectiveObjCInstanceVars(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__instance_vars", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCModuleInfo(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__module_info", + MCSectionMachO::S_ATTR_NO_DEAD_STRIP); + } + bool ParseSectionDirectiveObjCClassNames(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCMethVarTypes(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCMethVarNames(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__cstring", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveObjCSelectorStrs(StringRef, SMLoc) { + return ParseSectionSwitch("__OBJC", "__selector_strs", + MCSectionMachO::S_CSTRING_LITERALS); + } + bool ParseSectionDirectiveTData(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_data", + MCSectionMachO::S_THREAD_LOCAL_REGULAR); + } + bool ParseSectionDirectiveText(StringRef, SMLoc) { + return ParseSectionSwitch("__TEXT", "__text", + MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS); + } + bool ParseSectionDirectiveTLV(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_vars", + MCSectionMachO::S_THREAD_LOCAL_VARIABLES); + } + bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__thread_init", + MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); + } + +}; + +} + +bool DarwinAsmParser::ParseSectionSwitch(const char *Segment, + const char *Section, + unsigned TAA, unsigned Align, + unsigned StubSize) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in section switching directive"); + Lex(); + + // FIXME: Arch specific. + bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack. + getStreamer().SwitchSection(getContext().getMachOSection( + Segment, Section, TAA, StubSize, + isText ? SectionKind::getText() + : SectionKind::getDataRel())); + + // Set the implicit alignment, if any. + // + // FIXME: This isn't really what 'as' does; I think it just uses the implicit + // alignment on the section (e.g., if one manually inserts bytes into the + // section, then just issueing the section switch directive will not realign + // the section. However, this is arguably more reasonable behavior, and there + // is no good reason for someone to intentionally emit incorrectly sized + // values into the implicitly aligned sections. + if (Align) + getStreamer().EmitValueToAlignment(Align, 0, 1, 0); + + return false; +} + +/// ParseDirectiveDesc +/// ::= .desc identifier , expression +bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. 
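// ParseSectionSwitch pairs the section change with the table's implicit
// alignment, issued only when one is specified; the ordering matters, since
// the alignment must apply to the newly current section. A sketch with
// stand-in emitters (not the real MCStreamer API):
struct MiniStreamer {
  void switchSection(const char *, const char *) {}                  // stand-in
  void emitValueToAlignment(unsigned, long, unsigned, unsigned) {}   // stand-in
};

static void switchWithImplicitAlign(MiniStreamer &S, const char *Segment,
                                    const char *Section, unsigned Align) {
  S.switchSection(Segment, Section);
  if (Align)                            // 0 means "no implicit alignment"
    S.emitValueToAlignment(Align, 0, 1, 0);  // fill 0, value size 1, no cap
}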
+ MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.desc' directive"); + Lex(); + + int64_t DescValue; + if (getParser().ParseAbsoluteExpression(DescValue)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.desc' directive"); + + Lex(); + + // Set the n_desc field of this Symbol to this DescValue + getStreamer().EmitSymbolDesc(Sym, DescValue); + + return false; +} + +/// ParseDirectiveDumpOrLoad +/// ::= ( .dump | .load ) "filename" +bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive, + SMLoc IDLoc) { + bool IsDump = Directive == ".dump"; + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in '.dump' or '.load' directive"); + + Lex(); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.dump' or '.load' directive"); + + Lex(); + + // FIXME: If/when .dump and .load are implemented they will be done in the + // the assembly parser and not have any need for an MCStreamer API. + if (IsDump) + Warning(IDLoc, "ignoring directive .dump for now"); + else + Warning(IDLoc, "ignoring directive .load for now"); + + return false; +} + +/// ParseDirectiveLsym +/// ::= .lsym identifier , expression +bool DarwinAsmParser::ParseDirectiveLsym(StringRef, SMLoc) { + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.lsym' directive"); + Lex(); + + const MCExpr *Value; + if (getParser().ParseExpression(Value)) + return true; + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.lsym' directive"); + + Lex(); + + // We don't currently support this directive. + // + // FIXME: Diagnostic location! + (void) Sym; + return TokError("directive '.lsym' is unsupported"); +} + +/// ParseDirectiveSection: +/// ::= .section identifier (',' identifier)* +bool DarwinAsmParser::ParseDirectiveSection() { + SMLoc Loc = getLexer().getLoc(); + + StringRef SectionName; + if (getParser().ParseIdentifier(SectionName)) + return Error(Loc, "expected identifier after '.section' directive"); + + // Verify there is a following comma. + if (!getLexer().is(AsmToken::Comma)) + return TokError("unexpected token in '.section' directive"); + + std::string SectionSpec = SectionName; + SectionSpec += ","; + + // Add all the tokens until the end of the line, ParseSectionSpecifier will + // handle this. + StringRef EOL = getLexer().LexUntilEndOfStatement(); + SectionSpec.append(EOL.begin(), EOL.end()); + + Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.section' directive"); + Lex(); + + + StringRef Segment, Section; + unsigned TAA, StubSize; + std::string ErrorStr = + MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, + TAA, StubSize); + + if (!ErrorStr.empty()) + return Error(Loc, ErrorStr.c_str()); + + // FIXME: Arch specific. + bool isText = Segment == "__TEXT"; // FIXME: Hack. + getStreamer().SwitchSection(getContext().getMachOSection( + Segment, Section, TAA, StubSize, + isText ? 
SectionKind::getText() + : SectionKind::getDataRel())); + return false; +} + +/// ParseDirectiveSecureLogUnique +/// ::= .secure_log_unique "log message" +bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { + std::string LogMessage; + + if (getLexer().isNot(AsmToken::String)) + LogMessage = ""; + else{ + LogMessage = getTok().getString(); + Lex(); + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.secure_log_unique' directive"); + + if (getContext().getSecureLogUsed() != false) + return Error(IDLoc, ".secure_log_unique specified multiple times"); + + char *SecureLogFile = getContext().getSecureLogFile(); + if (SecureLogFile == NULL) + return Error(IDLoc, ".secure_log_unique used but AS_SECURE_LOG_FILE " + "environment variable unset."); + + raw_ostream *OS = getContext().getSecureLog(); + if (OS == NULL) { + std::string Err; + OS = new raw_fd_ostream(SecureLogFile, Err, raw_fd_ostream::F_Append); + if (!Err.empty()) { + delete OS; + return Error(IDLoc, Twine("can't open secure log file: ") + + SecureLogFile + " (" + Err + ")"); + } + getContext().setSecureLog(OS); + } + + int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc); + *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier() + << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":" + << LogMessage + "\n"; + + getContext().setSecureLogUsed(true); + + return false; +} + +/// ParseDirectiveSecureLogReset +/// ::= .secure_log_reset +bool DarwinAsmParser::ParseDirectiveSecureLogReset(StringRef, SMLoc IDLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.secure_log_reset' directive"); + + Lex(); + + getContext().setSecureLogUsed(false); + + return false; +} + +/// ParseDirectiveSubsectionsViaSymbols +/// ::= .subsections_via_symbols +bool DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols(StringRef, SMLoc) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.subsections_via_symbols' directive"); + + Lex(); + + getStreamer().EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); + + return false; +} + +/// ParseDirectiveTBSS +/// ::= .tbss identifier, size, align +bool DarwinAsmParser::ParseDirectiveTBSS(StringRef, SMLoc) { + SMLoc IDLoc = getLexer().getLoc(); + StringRef Name; + if (getParser().ParseIdentifier(Name)) + return TokError("expected identifier in directive"); + + // Handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.tbss' directive"); + + Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.tbss' directive size, can't be less than" + "zero"); + + // FIXME: Diagnose overflow. 
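// '.secure_log_unique' above opens the AS_SECURE_LOG_FILE stream at most
// once, caches it in the context, and appends "file:line: message" records.
// The lazy open-once shape as a sketch, using the standard library in place
// of raw_fd_ostream:
#include <fstream>
#include <memory>
#include <string>

static std::ofstream *getSecureLog(std::unique_ptr<std::ofstream> &Cached,
                                   const std::string &Path) {
  if (!Cached) {
    std::unique_ptr<std::ofstream> OS(new std::ofstream(Path, std::ios::app));
    if (!*OS)
      return nullptr;                  // "can't open secure log file"
    Cached = std::move(OS);            // reused by every later directive
  }
  return Cached.get();
}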
+ if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.tbss' alignment, can't be less" + "than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + getStreamer().EmitTBSSSymbol(getContext().getMachOSection( + "__DATA", "__thread_bss", + MCSectionMachO::S_THREAD_LOCAL_ZEROFILL, + 0, SectionKind::getThreadBSS()), + Sym, Size, 1 << Pow2Alignment); + + return false; +} + +/// ParseDirectiveZerofill +/// ::= .zerofill segname , sectname [, identifier , size_expression [ +/// , align_expression ]] +bool DarwinAsmParser::ParseDirectiveZerofill(StringRef, SMLoc) { + StringRef Segment; + if (getParser().ParseIdentifier(Segment)) + return TokError("expected segment name after '.zerofill' directive"); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + StringRef Section; + if (getParser().ParseIdentifier(Section)) + return TokError("expected section name after comma in '.zerofill' " + "directive"); + + // If this is the end of the line all that was wanted was to create the + // the section but with no symbol. + if (getLexer().is(AsmToken::EndOfStatement)) { + // Create the zerofill section but no symbol + getStreamer().EmitZerofill(getContext().getMachOSection( + Segment, Section, MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS())); + return false; + } + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + SMLoc IDLoc = getLexer().getLoc(); + StringRef IDStr; + if (getParser().ParseIdentifier(IDStr)) + return TokError("expected identifier in directive"); + + // handle the identifier as the key symbol. + MCSymbol *Sym = getContext().GetOrCreateSymbol(IDStr); + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Size; + SMLoc SizeLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Size)) + return true; + + int64_t Pow2Alignment = 0; + SMLoc Pow2AlignmentLoc; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + Pow2AlignmentLoc = getLexer().getLoc(); + if (getParser().ParseAbsoluteExpression(Pow2Alignment)) + return true; + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.zerofill' directive"); + + Lex(); + + if (Size < 0) + return Error(SizeLoc, "invalid '.zerofill' directive size, can't be less " + "than zero"); + + // NOTE: The alignment in the directive is a power of 2 value, the assembler + // may internally end up wanting an alignment in bytes. + // FIXME: Diagnose overflow. + if (Pow2Alignment < 0) + return Error(Pow2AlignmentLoc, "invalid '.zerofill' directive alignment, " + "can't be less than zero"); + + if (!Sym->isUndefined()) + return Error(IDLoc, "invalid symbol redefinition"); + + // Create the zerofill Symbol with Size and Pow2Alignment + // + // FIXME: Arch specific. 
+ getStreamer().EmitZerofill(getContext().getMachOSection( + Segment, Section, MCSectionMachO::S_ZEROFILL, + 0, SectionKind::getBSS()), + Sym, Size, 1 << Pow2Alignment); + + return false; +} + +namespace llvm { + +MCAsmParserExtension *createDarwinAsmParser() { + return new DarwinAsmParser; +} + +} diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp new file mode 100644 index 0000000..7a54dd3 --- /dev/null +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -0,0 +1,68 @@ +//===- ELFAsmParser.cpp - ELF Assembly Parser -----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCParser/MCAsmLexer.h" +using namespace llvm; + +namespace { + +class ELFAsmParser : public MCAsmParserExtension { + bool ParseSectionSwitch(StringRef Section, unsigned Type, + unsigned Flags, SectionKind Kind); + +public: + ELFAsmParser() {} + + virtual void Initialize(MCAsmParser &Parser) { + // Call the base implementation. + this->MCAsmParserExtension::Initialize(Parser); + + Parser.AddDirectiveHandler(this, ".data", MCAsmParser::DirectiveHandler( + &ELFAsmParser::ParseSectionDirectiveData)); + Parser.AddDirectiveHandler(this, ".text", MCAsmParser::DirectiveHandler( + &ELFAsmParser::ParseSectionDirectiveText)); + } + + bool ParseSectionDirectiveData(StringRef, SMLoc) { + return ParseSectionSwitch(".data", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC, + SectionKind::getDataRel()); + } + bool ParseSectionDirectiveText(StringRef, SMLoc) { + return ParseSectionSwitch(".text", MCSectionELF::SHT_PROGBITS, + MCSectionELF::SHF_EXECINSTR | + MCSectionELF::SHF_ALLOC, SectionKind::getText()); + } +}; + +} + +bool ELFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Type, + unsigned Flags, SectionKind Kind) { + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in section switching directive"); + Lex(); + + getStreamer().SwitchSection(getContext().getELFSection( + Section, Type, Flags, Kind)); + + return false; +} + +namespace llvm { + +MCAsmParserExtension *createELFAsmParser() { + return new ELFAsmParser; +} + +} diff --git a/lib/MC/MCParser/MCAsmLexer.cpp b/lib/MC/MCParser/MCAsmLexer.cpp index e5b2955..dceece7 100644 --- a/lib/MC/MCParser/MCAsmLexer.cpp +++ b/lib/MC/MCParser/MCAsmLexer.cpp @@ -12,12 +12,16 @@ using namespace llvm; -MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()) { +MCAsmLexer::MCAsmLexer() : CurTok(AsmToken::Error, StringRef()), TokStart(0) { } MCAsmLexer::~MCAsmLexer() { } +SMLoc MCAsmLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + SMLoc AsmToken::getLoc() const { return SMLoc::getFromPointer(Str.data()); } diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp index b8c2054..bee3064 100644 --- a/lib/MC/MCParser/MCAsmParser.cpp +++ b/lib/MC/MCParser/MCAsmParser.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/Support/SourceMgr.h" @@ -23,6 
+24,11 @@ const AsmToken &MCAsmParser::getTok() { return getLexer().getTok(); } +bool MCAsmParser::TokError(const char *Msg) { + Error(getLexer().getLoc(), Msg); + return true; +} + bool MCAsmParser::ParseExpression(const MCExpr *&Res) { SMLoc L; return ParseExpression(Res, L); diff --git a/lib/MC/MCParser/MCAsmParserExtension.cpp b/lib/MC/MCParser/MCAsmParserExtension.cpp new file mode 100644 index 0000000..c30d306 --- /dev/null +++ b/lib/MC/MCParser/MCAsmParserExtension.cpp @@ -0,0 +1,21 @@ +//===-- MCAsmParserExtension.cpp - Asm Parser Hooks -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCParser/MCAsmParserExtension.h" +using namespace llvm; + +MCAsmParserExtension::MCAsmParserExtension() { +} + +MCAsmParserExtension::~MCAsmParserExtension() { +} + +void MCAsmParserExtension::Initialize(MCAsmParser &Parser) { + this->Parser = &Parser; +} diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index d57bb0c..eb53160 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -44,28 +44,28 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << 'w'; else OS << 'r'; - if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE) + if (getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) OS << 'n'; OS << "\"\n"; - if (getCharacteristics() & MCSectionCOFF::IMAGE_SCN_LNK_COMDAT) { + if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) { switch (Selection) { - case IMAGE_COMDAT_SELECT_NODUPLICATES: + case COFF::IMAGE_COMDAT_SELECT_NODUPLICATES: OS << "\t.linkonce one_only\n"; break; - case IMAGE_COMDAT_SELECT_ANY: + case COFF::IMAGE_COMDAT_SELECT_ANY: OS << "\t.linkonce discard\n"; break; - case IMAGE_COMDAT_SELECT_SAME_SIZE: + case COFF::IMAGE_COMDAT_SELECT_SAME_SIZE: OS << "\t.linkonce same_size\n"; break; - case IMAGE_COMDAT_SELECT_EXACT_MATCH: + case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: OS << "\t.linkonce same_contents\n"; break; //NOTE: as of binutils 2.20, there is no way to specifiy select largest // with the .linkonce directive. For now, we treat it as an invalid // comdat selection value. 
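// The switch above maps COFF COMDAT selection values onto '.linkonce'
// spellings; condensed as a lookup, with the numeric selection values taken
// from the PE/COFF spec rather than the patch (SELECT_LARGEST deliberately
// yields nothing, since binutils 2.20 has no spelling for it):
static const char *linkonceFor(unsigned Selection) {
  switch (Selection) {
  case 1: return "one_only";           // IMAGE_COMDAT_SELECT_NODUPLICATES
  case 2: return "discard";            // IMAGE_COMDAT_SELECT_ANY
  case 3: return "same_size";          // IMAGE_COMDAT_SELECT_SAME_SIZE
  case 4: return "same_contents";      // IMAGE_COMDAT_SELECT_EXACT_MATCH
  case 6: return nullptr;              // IMAGE_COMDAT_SELECT_LARGEST: unsupported
  default: return nullptr;             // invalid selection value
  }
}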
- case IMAGE_COMDAT_SELECT_LARGEST: + case COFF::IMAGE_COMDAT_SELECT_LARGEST: // OS << "\t.linkonce largest\n"; // break; default: diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 3207e99..7ca0951 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -33,6 +33,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) { default: llvm_unreachable("invalid fixup kind!"); case X86::reloc_pcrel_1byte: case FK_Data_1: return 0; + case X86::reloc_pcrel_2byte: case FK_Data_2: return 1; case X86::reloc_pcrel_4byte: case X86::reloc_riprel_4byte: @@ -47,6 +48,7 @@ static bool isFixupKindPCRel(unsigned Kind) { default: return false; case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_2byte: case X86::reloc_pcrel_4byte: case X86::reloc_riprel_4byte: case X86::reloc_riprel_4byte_movq_load: @@ -738,6 +740,51 @@ public: Relocations[Fragment->getParent()].push_back(MRE); } + void RecordTLVPRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP && + !Is64Bit && + "Should only be called with a 32-bit TLVP relocation!"); + + unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); + unsigned IsPCRel = 0; + + // Get the symbol data. + MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol()); + unsigned Index = SD_A->getIndex(); + + // We're only going to have a second symbol in pic mode and it'll be a + // subtraction from the picbase. For 32-bit pic the addend is the difference + // between the picbase and the next address. For 32-bit static the addend + // is zero. + if (Target.getSymB()) { + // If this is a subtraction then we're pcrel. + uint32_t FixupAddress = + Layout.getFragmentAddress(Fragment) + Fixup.getOffset(); + MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol()); + IsPCRel = 1; + FixedValue = (FixupAddress - Layout.getSymbolAddress(SD_B) + + Target.getConstant()); + FixedValue += 1 << Log2Size; + } else { + FixedValue = 0; + } + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = Value; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (1 << 27) | // Extern + (RIT_TLV << 28)); // Type + Relocations[Fragment->getParent()].push_back(MRE); + } + void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, uint64_t &FixedValue) { @@ -749,6 +796,12 @@ public: unsigned IsPCRel = isFixupKindPCRel(Fixup.getKind()); unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind()); + // If this is a 32-bit TLVP reloc it's handled a bit differently. + if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) { + RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue); + return; + } + // If this is a difference or a defined symbol plus an offset, then we need // a scattered relocation entry. // Differences always require scattered relocations. @@ -772,7 +825,6 @@ public: // See . uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); - uint32_t Value = 0; unsigned Index = 0; unsigned IsExtern = 0; unsigned Type = 0; @@ -783,7 +835,6 @@ public: // FIXME: Currently, these are never generated (see code below). I cannot // find a case where they are actually emitted. 
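// The TLVP path above packs a Mach-O relocation_info entry by hand; its Word1
// bit layout, isolated as a helper (the Type value is whatever RIT_* constant
// the writer passes in, RIT_TLV for the thread-local case):
#include <cstdint>

static uint32_t packRelocWord1(uint32_t SymbolIndex,  // low 24 bits
                               bool IsPCRel, uint32_t Log2Size,
                               bool IsExtern, uint32_t Type) {
  return (SymbolIndex << 0) |
         (uint32_t(IsPCRel) << 24) |
         (Log2Size << 25) |            // 0, 1, 2, or 3
         (uint32_t(IsExtern) << 27) |
         (Type << 28);                 // e.g. RIT_TLV in the TLVP case
}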
Type = RIT_Vanilla; - Value = 0; } else { // Check whether we need an external or internal relocation. if (doesSymbolRequireExternRelocation(SD)) { @@ -794,11 +845,9 @@ public: // undefined. This occurs with weak definitions, for example. if (!SD->Symbol->isUndefined()) FixedValue -= Layout.getSymbolAddress(SD); - Value = 0; } else { // The index is the section ordinal (1-based). Index = SD->getFragment()->getParent()->getOrdinal() + 1; - Value = Layout.getSymbolAddress(SD); } Type = RIT_Vanilla; @@ -898,7 +947,7 @@ public: const MCSymbol &Symbol = it->getSymbol(); // Ignore non-linker visible symbols. - if (!Asm.isSymbolLinkerVisible(it)) + if (!Asm.isSymbolLinkerVisible(it->getSymbol())) continue; if (!it->isExternal() && !Symbol.isUndefined()) @@ -934,7 +983,7 @@ public: const MCSymbol &Symbol = it->getSymbol(); // Ignore non-linker visible symbols. - if (!Asm.isSymbolLinkerVisible(it)) + if (!Asm.isSymbolLinkerVisible(it->getSymbol())) continue; if (it->isExternal() || Symbol.isUndefined()) diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp new file mode 100644 index 0000000..6804766 --- /dev/null +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -0,0 +1,71 @@ +//===-- llvm/MC/WinCOFFObjectWriter.cpp -------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of a Win32 COFF object file writer. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "WinCOFFObjectWriter" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCValue.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmLayout.h" +using namespace llvm; + +namespace { + + class WinCOFFObjectWriter : public MCObjectWriter { + public: + WinCOFFObjectWriter(raw_ostream &OS); + + // MCObjectWriter interface implementation. 
+ + void ExecutePostLayoutBinding(MCAssembler &Asm); + + void RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue); + + void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout); + }; +} + +WinCOFFObjectWriter::WinCOFFObjectWriter(raw_ostream &OS) + : MCObjectWriter(OS, true) { +} + +//////////////////////////////////////////////////////////////////////////////// +// MCObjectWriter interface implementations + +void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { +} + +void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCFragment *Fragment, + const MCFixup &Fixup, + MCValue Target, + uint64_t &FixedValue) { +} + +void WinCOFFObjectWriter::WriteObject(const MCAssembler &Asm, + const MCAsmLayout &Layout) { +} + +//------------------------------------------------------------------------------ +// WinCOFFObjectWriter factory function + +namespace llvm { + MCObjectWriter *createWinCOFFObjectWriter(raw_ostream &OS) { + return new WinCOFFObjectWriter(OS); + } +} diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp new file mode 100644 index 0000000..1030cdb --- /dev/null +++ b/lib/MC/WinCOFFStreamer.cpp @@ -0,0 +1,198 @@ +//===-- llvm/MC/WinCOFFStreamer.cpp -----------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains an implementation of a Win32 COFF object file streamer. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "WinCOFFStreamer" + +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCCodeEmitter.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/Target/TargetAsmBackend.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define dbg_notimpl(x) \ + do { dbgs() << "not implemented, " << __FUNCTION__ << " (" << x << ")"; \ + abort(); } while (false); + +namespace { +class WinCOFFStreamer : public MCObjectStreamer { +public: + WinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS); + + // MCStreamer interface + + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); + virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol); + virtual void EmitCOFFSymbolStorageClass(int StorageClass); + virtual void EmitCOFFSymbolType(int Type); + virtual void EndCOFFSymbolDef(); + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size,unsigned ByteAlignment); + virtual void 
EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment); + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace); + virtual void EmitGPRel32Value(const MCExpr *Value); + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, unsigned MaxBytesToEmit); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit); + virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value); + virtual void EmitFileDirective(StringRef Filename); + virtual void EmitDwarfFileDirective(unsigned FileNo,StringRef Filename); + virtual void EmitInstruction(const MCInst &Instruction); + virtual void Finish(); +}; +} // end anonymous namespace. + +WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS) + : MCObjectStreamer(Context, TAB, OS, &CE) { +} + +// MCStreamer interface + +void WinCOFFStreamer::EmitLabel(MCSymbol *Symbol) { +} + +void WinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + dbg_notimpl("Flag = " << Flag); +} + +void WinCOFFStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { +} + +void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { +} + +void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + dbg_notimpl("Symbol = " << Symbol->getName() << ", DescValue = "<< DescValue); +} + +void WinCOFFStreamer::BeginCOFFSymbolDef(MCSymbol const *Symbol) { +} + +void WinCOFFStreamer::EmitCOFFSymbolStorageClass(int StorageClass) { +} + +void WinCOFFStreamer::EmitCOFFSymbolType(int Type) { +} + +void WinCOFFStreamer::EndCOFFSymbolDef() { +} + +void WinCOFFStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + dbg_notimpl("Symbol = " << Symbol->getName() << ", Value = " << *Value); +} + +void WinCOFFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { +} + +void WinCOFFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { +} + +void WinCOFFStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + unsigned Size,unsigned ByteAlignment) { + MCSectionCOFF const *SectionCOFF = + static_cast<MCSectionCOFF const *>(Section); + + dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " << + Symbol->getName() << ", Size = " << Size << ", ByteAlignment = " + << ByteAlignment); +} + +void WinCOFFStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + MCSectionCOFF const *SectionCOFF = + static_cast<MCSectionCOFF const *>(Section); + + dbg_notimpl("Section = " << SectionCOFF->getSectionName() << ", Symbol = " << + Symbol->getName() << ", Size = " << Size << ", ByteAlignment = " + << ByteAlignment); +} + +void WinCOFFStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { +} + +void WinCOFFStreamer::EmitValue(const MCExpr *Value, unsigned Size, + unsigned AddrSpace) { +} + +void WinCOFFStreamer::EmitGPRel32Value(const MCExpr *Value) { + dbg_notimpl("Value = '" << *Value); +} + +void WinCOFFStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) { +} + +void WinCOFFStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0) { +} + +void WinCOFFStreamer::EmitValueToOffset(const MCExpr *Offset, + unsigned char Value = 0) { + dbg_notimpl("Offset = '" << *Offset << "', Value = " << Value); +} + +void
WinCOFFStreamer::EmitFileDirective(StringRef Filename) { + // Ignore for now, linkers don't care, and proper debug + // info will be a much larger effort. +} + +void WinCOFFStreamer::EmitDwarfFileDirective(unsigned FileNo, + StringRef Filename) { + dbg_notimpl("FileNo = " << FileNo << ", Filename = '" << Filename << "'"); +} + +void WinCOFFStreamer::EmitInstruction(const MCInst &Instruction) { +} + +void WinCOFFStreamer::Finish() { + MCObjectStreamer::Finish(); +} + +namespace llvm +{ + MCStreamer *createWinCOFFStreamer(MCContext &Context, + TargetAsmBackend &TAB, + MCCodeEmitter &CE, + raw_ostream &OS) { + return new WinCOFFStreamer(Context, TAB, CE, OS); + } +} diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index f1347f9..366d2f7 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMSupport ConstantRange.cpp Debug.cpp DeltaAlgorithm.cpp + DAGDeltaAlgorithm.cpp Dwarf.cpp ErrorHandling.cpp FileUtilities.cpp diff --git a/lib/Support/DAGDeltaAlgorithm.cpp b/lib/Support/DAGDeltaAlgorithm.cpp new file mode 100644 index 0000000..8145664 --- /dev/null +++ b/lib/Support/DAGDeltaAlgorithm.cpp @@ -0,0 +1,357 @@ +//===--- DAGDeltaAlgorithm.cpp - A DAG Minimization Algorithm --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +//===----------------------------------------------------------------------===// +// +// The algorithm we use attempts to exploit the dependency information by +// minimizing top-down. We start by constructing an initial root set R, and +// then iteratively: +// +// 1. Minimize the set R using the test predicate: +// P'(S) = P(S union pred*(S)) +// +// 2. Extend R to R' = R union pred(R). +// +// until a fixed point is reached. +// +// The idea is that we want to quickly prune entire portions of the graph, so we +// try to find high-level nodes that can be eliminated with all of their +// dependents. +// +// FIXME: The current algorithm doesn't actually provide a strong guarantee +// about the minimality of the result. The problem is that after adding nodes to +// the required set, we no longer consider them for elimination. For strictly +// well formed predicates, this doesn't happen, but it commonly occurs in +// practice when there are unmodelled dependencies. I believe we can resolve +// this by allowing the required set to be minimized as well, but need more test +// cases first.
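+//
+// An illustrative walk-through (an editorial sketch, not part of the
+// original comment): take changes {1, 2, 3} with edges 1 -> 3 and 2 -> 3,
+// so 3 is the only root. Round one minimizes R = {3}; testing S = {3}
+// actually runs P({1, 2, 3}), since pred*(3) = {1, 2} is unioned in. If 3
+// is kept it becomes required, and R is extended to pred(R) = {1, 2},
+// which the next round minimizes while the required set is included in
+// every executed test.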
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DAGDeltaAlgorithm.h" +#include "llvm/ADT/DeltaAlgorithm.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <cassert> +#include <iterator> +#include <map> +using namespace llvm; + +namespace { + +class DAGDeltaAlgorithmImpl { + friend class DeltaActiveSetHelper; + +public: + typedef DAGDeltaAlgorithm::change_ty change_ty; + typedef DAGDeltaAlgorithm::changeset_ty changeset_ty; + typedef DAGDeltaAlgorithm::changesetlist_ty changesetlist_ty; + typedef DAGDeltaAlgorithm::edge_ty edge_ty; + +private: + typedef std::vector<change_ty>::iterator pred_iterator_ty; + typedef std::vector<change_ty>::iterator succ_iterator_ty; + typedef std::set<change_ty>::iterator pred_closure_iterator_ty; + typedef std::set<change_ty>::iterator succ_closure_iterator_ty; + + DAGDeltaAlgorithm &DDA; + + const changeset_ty &Changes; + const std::vector<edge_ty> &Dependencies; + + std::vector<change_ty> Roots; + + /// Cache of failed test results. Successful test results are never cached + /// since we always reduce following a success. We maintain an independent + /// cache from that used by the individual delta passes because we may get + /// hits across multiple individual delta invocations. + mutable std::set<changeset_ty> FailedTestsCache; + + // FIXME: Gross. + std::map<change_ty, std::vector<change_ty> > Predecessors; + std::map<change_ty, std::vector<change_ty> > Successors; + + std::map<change_ty, std::set<change_ty> > PredClosure; + std::map<change_ty, std::set<change_ty> > SuccClosure; + +private: + pred_iterator_ty pred_begin(change_ty Node) { + assert(Predecessors.count(Node) && "Invalid node!"); + return Predecessors[Node].begin(); + } + pred_iterator_ty pred_end(change_ty Node) { + assert(Predecessors.count(Node) && "Invalid node!"); + return Predecessors[Node].end(); + } + + pred_closure_iterator_ty pred_closure_begin(change_ty Node) { + assert(PredClosure.count(Node) && "Invalid node!"); + return PredClosure[Node].begin(); + } + pred_closure_iterator_ty pred_closure_end(change_ty Node) { + assert(PredClosure.count(Node) && "Invalid node!"); + return PredClosure[Node].end(); + } + + succ_iterator_ty succ_begin(change_ty Node) { + assert(Successors.count(Node) && "Invalid node!"); + return Successors[Node].begin(); + } + succ_iterator_ty succ_end(change_ty Node) { + assert(Successors.count(Node) && "Invalid node!"); + return Successors[Node].end(); + } + + succ_closure_iterator_ty succ_closure_begin(change_ty Node) { + assert(SuccClosure.count(Node) && "Invalid node!"); + return SuccClosure[Node].begin(); + } + succ_closure_iterator_ty succ_closure_end(change_ty Node) { + assert(SuccClosure.count(Node) && "Invalid node!"); + return SuccClosure[Node].end(); + } + + void UpdatedSearchState(const changeset_ty &Changes, + const changesetlist_ty &Sets, + const changeset_ty &Required) { + DDA.UpdatedSearchState(Changes, Sets, Required); + } + + /// ExecuteOneTest - Execute a single test predicate on the change set \arg S. + bool ExecuteOneTest(const changeset_ty &S) { + // Check dependencies invariant.
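+ // (Illustrative aside, not from the original source: the DEBUG-only
+ // loop below asserts that the tested set is closed under the successor
+ // relation -- for every change in S, each recorded successor must also
+ // be in S -- so the user predicate never sees a set that violates the
+ // dependency edges.)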
+ DEBUG({ + for (changeset_ty::const_iterator it = S.begin(), + ie = S.end(); it != ie; ++it) + for (succ_iterator_ty it2 = succ_begin(*it), + ie2 = succ_end(*it); it2 != ie2; ++it2) + assert(S.count(*it2) && "Attempt to run invalid changeset!"); + }); + + return DDA.ExecuteOneTest(S); + } + +public: + DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA, + const changeset_ty &_Changes, + const std::vector<edge_ty> &_Dependencies); + + changeset_ty Run(); + + /// GetTestResult - Get the test result for the active set \arg Changes with + /// \arg Required changes from the cache, executing the test if necessary. + /// + /// \param Changes - The set of active changes being minimized, which should + /// have their pred closure included in the test. + /// \param Required - The set of changes which have previously been + /// established to be required. + /// \return - The test result. + bool GetTestResult(const changeset_ty &Changes, const changeset_ty &Required); +}; + +/// Helper object for minimizing an active set of changes. +class DeltaActiveSetHelper : public DeltaAlgorithm { + DAGDeltaAlgorithmImpl &DDAI; + + const changeset_ty &Required; + +protected: + /// UpdatedSearchState - Callback used when the search state changes. + virtual void UpdatedSearchState(const changeset_ty &Changes, + const changesetlist_ty &Sets) { + DDAI.UpdatedSearchState(Changes, Sets, Required); + } + + virtual bool ExecuteOneTest(const changeset_ty &S) { + return DDAI.GetTestResult(S, Required); + } + +public: + DeltaActiveSetHelper(DAGDeltaAlgorithmImpl &_DDAI, + const changeset_ty &_Required) + : DDAI(_DDAI), Required(_Required) {} +}; + +} + +DAGDeltaAlgorithmImpl::DAGDeltaAlgorithmImpl(DAGDeltaAlgorithm &_DDA, + const changeset_ty &_Changes, + const std::vector<edge_ty> + &_Dependencies) + : DDA(_DDA), + Changes(_Changes), + Dependencies(_Dependencies) +{ + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) { + Predecessors.insert(std::make_pair(*it, std::vector<change_ty>())); + Successors.insert(std::make_pair(*it, std::vector<change_ty>())); + } + for (std::vector<edge_ty>::const_iterator it = Dependencies.begin(), + ie = Dependencies.end(); it != ie; ++it) { + Predecessors[it->second].push_back(it->first); + Successors[it->first].push_back(it->second); + } + + // Compute the roots. + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + if (succ_begin(*it) == succ_end(*it)) + Roots.push_back(*it); + + // Pre-compute the closure of the successor relation. + std::vector<change_ty> Worklist(Roots.begin(), Roots.end()); + while (!Worklist.empty()) { + change_ty Change = Worklist.back(); + Worklist.pop_back(); + + std::set<change_ty> &ChangeSuccs = SuccClosure[Change]; + for (pred_iterator_ty it = pred_begin(Change), + ie = pred_end(Change); it != ie; ++it) { + SuccClosure[*it].insert(Change); + SuccClosure[*it].insert(ChangeSuccs.begin(), ChangeSuccs.end()); + Worklist.push_back(*it); + } + } + + // Invert to form the predecessor closure map. + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + PredClosure.insert(std::make_pair(*it, std::set<change_ty>())); + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + for (succ_closure_iterator_ty it2 = succ_closure_begin(*it), + ie2 = succ_closure_end(*it); it2 != ie2; ++it2) + PredClosure[*it2].insert(*it); + + // Dump useful debug info.
+ DEBUG({ + llvm::errs() << "-- DAGDeltaAlgorithmImpl --\n"; + llvm::errs() << "Changes: ["; + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) { + if (it != Changes.begin()) llvm::errs() << ", "; + llvm::errs() << *it; + + if (succ_begin(*it) != succ_end(*it)) { + llvm::errs() << "("; + for (succ_iterator_ty it2 = succ_begin(*it), + ie2 = succ_end(*it); it2 != ie2; ++it2) { + if (it2 != succ_begin(*it)) llvm::errs() << ", "; + llvm::errs() << "->" << *it2; + } + llvm::errs() << ")"; + } + } + llvm::errs() << "]\n"; + + llvm::errs() << "Roots: ["; + for (std::vector<change_ty>::const_iterator it = Roots.begin(), + ie = Roots.end(); it != ie; ++it) { + if (it != Roots.begin()) llvm::errs() << ", "; + llvm::errs() << *it; + } + llvm::errs() << "]\n"; + + llvm::errs() << "Predecessor Closure:\n"; + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) { + llvm::errs() << format(" %-4d: [", *it); + for (pred_closure_iterator_ty it2 = pred_closure_begin(*it), + ie2 = pred_closure_end(*it); it2 != ie2; ++it2) { + if (it2 != pred_closure_begin(*it)) llvm::errs() << ", "; + llvm::errs() << *it2; + } + llvm::errs() << "]\n"; + } + + llvm::errs() << "Successor Closure:\n"; + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) { + llvm::errs() << format(" %-4d: [", *it); + for (succ_closure_iterator_ty it2 = succ_closure_begin(*it), + ie2 = succ_closure_end(*it); it2 != ie2; ++it2) { + if (it2 != succ_closure_begin(*it)) llvm::errs() << ", "; + llvm::errs() << *it2; + } + llvm::errs() << "]\n"; + } + + llvm::errs() << "\n\n"; + }); +} + +bool DAGDeltaAlgorithmImpl::GetTestResult(const changeset_ty &Changes, + const changeset_ty &Required) { + changeset_ty Extended(Required); + Extended.insert(Changes.begin(), Changes.end()); + for (changeset_ty::const_iterator it = Changes.begin(), + ie = Changes.end(); it != ie; ++it) + Extended.insert(pred_closure_begin(*it), pred_closure_end(*it)); + + if (FailedTestsCache.count(Extended)) + return false; + + bool Result = ExecuteOneTest(Extended); + if (!Result) + FailedTestsCache.insert(Extended); + + return Result; +} + +DAGDeltaAlgorithm::changeset_ty +DAGDeltaAlgorithmImpl::Run() { + // The current set of changes we are minimizing, starting at the roots. + changeset_ty CurrentSet(Roots.begin(), Roots.end()); + + // The set of required changes. + changeset_ty Required; + + // Iterate until the active set of changes is empty. Convergence is guaranteed + // assuming input was a DAG. + // + // Invariant: CurrentSet intersect Required == {} + // Invariant: Required == (Required union succ*(Required)) + while (!CurrentSet.empty()) { + DEBUG({ + llvm::errs() << "DAG_DD - " << CurrentSet.size() << " active changes, " + << Required.size() << " required changes\n"; + }); + + // Minimize the current set of changes. + DeltaActiveSetHelper Helper(*this, Required); + changeset_ty CurrentMinSet = Helper.Run(CurrentSet); + + // Update the set of required changes. Since + // CurrentMinSet subset CurrentSet + // and after the last iteration, + // succ(CurrentSet) subset Required + // then + // succ(CurrentMinSet) subset Required + // and our invariant on Required is maintained. + Required.insert(CurrentMinSet.begin(), CurrentMinSet.end()); + + // Replace the current set with the predecessors of the minimized set of + // active changes.
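+ // (Illustrative aside, not from the original source: if CurrentMinSet
+ // is {3} and pred(3) = {1, 2}, the next iteration minimizes over {1, 2}
+ // while 3, now required, is folded into every test by GetTestResult.)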
+ CurrentSet.clear(); + for (changeset_ty::const_iterator it = CurrentMinSet.begin(), + ie = CurrentMinSet.end(); it != ie; ++it) + CurrentSet.insert(pred_begin(*it), pred_end(*it)); + + // FIXME: We could enforce CurrentSet intersect Required == {} here if we + // wanted to protect against cyclic graphs. + } + + return Required; +} + +DAGDeltaAlgorithm::changeset_ty +DAGDeltaAlgorithm::Run(const changeset_ty &Changes, + const std::vector<edge_ty> &Dependencies) { + return DAGDeltaAlgorithmImpl(*this, Changes, Dependencies).Run(); +} diff --git a/lib/Support/DeltaAlgorithm.cpp b/lib/Support/DeltaAlgorithm.cpp index d176548..9e52874 100644 --- a/lib/Support/DeltaAlgorithm.cpp +++ b/lib/Support/DeltaAlgorithm.cpp @@ -30,10 +30,10 @@ void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) { // FIXME: This is really slow. changeset_ty LHS, RHS; - unsigned idx = 0; + unsigned idx = 0, N = S.size() / 2; for (changeset_ty::const_iterator it = S.begin(), ie = S.end(); it != ie; ++it, ++idx) - ((idx & 1) ? LHS : RHS).insert(*it); + ((idx < N) ? LHS : RHS).insert(*it); if (!LHS.empty()) Res.push_back(LHS); if (!RHS.empty()) diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index c19c2d6..96ce9d3 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -86,8 +86,8 @@ const char *llvm::dwarf::TagString(unsigned Tag) { /// const char *llvm::dwarf::ChildrenString(unsigned Children) { switch (Children) { - case DW_CHILDREN_no: return "CHILDREN_no"; - case DW_CHILDREN_yes: return "CHILDREN_yes"; + case DW_CHILDREN_no: return "DW_CHILDREN_no"; + case DW_CHILDREN_yes: return "DW_CHILDREN_yes"; } return 0; } @@ -207,27 +207,27 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { /// const char *llvm::dwarf::FormEncodingString(unsigned Encoding) { switch (Encoding) { - case DW_FORM_addr: return "FORM_addr"; - case DW_FORM_block2: return "FORM_block2"; - case DW_FORM_block4: return "FORM_block4"; - case DW_FORM_data2: return "FORM_data2"; - case DW_FORM_data4: return "FORM_data4"; - case DW_FORM_data8: return "FORM_data8"; - case DW_FORM_string: return "FORM_string"; - case DW_FORM_block: return "FORM_block"; - case DW_FORM_block1: return "FORM_block1"; - case DW_FORM_data1: return "FORM_data1"; - case DW_FORM_flag: return "FORM_flag"; - case DW_FORM_sdata: return "FORM_sdata"; - case DW_FORM_strp: return "FORM_strp"; - case DW_FORM_udata: return "FORM_udata"; - case DW_FORM_ref_addr: return "FORM_ref_addr"; - case DW_FORM_ref1: return "FORM_ref1"; - case DW_FORM_ref2: return "FORM_ref2"; - case DW_FORM_ref4: return "FORM_ref4"; - case DW_FORM_ref8: return "FORM_ref8"; - case DW_FORM_ref_udata: return "FORM_ref_udata"; - case DW_FORM_indirect: return "FORM_indirect"; + case DW_FORM_addr: return "DW_FORM_addr"; + case DW_FORM_block2: return "DW_FORM_block2"; + case DW_FORM_block4: return "DW_FORM_block4"; + case DW_FORM_data2: return "DW_FORM_data2"; + case DW_FORM_data4: return "DW_FORM_data4"; + case DW_FORM_data8: return "DW_FORM_data8"; + case DW_FORM_string: return "DW_FORM_string"; + case DW_FORM_block: return "DW_FORM_block"; + case DW_FORM_block1: return "DW_FORM_block1"; + case DW_FORM_data1: return "DW_FORM_data1"; + case DW_FORM_flag: return "DW_FORM_flag"; + case DW_FORM_sdata: return "DW_FORM_sdata"; + case DW_FORM_strp: return "DW_FORM_strp"; + case DW_FORM_udata: return "DW_FORM_udata"; + case DW_FORM_ref_addr: return "DW_FORM_ref_addr"; + case DW_FORM_ref1: return "DW_FORM_ref1"; + case DW_FORM_ref2: return "DW_FORM_ref2"; + case DW_FORM_ref4: return
"DW_FORM_ref4"; + case DW_FORM_ref8: return "DW_FORM_ref8"; + case DW_FORM_ref_udata: return "DW_FORM_ref_udata"; + case DW_FORM_indirect: return "DW_FORM_indirect"; } return 0; } @@ -236,72 +236,159 @@ const char *llvm::dwarf::FormEncodingString(unsigned Encoding) { /// encoding. const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) { switch (Encoding) { - case DW_OP_addr: return "OP_addr"; - case DW_OP_deref: return "OP_deref"; - case DW_OP_const1u: return "OP_const1u"; - case DW_OP_const1s: return "OP_const1s"; - case DW_OP_const2u: return "OP_const2u"; - case DW_OP_const2s: return "OP_const2s"; - case DW_OP_const4u: return "OP_const4u"; - case DW_OP_const4s: return "OP_const4s"; - case DW_OP_const8u: return "OP_const8u"; - case DW_OP_const8s: return "OP_const8s"; - case DW_OP_constu: return "OP_constu"; - case DW_OP_consts: return "OP_consts"; - case DW_OP_dup: return "OP_dup"; - case DW_OP_drop: return "OP_drop"; - case DW_OP_over: return "OP_over"; - case DW_OP_pick: return "OP_pick"; - case DW_OP_swap: return "OP_swap"; - case DW_OP_rot: return "OP_rot"; - case DW_OP_xderef: return "OP_xderef"; - case DW_OP_abs: return "OP_abs"; - case DW_OP_and: return "OP_and"; - case DW_OP_div: return "OP_div"; - case DW_OP_minus: return "OP_minus"; - case DW_OP_mod: return "OP_mod"; - case DW_OP_mul: return "OP_mul"; - case DW_OP_neg: return "OP_neg"; - case DW_OP_not: return "OP_not"; - case DW_OP_or: return "OP_or"; - case DW_OP_plus: return "OP_plus"; - case DW_OP_plus_uconst: return "OP_plus_uconst"; - case DW_OP_shl: return "OP_shl"; - case DW_OP_shr: return "OP_shr"; - case DW_OP_shra: return "OP_shra"; - case DW_OP_xor: return "OP_xor"; - case DW_OP_skip: return "OP_skip"; - case DW_OP_bra: return "OP_bra"; - case DW_OP_eq: return "OP_eq"; - case DW_OP_ge: return "OP_ge"; - case DW_OP_gt: return "OP_gt"; - case DW_OP_le: return "OP_le"; - case DW_OP_lt: return "OP_lt"; - case DW_OP_ne: return "OP_ne"; - case DW_OP_lit0: return "OP_lit0"; - case DW_OP_lit1: return "OP_lit1"; - case DW_OP_lit31: return "OP_lit31"; - case DW_OP_reg0: return "OP_reg0"; - case DW_OP_reg1: return "OP_reg1"; - case DW_OP_reg31: return "OP_reg31"; - case DW_OP_breg0: return "OP_breg0"; - case DW_OP_breg1: return "OP_breg1"; - case DW_OP_breg31: return "OP_breg31"; - case DW_OP_regx: return "OP_regx"; - case DW_OP_fbreg: return "OP_fbreg"; - case DW_OP_bregx: return "OP_bregx"; - case DW_OP_piece: return "OP_piece"; - case DW_OP_deref_size: return "OP_deref_size"; - case DW_OP_xderef_size: return "OP_xderef_size"; - case DW_OP_nop: return "OP_nop"; - case DW_OP_push_object_address: return "OP_push_object_address"; - case DW_OP_call2: return "OP_call2"; - case DW_OP_call4: return "OP_call4"; - case DW_OP_call_ref: return "OP_call_ref"; - case DW_OP_form_tls_address: return "OP_form_tls_address"; - case DW_OP_call_frame_cfa: return "OP_call_frame_cfa"; - case DW_OP_lo_user: return "OP_lo_user"; - case DW_OP_hi_user: return "OP_hi_user"; + case DW_OP_addr: return "DW_OP_addr"; + case DW_OP_deref: return "DW_OP_deref"; + case DW_OP_const1u: return "DW_OP_const1u"; + case DW_OP_const1s: return "DW_OP_const1s"; + case DW_OP_const2u: return "DW_OP_const2u"; + case DW_OP_const2s: return "DW_OP_const2s"; + case DW_OP_const4u: return "DW_OP_const4u"; + case DW_OP_const4s: return "DW_OP_const4s"; + case DW_OP_const8u: return "DW_OP_const8u"; + case DW_OP_const8s: return "DW_OP_const8s"; + case DW_OP_constu: return "DW_OP_constu"; + case DW_OP_consts: return "DW_OP_consts"; + case DW_OP_dup: return "DW_OP_dup"; 
+ case DW_OP_drop: return "DW_OP_drop"; + case DW_OP_over: return "DW_OP_over"; + case DW_OP_pick: return "DW_OP_pick"; + case DW_OP_swap: return "DW_OP_swap"; + case DW_OP_rot: return "DW_OP_rot"; + case DW_OP_xderef: return "DW_OP_xderef"; + case DW_OP_abs: return "DW_OP_abs"; + case DW_OP_and: return "DW_OP_and"; + case DW_OP_div: return "DW_OP_div"; + case DW_OP_minus: return "DW_OP_minus"; + case DW_OP_mod: return "DW_OP_mod"; + case DW_OP_mul: return "DW_OP_mul"; + case DW_OP_neg: return "DW_OP_neg"; + case DW_OP_not: return "DW_OP_not"; + case DW_OP_or: return "DW_OP_or"; + case DW_OP_plus: return "DW_OP_plus"; + case DW_OP_plus_uconst: return "DW_OP_plus_uconst"; + case DW_OP_shl: return "DW_OP_shl"; + case DW_OP_shr: return "DW_OP_shr"; + case DW_OP_shra: return "DW_OP_shra"; + case DW_OP_xor: return "DW_OP_xor"; + case DW_OP_skip: return "DW_OP_skip"; + case DW_OP_bra: return "DW_OP_bra"; + case DW_OP_eq: return "DW_OP_eq"; + case DW_OP_ge: return "DW_OP_ge"; + case DW_OP_gt: return "DW_OP_gt"; + case DW_OP_le: return "DW_OP_le"; + case DW_OP_lt: return "DW_OP_lt"; + case DW_OP_ne: return "DW_OP_ne"; + case DW_OP_lit0: return "DW_OP_lit0"; + case DW_OP_lit1: return "DW_OP_lit1"; + case DW_OP_lit2: return "DW_OP_lit2"; + case DW_OP_lit3: return "DW_OP_lit3"; + case DW_OP_lit4: return "DW_OP_lit4"; + case DW_OP_lit5: return "DW_OP_lit5"; + case DW_OP_lit6: return "DW_OP_lit6"; + case DW_OP_lit7: return "DW_OP_lit7"; + case DW_OP_lit8: return "DW_OP_lit8"; + case DW_OP_lit9: return "DW_OP_lit9"; + case DW_OP_lit10: return "DW_OP_lit10"; + case DW_OP_lit11: return "DW_OP_lit11"; + case DW_OP_lit12: return "DW_OP_lit12"; + case DW_OP_lit13: return "DW_OP_lit13"; + case DW_OP_lit14: return "DW_OP_lit14"; + case DW_OP_lit15: return "DW_OP_lit15"; + case DW_OP_lit16: return "DW_OP_lit16"; + case DW_OP_lit17: return "DW_OP_lit17"; + case DW_OP_lit18: return "DW_OP_lit18"; + case DW_OP_lit19: return "DW_OP_lit19"; + case DW_OP_lit20: return "DW_OP_lit20"; + case DW_OP_lit21: return "DW_OP_lit21"; + case DW_OP_lit22: return "DW_OP_lit22"; + case DW_OP_lit23: return "DW_OP_lit23"; + case DW_OP_lit24: return "DW_OP_lit24"; + case DW_OP_lit25: return "DW_OP_lit25"; + case DW_OP_lit26: return "DW_OP_lit26"; + case DW_OP_lit27: return "DW_OP_lit27"; + case DW_OP_lit28: return "DW_OP_lit28"; + case DW_OP_lit29: return "DW_OP_lit29"; + case DW_OP_lit30: return "DW_OP_lit30"; + case DW_OP_lit31: return "DW_OP_lit31"; + case DW_OP_reg0: return "DW_OP_reg0"; + case DW_OP_reg1: return "DW_OP_reg1"; + case DW_OP_reg2: return "DW_OP_reg2"; + case DW_OP_reg3: return "DW_OP_reg3"; + case DW_OP_reg4: return "DW_OP_reg4"; + case DW_OP_reg5: return "DW_OP_reg5"; + case DW_OP_reg6: return "DW_OP_reg6"; + case DW_OP_reg7: return "DW_OP_reg7"; + case DW_OP_reg8: return "DW_OP_reg8"; + case DW_OP_reg9: return "DW_OP_reg9"; + case DW_OP_reg10: return "DW_OP_reg10"; + case DW_OP_reg11: return "DW_OP_reg11"; + case DW_OP_reg12: return "DW_OP_reg12"; + case DW_OP_reg13: return "DW_OP_reg13"; + case DW_OP_reg14: return "DW_OP_reg14"; + case DW_OP_reg15: return "DW_OP_reg15"; + case DW_OP_reg16: return "DW_OP_reg16"; + case DW_OP_reg17: return "DW_OP_reg17"; + case DW_OP_reg18: return "DW_OP_reg18"; + case DW_OP_reg19: return "DW_OP_reg19"; + case DW_OP_reg20: return "DW_OP_reg20"; + case DW_OP_reg21: return "DW_OP_reg21"; + case DW_OP_reg22: return "DW_OP_reg22"; + case DW_OP_reg23: return "DW_OP_reg23"; + case DW_OP_reg24: return "DW_OP_reg24"; + case DW_OP_reg25: return "DW_OP_reg25"; + case DW_OP_reg26: return 
"DW_OP_reg26"; + case DW_OP_reg27: return "DW_OP_reg27"; + case DW_OP_reg28: return "DW_OP_reg28"; + case DW_OP_reg29: return "DW_OP_reg29"; + case DW_OP_reg30: return "DW_OP_reg30"; + case DW_OP_reg31: return "DW_OP_reg31"; + case DW_OP_breg0: return "DW_OP_breg0"; + case DW_OP_breg1: return "DW_OP_breg1"; + case DW_OP_breg2: return "DW_OP_breg2"; + case DW_OP_breg3: return "DW_OP_breg3"; + case DW_OP_breg4: return "DW_OP_breg4"; + case DW_OP_breg5: return "DW_OP_breg5"; + case DW_OP_breg6: return "DW_OP_breg6"; + case DW_OP_breg7: return "DW_OP_breg7"; + case DW_OP_breg8: return "DW_OP_breg8"; + case DW_OP_breg9: return "DW_OP_breg9"; + case DW_OP_breg10: return "DW_OP_breg10"; + case DW_OP_breg11: return "DW_OP_breg11"; + case DW_OP_breg12: return "DW_OP_breg12"; + case DW_OP_breg13: return "DW_OP_breg13"; + case DW_OP_breg14: return "DW_OP_breg14"; + case DW_OP_breg15: return "DW_OP_breg15"; + case DW_OP_breg16: return "DW_OP_breg16"; + case DW_OP_breg17: return "DW_OP_breg17"; + case DW_OP_breg18: return "DW_OP_breg18"; + case DW_OP_breg19: return "DW_OP_breg19"; + case DW_OP_breg20: return "DW_OP_breg20"; + case DW_OP_breg21: return "DW_OP_breg21"; + case DW_OP_breg22: return "DW_OP_breg22"; + case DW_OP_breg23: return "DW_OP_breg23"; + case DW_OP_breg24: return "DW_OP_breg24"; + case DW_OP_breg25: return "DW_OP_breg25"; + case DW_OP_breg26: return "DW_OP_breg26"; + case DW_OP_breg27: return "DW_OP_breg27"; + case DW_OP_breg28: return "DW_OP_breg28"; + case DW_OP_breg29: return "DW_OP_breg29"; + case DW_OP_breg30: return "DW_OP_breg30"; + case DW_OP_breg31: return "DW_OP_breg31"; + case DW_OP_regx: return "DW_OP_regx"; + case DW_OP_fbreg: return "DW_OP_fbreg"; + case DW_OP_bregx: return "DW_OP_bregx"; + case DW_OP_piece: return "DW_OP_piece"; + case DW_OP_deref_size: return "DW_OP_deref_size"; + case DW_OP_xderef_size: return "DW_OP_xderef_size"; + case DW_OP_nop: return "DW_OP_nop"; + case DW_OP_push_object_address: return "DW_OP_push_object_address"; + case DW_OP_call2: return "DW_OP_call2"; + case DW_OP_call4: return "DW_OP_call4"; + case DW_OP_call_ref: return "DW_OP_call_ref"; + case DW_OP_form_tls_address: return "DW_OP_form_tls_address"; + case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa"; + case DW_OP_lo_user: return "DW_OP_lo_user"; + case DW_OP_hi_user: return "DW_OP_hi_user"; } return 0; } @@ -310,23 +397,23 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) { /// encoding. 
const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) { switch (Encoding) { - case DW_ATE_address: return "ATE_address"; - case DW_ATE_boolean: return "ATE_boolean"; - case DW_ATE_complex_float: return "ATE_complex_float"; - case DW_ATE_float: return "ATE_float"; - case DW_ATE_signed: return "ATE_signed"; - case DW_ATE_signed_char: return "ATE_signed_char"; - case DW_ATE_unsigned: return "ATE_unsigned"; - case DW_ATE_unsigned_char: return "ATE_unsigned_char"; - case DW_ATE_imaginary_float: return "ATE_imaginary_float"; - case DW_ATE_packed_decimal: return "ATE_packed_decimal"; - case DW_ATE_numeric_string: return "ATE_numeric_string"; - case DW_ATE_edited: return "ATE_edited"; - case DW_ATE_signed_fixed: return "ATE_signed_fixed"; - case DW_ATE_unsigned_fixed: return "ATE_unsigned_fixed"; - case DW_ATE_decimal_float: return "ATE_decimal_float"; - case DW_ATE_lo_user: return "ATE_lo_user"; - case DW_ATE_hi_user: return "ATE_hi_user"; + case DW_ATE_address: return "DW_ATE_address"; + case DW_ATE_boolean: return "DW_ATE_boolean"; + case DW_ATE_complex_float: return "DW_ATE_complex_float"; + case DW_ATE_float: return "DW_ATE_float"; + case DW_ATE_signed: return "DW_ATE_signed"; + case DW_ATE_signed_char: return "DW_ATE_signed_char"; + case DW_ATE_unsigned: return "DW_ATE_unsigned"; + case DW_ATE_unsigned_char: return "DW_ATE_unsigned_char"; + case DW_ATE_imaginary_float: return "DW_ATE_imaginary_float"; + case DW_ATE_packed_decimal: return "DW_ATE_packed_decimal"; + case DW_ATE_numeric_string: return "DW_ATE_numeric_string"; + case DW_ATE_edited: return "DW_ATE_edited"; + case DW_ATE_signed_fixed: return "DW_ATE_signed_fixed"; + case DW_ATE_unsigned_fixed: return "DW_ATE_unsigned_fixed"; + case DW_ATE_decimal_float: return "DW_ATE_decimal_float"; + case DW_ATE_lo_user: return "DW_ATE_lo_user"; + case DW_ATE_hi_user: return "DW_ATE_hi_user"; } return 0; } @@ -335,11 +422,11 @@ const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) { /// attribute. 
const char *llvm::dwarf::DecimalSignString(unsigned Sign) { switch (Sign) { - case DW_DS_unsigned: return "DS_unsigned"; - case DW_DS_leading_overpunch: return "DS_leading_overpunch"; - case DW_DS_trailing_overpunch: return "DS_trailing_overpunch"; - case DW_DS_leading_separate: return "DS_leading_separate"; - case DW_DS_trailing_separate: return "DS_trailing_separate"; + case DW_DS_unsigned: return "DW_DS_unsigned"; + case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch"; + case DW_DS_trailing_overpunch: return "DW_DS_trailing_overpunch"; + case DW_DS_leading_separate: return "DW_DS_leading_separate"; + case DW_DS_trailing_separate: return "DW_DS_trailing_separate"; } return 0; } @@ -348,11 +435,11 @@ const char *llvm::dwarf::DecimalSignString(unsigned Sign) { /// const char *llvm::dwarf::EndianityString(unsigned Endian) { switch (Endian) { - case DW_END_default: return "END_default"; - case DW_END_big: return "END_big"; - case DW_END_little: return "END_little"; - case DW_END_lo_user: return "END_lo_user"; - case DW_END_hi_user: return "END_hi_user"; + case DW_END_default: return "DW_END_default"; + case DW_END_big: return "DW_END_big"; + case DW_END_little: return "DW_END_little"; + case DW_END_lo_user: return "DW_END_lo_user"; + case DW_END_hi_user: return "DW_END_hi_user"; } return 0; } @@ -362,9 +449,9 @@ const char *llvm::dwarf::EndianityString(unsigned Endian) { const char *llvm::dwarf::AccessibilityString(unsigned Access) { switch (Access) { // Accessibility codes - case DW_ACCESS_public: return "ACCESS_public"; - case DW_ACCESS_protected: return "ACCESS_protected"; - case DW_ACCESS_private: return "ACCESS_private"; + case DW_ACCESS_public: return "DW_ACCESS_public"; + case DW_ACCESS_protected: return "DW_ACCESS_protected"; + case DW_ACCESS_private: return "DW_ACCESS_private"; } return 0; } @@ -373,9 +460,9 @@ const char *llvm::dwarf::AccessibilityString(unsigned Access) { /// const char *llvm::dwarf::VisibilityString(unsigned Visibility) { switch (Visibility) { - case DW_VIS_local: return "VIS_local"; - case DW_VIS_exported: return "VIS_exported"; - case DW_VIS_qualified: return "VIS_qualified"; + case DW_VIS_local: return "DW_VIS_local"; + case DW_VIS_exported: return "DW_VIS_exported"; + case DW_VIS_qualified: return "DW_VIS_qualified"; } return 0; } @@ -384,9 +471,9 @@ const char *llvm::dwarf::VisibilityString(unsigned Visibility) { /// const char *llvm::dwarf::VirtualityString(unsigned Virtuality) { switch (Virtuality) { - case DW_VIRTUALITY_none: return "VIRTUALITY_none"; - case DW_VIRTUALITY_virtual: return "VIRTUALITY_virtual"; - case DW_VIRTUALITY_pure_virtual: return "VIRTUALITY_pure_virtual"; + case DW_VIRTUALITY_none: return "DW_VIRTUALITY_none"; + case DW_VIRTUALITY_virtual: return "DW_VIRTUALITY_virtual"; + case DW_VIRTUALITY_pure_virtual: return "DW_VIRTUALITY_pure_virtual"; } return 0; } @@ -395,27 +482,27 @@ const char *llvm::dwarf::VirtualityString(unsigned Virtuality) { /// const char *llvm::dwarf::LanguageString(unsigned Language) { switch (Language) { - case DW_LANG_C89: return "LANG_C89"; - case DW_LANG_C: return "LANG_C"; - case DW_LANG_Ada83: return "LANG_Ada83"; - case DW_LANG_C_plus_plus: return "LANG_C_plus_plus"; - case DW_LANG_Cobol74: return "LANG_Cobol74"; - case DW_LANG_Cobol85: return "LANG_Cobol85"; - case DW_LANG_Fortran77: return "LANG_Fortran77"; - case DW_LANG_Fortran90: return "LANG_Fortran90"; - case DW_LANG_Pascal83: return "LANG_Pascal83"; - case DW_LANG_Modula2: return "LANG_Modula2"; - case DW_LANG_Java: return "LANG_Java"; - 
case DW_LANG_C99: return "LANG_C99"; - case DW_LANG_Ada95: return "LANG_Ada95"; - case DW_LANG_Fortran95: return "LANG_Fortran95"; - case DW_LANG_PLI: return "LANG_PLI"; - case DW_LANG_ObjC: return "LANG_ObjC"; - case DW_LANG_ObjC_plus_plus: return "LANG_ObjC_plus_plus"; - case DW_LANG_UPC: return "LANG_UPC"; - case DW_LANG_D: return "LANG_D"; - case DW_LANG_lo_user: return "LANG_lo_user"; - case DW_LANG_hi_user: return "LANG_hi_user"; + case DW_LANG_C89: return "DW_LANG_C89"; + case DW_LANG_C: return "DW_LANG_C"; + case DW_LANG_Ada83: return "DW_LANG_Ada83"; + case DW_LANG_C_plus_plus: return "DW_LANG_C_plus_plus"; + case DW_LANG_Cobol74: return "DW_LANG_Cobol74"; + case DW_LANG_Cobol85: return "DW_LANG_Cobol85"; + case DW_LANG_Fortran77: return "DW_LANG_Fortran77"; + case DW_LANG_Fortran90: return "DW_LANG_Fortran90"; + case DW_LANG_Pascal83: return "DW_LANG_Pascal83"; + case DW_LANG_Modula2: return "DW_LANG_Modula2"; + case DW_LANG_Java: return "DW_LANG_Java"; + case DW_LANG_C99: return "DW_LANG_C99"; + case DW_LANG_Ada95: return "DW_LANG_Ada95"; + case DW_LANG_Fortran95: return "DW_LANG_Fortran95"; + case DW_LANG_PLI: return "DW_LANG_PLI"; + case DW_LANG_ObjC: return "DW_LANG_ObjC"; + case DW_LANG_ObjC_plus_plus: return "DW_LANG_ObjC_plus_plus"; + case DW_LANG_UPC: return "DW_LANG_UPC"; + case DW_LANG_D: return "DW_LANG_D"; + case DW_LANG_lo_user: return "DW_LANG_lo_user"; + case DW_LANG_hi_user: return "DW_LANG_hi_user"; } return 0; } @@ -424,10 +511,10 @@ const char *llvm::dwarf::LanguageString(unsigned Language) { /// const char *llvm::dwarf::CaseString(unsigned Case) { switch (Case) { - case DW_ID_case_sensitive: return "ID_case_sensitive"; - case DW_ID_up_case: return "ID_up_case"; - case DW_ID_down_case: return "ID_down_case"; - case DW_ID_case_insensitive: return "ID_case_insensitive"; + case DW_ID_case_sensitive: return "DW_ID_case_sensitive"; + case DW_ID_up_case: return "DW_ID_up_case"; + case DW_ID_down_case: return "DW_ID_down_case"; + case DW_ID_case_insensitive: return "DW_ID_case_insensitive"; } return 0; } @@ -436,11 +523,11 @@ const char *llvm::dwarf::CaseString(unsigned Case) { /// const char *llvm::dwarf::ConventionString(unsigned Convention) { switch (Convention) { - case DW_CC_normal: return "CC_normal"; - case DW_CC_program: return "CC_program"; - case DW_CC_nocall: return "CC_nocall"; - case DW_CC_lo_user: return "CC_lo_user"; - case DW_CC_hi_user: return "CC_hi_user"; + case DW_CC_normal: return "DW_CC_normal"; + case DW_CC_program: return "DW_CC_program"; + case DW_CC_nocall: return "DW_CC_nocall"; + case DW_CC_lo_user: return "DW_CC_lo_user"; + case DW_CC_hi_user: return "DW_CC_hi_user"; } return 0; } @@ -449,10 +536,10 @@ const char *llvm::dwarf::ConventionString(unsigned Convention) { /// const char *llvm::dwarf::InlineCodeString(unsigned Code) { switch (Code) { - case DW_INL_not_inlined: return "INL_not_inlined"; - case DW_INL_inlined: return "INL_inlined"; - case DW_INL_declared_not_inlined: return "INL_declared_not_inlined"; - case DW_INL_declared_inlined: return "INL_declared_inlined"; + case DW_INL_not_inlined: return "DW_INL_not_inlined"; + case DW_INL_inlined: return "DW_INL_inlined"; + case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined"; + case DW_INL_declared_inlined: return "DW_INL_declared_inlined"; } return 0; } @@ -461,8 +548,8 @@ const char *llvm::dwarf::InlineCodeString(unsigned Code) { /// const char *llvm::dwarf::ArrayOrderString(unsigned Order) { switch (Order) { - case DW_ORD_row_major: return "ORD_row_major"; - case 
DW_ORD_col_major: return "ORD_col_major"; + case DW_ORD_row_major: return "DW_ORD_row_major"; + case DW_ORD_col_major: return "DW_ORD_col_major"; } return 0; } @@ -471,8 +558,8 @@ const char *llvm::dwarf::ArrayOrderString(unsigned Order) { /// descriptor. const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) { switch (Discriminant) { - case DW_DSC_label: return "DSC_label"; - case DW_DSC_range: return "DSC_range"; + case DW_DSC_label: return "DW_DSC_label"; + case DW_DSC_range: return "DW_DSC_range"; } return 0; } @@ -481,18 +568,18 @@ const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) { /// const char *llvm::dwarf::LNStandardString(unsigned Standard) { switch (Standard) { - case DW_LNS_copy: return "LNS_copy"; - case DW_LNS_advance_pc: return "LNS_advance_pc"; - case DW_LNS_advance_line: return "LNS_advance_line"; - case DW_LNS_set_file: return "LNS_set_file"; - case DW_LNS_set_column: return "LNS_set_column"; - case DW_LNS_negate_stmt: return "LNS_negate_stmt"; - case DW_LNS_set_basic_block: return "LNS_set_basic_block"; - case DW_LNS_const_add_pc: return "LNS_const_add_pc"; - case DW_LNS_fixed_advance_pc: return "LNS_fixed_advance_pc"; - case DW_LNS_set_prologue_end: return "LNS_set_prologue_end"; - case DW_LNS_set_epilogue_begin: return "LNS_set_epilogue_begin"; - case DW_LNS_set_isa: return "LNS_set_isa"; + case DW_LNS_copy: return "DW_LNS_copy"; + case DW_LNS_advance_pc: return "DW_LNS_advance_pc"; + case DW_LNS_advance_line: return "DW_LNS_advance_line"; + case DW_LNS_set_file: return "DW_LNS_set_file"; + case DW_LNS_set_column: return "DW_LNS_set_column"; + case DW_LNS_negate_stmt: return "DW_LNS_negate_stmt"; + case DW_LNS_set_basic_block: return "DW_LNS_set_basic_block"; + case DW_LNS_const_add_pc: return "DW_LNS_const_add_pc"; + case DW_LNS_fixed_advance_pc: return "DW_LNS_fixed_advance_pc"; + case DW_LNS_set_prologue_end: return "DW_LNS_set_prologue_end"; + case DW_LNS_set_epilogue_begin: return "DW_LNS_set_epilogue_begin"; + case DW_LNS_set_isa: return "DW_LNS_set_isa"; } return 0; } @@ -502,11 +589,11 @@ const char *llvm::dwarf::LNStandardString(unsigned Standard) { const char *llvm::dwarf::LNExtendedString(unsigned Encoding) { switch (Encoding) { // Line Number Extended Opcode Encodings - case DW_LNE_end_sequence: return "LNE_end_sequence"; - case DW_LNE_set_address: return "LNE_set_address"; - case DW_LNE_define_file: return "LNE_define_file"; - case DW_LNE_lo_user: return "LNE_lo_user"; - case DW_LNE_hi_user: return "LNE_hi_user"; + case DW_LNE_end_sequence: return "DW_LNE_end_sequence"; + case DW_LNE_set_address: return "DW_LNE_set_address"; + case DW_LNE_define_file: return "DW_LNE_define_file"; + case DW_LNE_lo_user: return "DW_LNE_lo_user"; + case DW_LNE_hi_user: return "DW_LNE_hi_user"; } return 0; } @@ -516,11 +603,11 @@ const char *llvm::dwarf::LNExtendedString(unsigned Encoding) { const char *llvm::dwarf::MacinfoString(unsigned Encoding) { switch (Encoding) { // Macinfo Type Encodings - case DW_MACINFO_define: return "MACINFO_define"; - case DW_MACINFO_undef: return "MACINFO_undef"; - case DW_MACINFO_start_file: return "MACINFO_start_file"; - case DW_MACINFO_end_file: return "MACINFO_end_file"; - case DW_MACINFO_vendor_ext: return "MACINFO_vendor_ext"; + case DW_MACINFO_define: return "DW_MACINFO_define"; + case DW_MACINFO_undef: return "DW_MACINFO_undef"; + case DW_MACINFO_start_file: return "DW_MACINFO_start_file"; + case DW_MACINFO_end_file: return "DW_MACINFO_end_file"; + case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext"; } 
return 0; } @@ -529,33 +616,33 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) { /// encodings. const char *llvm::dwarf::CallFrameString(unsigned Encoding) { switch (Encoding) { - case DW_CFA_advance_loc: return "CFA_advance_loc"; - case DW_CFA_offset: return "CFA_offset"; - case DW_CFA_restore: return "CFA_restore"; - case DW_CFA_set_loc: return "CFA_set_loc"; - case DW_CFA_advance_loc1: return "CFA_advance_loc1"; - case DW_CFA_advance_loc2: return "CFA_advance_loc2"; - case DW_CFA_advance_loc4: return "CFA_advance_loc4"; - case DW_CFA_offset_extended: return "CFA_offset_extended"; - case DW_CFA_restore_extended: return "CFA_restore_extended"; - case DW_CFA_undefined: return "CFA_undefined"; - case DW_CFA_same_value: return "CFA_same_value"; - case DW_CFA_register: return "CFA_register"; - case DW_CFA_remember_state: return "CFA_remember_state"; - case DW_CFA_restore_state: return "CFA_restore_state"; - case DW_CFA_def_cfa: return "CFA_def_cfa"; - case DW_CFA_def_cfa_register: return "CFA_def_cfa_register"; - case DW_CFA_def_cfa_offset: return "CFA_def_cfa_offset"; - case DW_CFA_def_cfa_expression: return "CFA_def_cfa_expression"; - case DW_CFA_expression: return "CFA_expression"; - case DW_CFA_offset_extended_sf: return "CFA_offset_extended_sf"; - case DW_CFA_def_cfa_sf: return "CFA_def_cfa_sf"; - case DW_CFA_def_cfa_offset_sf: return "CFA_def_cfa_offset_sf"; - case DW_CFA_val_offset: return "CFA_val_offset"; - case DW_CFA_val_offset_sf: return "CFA_val_offset_sf"; - case DW_CFA_val_expression: return "CFA_val_expression"; - case DW_CFA_lo_user: return "CFA_lo_user"; - case DW_CFA_hi_user: return "CFA_hi_user"; + case DW_CFA_advance_loc: return "DW_CFA_advance_loc"; + case DW_CFA_offset: return "DW_CFA_offset"; + case DW_CFA_restore: return "DW_CFA_restore"; + case DW_CFA_set_loc: return "DW_CFA_set_loc"; + case DW_CFA_advance_loc1: return "DW_CFA_advance_loc1"; + case DW_CFA_advance_loc2: return "DW_CFA_advance_loc2"; + case DW_CFA_advance_loc4: return "DW_CFA_advance_loc4"; + case DW_CFA_offset_extended: return "DW_CFA_offset_extended"; + case DW_CFA_restore_extended: return "DW_CFA_restore_extended"; + case DW_CFA_undefined: return "DW_CFA_undefined"; + case DW_CFA_same_value: return "DW_CFA_same_value"; + case DW_CFA_register: return "DW_CFA_register"; + case DW_CFA_remember_state: return "DW_CFA_remember_state"; + case DW_CFA_restore_state: return "DW_CFA_restore_state"; + case DW_CFA_def_cfa: return "DW_CFA_def_cfa"; + case DW_CFA_def_cfa_register: return "DW_CFA_def_cfa_register"; + case DW_CFA_def_cfa_offset: return "DW_CFA_def_cfa_offset"; + case DW_CFA_def_cfa_expression: return "DW_CFA_def_cfa_expression"; + case DW_CFA_expression: return "DW_CFA_expression"; + case DW_CFA_offset_extended_sf: return "DW_CFA_offset_extended_sf"; + case DW_CFA_def_cfa_sf: return "DW_CFA_def_cfa_sf"; + case DW_CFA_def_cfa_offset_sf: return "DW_CFA_def_cfa_offset_sf"; + case DW_CFA_val_offset: return "DW_CFA_val_offset"; + case DW_CFA_val_offset_sf: return "DW_CFA_val_offset_sf"; + case DW_CFA_val_expression: return "DW_CFA_val_expression"; + case DW_CFA_lo_user: return "DW_CFA_lo_user"; + case DW_CFA_hi_user: return "DW_CFA_hi_user"; } return 0; } diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index 095395f..1bde2fe 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -51,7 +51,15 @@ static const char *BackupNumber(const char *Pos, const char *FirstChar) { if (!isNumberChar(*Pos)) return Pos; // Otherwise, return to the start 
of the number. + bool HasPeriod = false; while (Pos > FirstChar && isNumberChar(Pos[-1])) { + // Backup over at most one period. + if (Pos[-1] == '.') { + if (HasPeriod) + break; + HasPeriod = true; + } + --Pos; if (Pos > FirstChar && isSignedChar(Pos[0]) && !isExponentChar(Pos[-1])) break; @@ -204,16 +212,16 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA, const char *F1P = File1Start; const char *F2P = File2Start; - if (A_size == B_size) { - // Are the buffers identical? Common case: Handle this efficiently. - if (std::memcmp(File1Start, File2Start, A_size) == 0) - return 0; + // Are the buffers identical? Common case: Handle this efficiently. + if (A_size == B_size && + std::memcmp(File1Start, File2Start, A_size) == 0) + return 0; - if (AbsTol == 0 && RelTol == 0) { - if (Error) - *Error = "Files differ without tolerance allowance"; - return 1; // Files different! - } + // Otherwise, we are done if no tolerances are set. + if (AbsTol == 0 && RelTol == 0) { + if (Error) + *Error = "Files differ without tolerance allowance"; + return 1; // Files different! } bool CompareFailed = false; diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index 3f467fe..b8dca33 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -175,6 +175,14 @@ static void **GetBucketFor(const FoldingSetNodeID &ID, return Buckets + BucketNum; } +/// AllocateBuckets - Allocate initialized bucket memory. +static void **AllocateBuckets(unsigned NumBuckets) { + void **Buckets = static_cast<void**>(calloc(NumBuckets+1, sizeof(void*))); + // Set the very last bucket to be a non-null "pointer". + Buckets[NumBuckets] = reinterpret_cast<void*>(-1); + return Buckets; +} + //===----------------------------------------------------------------------===// // FoldingSetImpl Implementation @@ -182,11 +190,11 @@ FoldingSetImpl::FoldingSetImpl(unsigned Log2InitSize) { assert(5 < Log2InitSize && Log2InitSize < 32 && "Initial hash table size out of range"); NumBuckets = 1 << Log2InitSize; - Buckets = new void*[NumBuckets+1]; - clear(); + Buckets = AllocateBuckets(NumBuckets); + NumNodes = 0; } FoldingSetImpl::~FoldingSetImpl() { - delete [] Buckets; + free(Buckets); } void FoldingSetImpl::clear() { // Set all but the last bucket to null pointers. @@ -207,8 +215,8 @@ void FoldingSetImpl::GrowHashTable() { NumBuckets <<= 1; // Clear out new buckets. - Buckets = new void*[NumBuckets+1]; - clear(); + Buckets = AllocateBuckets(NumBuckets); + NumNodes = 0; // Walk the old buckets, rehashing nodes into their new place. FoldingSetNodeID ID; @@ -227,7 +235,7 @@ void FoldingSetImpl::GrowHashTable() { } } - delete[] OldBuckets; + free(OldBuckets); } /// FindNodeOrInsertPos - Look up the node specified by ID. If it exists, diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 2b95089..542162d 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" +#include "llvm/Support/MathExtras.h" #include "llvm/System/Errno.h" #include "llvm/System/Path.h" #include "llvm/System/Process.h" @@ -37,22 +38,7 @@ using namespace llvm; // MemoryBuffer implementation itself. //===----------------------------------------------------------------------===// -MemoryBuffer::~MemoryBuffer() { - if (MustDeleteBuffer) - free((void*)BufferStart); -} - -/// initCopyOf - Initialize this source buffer with a copy of the specified -/// memory range.
We make the copy so that we can null terminate it -/// successfully. -void MemoryBuffer::initCopyOf(const char *BufStart, const char *BufEnd) { - size_t Size = BufEnd-BufStart; - BufferStart = (char *)malloc(Size+1); - BufferEnd = BufferStart+Size; - memcpy(const_cast<char*>(BufferStart), BufStart, Size); - *const_cast<char*>(BufferEnd) = 0; // Null terminate buffer. - MustDeleteBuffer = true; -} +MemoryBuffer::~MemoryBuffer() { } /// init - Initialize this MemoryBuffer as a reference to externally allocated /// memory, memory that we know is already null terminated. @@ -60,27 +46,38 @@ void MemoryBuffer::init(const char *BufStart, const char *BufEnd) { assert(BufEnd[0] == 0 && "Buffer is not null terminated!"); BufferStart = BufStart; BufferEnd = BufEnd; - MustDeleteBuffer = false; } //===----------------------------------------------------------------------===// // MemoryBufferMem implementation. //===----------------------------------------------------------------------===// +/// CopyStringRef - Copies contents of a StringRef into a block of memory and +/// null-terminates it. +static void CopyStringRef(char *Memory, StringRef Data) { + memcpy(Memory, Data.data(), Data.size()); + Memory[Data.size()] = 0; // Null terminate string. +} + +/// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it. +template <typename T> +static T* GetNamedBuffer(StringRef Buffer, StringRef Name) { + char *Mem = static_cast<char*>(operator new(sizeof(T) + Name.size() + 1)); + CopyStringRef(Mem + sizeof(T), Name); + return new (Mem) T(Buffer); +} + namespace { +/// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory. class MemoryBufferMem : public MemoryBuffer { - std::string FileID; public: - MemoryBufferMem(StringRef InputData, StringRef FID, bool Copy = false) - : FileID(FID) { - if (!Copy) - init(InputData.data(), InputData.data()+InputData.size()); - else - initCopyOf(InputData.data(), InputData.data()+InputData.size()); + MemoryBufferMem(StringRef InputData) { + init(InputData.begin(), InputData.end()); } - + virtual const char *getBufferIdentifier() const { - return FileID.c_str(); + // The name is stored after the class itself. + return reinterpret_cast<const char*>(this + 1); } }; } @@ -88,42 +85,55 @@ public: /// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note /// that EndPtr[0] must be a null byte and be accessible! MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData, - const char *BufferName) { - return new MemoryBufferMem(InputData, BufferName); + StringRef BufferName) { + return GetNamedBuffer<MemoryBufferMem>(InputData, BufferName); } /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, /// copying the contents and taking ownership of it. This has no requirements /// on EndPtr[0]. MemoryBuffer *MemoryBuffer::getMemBufferCopy(StringRef InputData, - const char *BufferName) { - return new MemoryBufferMem(InputData, BufferName, true); + StringRef BufferName) { + MemoryBuffer *Buf = getNewUninitMemBuffer(InputData.size(), BufferName); + if (!Buf) return 0; + memcpy(const_cast<char*>(Buf->getBufferStart()), InputData.data(), + InputData.size()); + return Buf; } /// getNewUninitMemBuffer - Allocate a new MemoryBuffer of the specified size -/// that is completely initialized to zeros. Note that the caller should -/// initialize the memory allocated by this method. The memory is owned by -/// the MemoryBuffer object. +/// that is not initialized. Note that the caller should initialize the +/// memory allocated by this method. The memory is owned by the MemoryBuffer +/// object.
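+///
+/// (Illustrative sketch of the single allocation performed below, an
+/// editorial aside rather than original documentation:
+///
+///   [MemoryBufferMem][name]\0[pad to sizeof(void*)][Size data bytes]\0
+///
+/// getBufferIdentifier() recovers the name as the bytes immediately after
+/// the object, and the data begins at the aligned offset so the buffer
+/// start stays pointer-aligned.)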
MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size, StringRef BufferName) { - char *Buf = (char *)malloc(Size+1); - if (!Buf) return 0; - Buf[Size] = 0; - MemoryBufferMem *SB = new MemoryBufferMem(StringRef(Buf, Size), BufferName); - // The memory for this buffer is owned by the MemoryBuffer. - SB->MustDeleteBuffer = true; - return SB; + // Allocate space for the MemoryBuffer, the data and the name. It is important + // that MemoryBuffer and data are aligned so PointerIntPair works with them. + size_t AlignedStringLen = + RoundUpToAlignment(sizeof(MemoryBufferMem) + BufferName.size() + 1, + sizeof(void*)); // TODO: Is sizeof(void*) enough? + size_t RealLen = AlignedStringLen + Size + 1; + char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow)); + if (!Mem) return 0; + + // The name is stored after the class itself. + CopyStringRef(Mem + sizeof(MemoryBufferMem), BufferName); + + // The buffer begins after the name and must be aligned. + char *Buf = Mem + AlignedStringLen; + Buf[Size] = 0; // Null terminate buffer. + + return new (Mem) MemoryBufferMem(StringRef(Buf, Size)); } /// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that /// is completely initialized to zeros. Note that the caller should /// initialize the memory allocated by this method. The memory is owned by /// the MemoryBuffer object. -MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, - const char *BufferName) { +MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) { MemoryBuffer *SB = getNewUninitMemBuffer(Size, BufferName); if (!SB) return 0; - memset(const_cast<char*>(SB->getBufferStart()), 0, Size+1); + memset(const_cast<char*>(SB->getBufferStart()), 0, Size); return SB; } @@ -137,7 +147,16 @@ MemoryBuffer *MemoryBuffer::getFileOrSTDIN(StringRef Filename, int64_t FileSize, struct stat *FileInfo) { if (Filename == "-") - return getSTDIN(); + return getSTDIN(ErrStr); return getFile(Filename, ErrStr, FileSize, FileInfo); } + +MemoryBuffer *MemoryBuffer::getFileOrSTDIN(const char *Filename, + std::string *ErrStr, + int64_t FileSize, + struct stat *FileInfo) { + if (strcmp(Filename, "-") == 0) + return getSTDIN(ErrStr); + return getFile(Filename, ErrStr, FileSize, FileInfo); +} @@ -149,18 +168,11 @@ namespace { /// MemoryBufferMMapFile - This represents a file that was mapped in with the /// sys::Path::MapInFilePages method. When destroyed, it calls the /// sys::Path::UnMapFilePages method.
-class MemoryBufferMMapFile : public MemoryBuffer { - std::string Filename; +class MemoryBufferMMapFile : public MemoryBufferMem { public: - MemoryBufferMMapFile(StringRef filename, const char *Pages, uint64_t Size) - : Filename(filename) { - init(Pages, Pages+Size); - } - - virtual const char *getBufferIdentifier() const { - return Filename.c_str(); - } - + MemoryBufferMMapFile(StringRef Buffer) + : MemoryBufferMem(Buffer) { } + ~MemoryBufferMMapFile() { sys::Path::UnMapFilePages(getBufferStart(), getBufferSize()); } @@ -170,19 +182,24 @@ public: class FileCloser { int FD; public: - FileCloser(int FD) : FD(FD) {} + explicit FileCloser(int FD) : FD(FD) {} ~FileCloser() { ::close(FD); } }; } MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, int64_t FileSize, struct stat *FileInfo) { - int OpenFlags = 0; + SmallString<256> PathBuf(Filename.begin(), Filename.end()); + return MemoryBuffer::getFile(PathBuf.c_str(), ErrStr, FileSize, FileInfo); +} + +MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr, + int64_t FileSize, struct stat *FileInfo) { + int OpenFlags = O_RDONLY; #ifdef O_BINARY OpenFlags |= O_BINARY; // Open input file in binary mode on win32. #endif - SmallString<256> PathBuf(Filename.begin(), Filename.end()); - int FD = ::open(PathBuf.c_str(), O_RDONLY|OpenFlags); + int FD = ::open(Filename, OpenFlags); if (FD == -1) { if (ErrStr) *ErrStr = sys::StrError(); return 0; @@ -213,8 +230,8 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, if (FileSize >= 4096*4 && (FileSize & (sys::Process::GetPageSize()-1)) != 0) { if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) { - // Close the file descriptor, now that the whole file is in memory. - return new MemoryBufferMMapFile(Filename, Pages, FileSize); + return GetNamedBuffer<MemoryBufferMMapFile>(StringRef(Pages, FileSize), + Filename); } } @@ -254,34 +271,27 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr, // MemoryBuffer::getSTDIN implementation. //===----------------------------------------------------------------------===// -namespace { -class STDINBufferFile : public MemoryBuffer { -public: - virtual const char *getBufferIdentifier() const { - return "<stdin>"; - } -}; -} - -MemoryBuffer *MemoryBuffer::getSTDIN() { - char Buffer[4096*4]; - - std::vector<char> FileData; - +MemoryBuffer *MemoryBuffer::getSTDIN(std::string *ErrStr) { // Read in all of the data from stdin, we cannot mmap stdin. // // FIXME: That isn't necessarily true, we should try to mmap stdin and // fallback if it fails. sys::Program::ChangeStdinToBinary(); - size_t ReadBytes; + + const ssize_t ChunkSize = 4096*4; + SmallString<ChunkSize> Buffer; + ssize_t ReadBytes; + // Read into Buffer until we hit EOF. do { - ReadBytes = fread(Buffer, sizeof(char), sizeof(Buffer), stdin); - FileData.insert(FileData.end(), Buffer, Buffer+ReadBytes); - } while (ReadBytes == sizeof(Buffer)); - - FileData.push_back(0); // &FileData[Size] is invalid. So is &*FileData.end().
-  size_t Size = FileData.size();
-  MemoryBuffer *B = new STDINBufferFile();
-  B->initCopyOf(&FileData[0], &FileData[Size-1]);
-  return B;
+    Buffer.reserve(Buffer.size() + ChunkSize);
+    ReadBytes = read(0, Buffer.end(), ChunkSize);
+    if (ReadBytes == -1) {
+      if (errno == EINTR) continue;
+      if (ErrStr) *ErrStr = sys::StrError();
+      return 0;
+    }
+    Buffer.set_size(Buffer.size() + ReadBytes);
+  } while (ReadBytes != 0);
+
+  return getMemBufferCopy(Buffer, "<stdin>");
 }
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index 7a04a53..a99ab2f 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -12,11 +12,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Config/config.h"     // Get autoconf configuration settings
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/System/Signals.h"
 #include "llvm/System/ThreadLocal.h"
 #include "llvm/ADT/SmallString.h"
+
+#ifdef HAVE_CRASHREPORTERCLIENT_H
+#include <CrashReporterClient.h>
+#endif
+
 using namespace llvm;
 
 namespace llvm {
@@ -48,8 +54,17 @@ static void PrintCurStackTrace(raw_ostream &OS) {
   OS.flush();
 }
 
-// Integrate with crash reporter.
-#ifdef __APPLE__
+// Integrate with crash reporter libraries.
+#if defined (__APPLE__) && defined (HAVE_CRASHREPORTERCLIENT_H)
+// If any clients of llvm try to link to libCrashReporterClient.a themselves,
+// only one crash info struct will be used.
+extern "C" {
+CRASH_REPORTER_CLIENT_HIDDEN
+struct crashreporter_annotations_t gCRAnnotations
+        __attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
+        = { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0 };
+}
+#elif defined (__APPLE__)
 static const char *__crashreporter_info__ = 0;
 asm(".desc ___crashreporter_info__, 0x10");
 #endif
@@ -71,7 +86,11 @@ static void CrashHandler(void *Cookie) {
   }
 
   if (!TmpStr.empty()) {
+#ifndef HAVE_CRASHREPORTERCLIENT_H
     __crashreporter_info__ = strdup(std::string(TmpStr.str()).c_str());
+#else
+    CRSetCrashLogMessage(std::string(TmpStr.str()).c_str());
+#endif
     errs() << TmpStr.str();
   }
diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp
index 68938fa..504e649 100644
--- a/lib/Support/SmallPtrSet.cpp
+++ b/lib/Support/SmallPtrSet.cpp
@@ -166,10 +166,13 @@ void SmallPtrSetImpl::Grow() {
   }
 }
 
-SmallPtrSetImpl::SmallPtrSetImpl(const SmallPtrSetImpl& that) {
+SmallPtrSetImpl::SmallPtrSetImpl(const void **SmallStorage,
+                                 const SmallPtrSetImpl& that) {
+  SmallArray = SmallStorage;
+
   // If we're becoming small, prepare to insert into our stack space
   if (that.isSmall()) {
-    CurArray = &SmallArray[0];
+    CurArray = SmallArray;
 
   // Otherwise, allocate new heap space (unless we were the same size)
   } else {
     CurArray = (const void**)malloc(sizeof(void*) * (that.CurArraySize+1));
@@ -197,7 +200,7 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) {
   if (RHS.isSmall()) {
     if (!isSmall())
       free(CurArray);
-    CurArray = &SmallArray[0];
+    CurArray = SmallArray;
 
   // Otherwise, allocate new heap space (unless we were the same size)
   } else if (CurArraySize != RHS.CurArraySize) {
     if (isSmall())
diff --git a/lib/Support/SmallVector.cpp b/lib/Support/SmallVector.cpp
index 6821382..2e17af8 100644
--- a/lib/Support/SmallVector.cpp
+++ b/lib/Support/SmallVector.cpp
@@ -21,15 +21,18 @@ void SmallVectorBase::grow_pod(size_t MinSizeInBytes, size_t TSize) {
   size_t NewCapacityInBytes = 2 * capacity_in_bytes();
   if (NewCapacityInBytes < MinSizeInBytes)
     NewCapacityInBytes = MinSizeInBytes;
-  void *NewElts = operator new(NewCapacityInBytes);
-
-  // Copy the elements over. No need to run dtors on PODs.
-  memcpy(NewElts, this->BeginX, CurSizeBytes);
-
-  // If this wasn't grown from the inline copy, deallocate the old space.
-  if (!this->isSmall())
-    operator delete(this->BeginX);
-
+
+  void *NewElts;
+  if (this->isSmall()) {
+    NewElts = malloc(NewCapacityInBytes);
+
+    // Copy the elements over. No need to run dtors on PODs.
+    memcpy(NewElts, this->BeginX, CurSizeBytes);
+  } else {
+    // If this wasn't grown from the inline copy, grow the allocated space.
+    NewElts = realloc(this->BeginX, NewCapacityInBytes);
+  }
+
   this->EndX = (char*)NewElts+CurSizeBytes;
   this->BeginX = NewElts;
   this->CapacityX = (char*)this->BeginX + NewCapacityInBytes;
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 784b77c..44ee177 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -236,11 +236,13 @@ static Timer &getNamedRegionTimer(StringRef Name) {
   return T;
 }
 
-NamedRegionTimer::NamedRegionTimer(StringRef Name)
-  : TimeRegion(getNamedRegionTimer(Name)) {}
+NamedRegionTimer::NamedRegionTimer(StringRef Name,
+                                   bool Enabled)
+  : TimeRegion(!Enabled ? 0 : &getNamedRegionTimer(Name)) {}
 
-NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName)
-  : TimeRegion(NamedGroupedTimers->get(Name, GroupName)) {}
+NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName,
+                                   bool Enabled)
+  : TimeRegion(!Enabled ? 0 : &NamedGroupedTimers->get(Name, GroupName)) {}
 
 //===----------------------------------------------------------------------===//
 //   TimerGroup Implementation
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 9796ca5..6a70449 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -104,6 +104,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
   case Solaris: return "solaris";
   case Win32: return "win32";
   case Haiku: return "haiku";
+  case Minix: return "minix";
   }
 
   return "<invalid>";
@@ -326,7 +327,9 @@ void Triple::Parse() const {
   else if (OSName.startswith("win32"))
     OS = Win32;
   else if (OSName.startswith("haiku"))
-    OS = Haiku;
+    OS = Haiku;
+  else if (OSName.startswith("minix"))
+    OS = Minix;
   else
     OS = UnknownOS;
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 11cf0ec..8054ae6 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -427,10 +427,9 @@ raw_fd_ostream::~raw_fd_ostream() {
 void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
   assert(FD >= 0 && "File already closed.");
   pos += Size;
-  ssize_t ret;
 
   do {
-    ret = ::write(FD, Ptr, Size);
+    ssize_t ret = ::write(FD, Ptr, Size);
 
     if (ret < 0) {
       // If it's a recoverable error, swallow it and retry the write.
@@ -482,7 +481,7 @@ uint64_t raw_fd_ostream::seek(uint64_t off) {
 }
 
 size_t raw_fd_ostream::preferred_buffer_size() const {
-#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(_MINIX)
+#if !defined(_MSC_VER) && !defined(__MINGW32__) && !defined(__minix)
   // Windows and Minix have no st_blksize.
   assert(FD >= 0 && "File not yet open!");
   struct stat statbuf;
@@ -496,8 +495,9 @@ size_t raw_fd_ostream::preferred_buffer_size() const {
     return 0;
 
   // Return the preferred block size.
   return statbuf.st_blksize;
-#endif
+#else
   return raw_ostream::preferred_buffer_size();
+#endif
 }
 
 raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
diff --git a/lib/System/Disassembler.cpp b/lib/System/Disassembler.cpp
index bad427a..139e3be 100644
--- a/lib/System/Disassembler.cpp
+++ b/lib/System/Disassembler.cpp
@@ -44,33 +44,29 @@ std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length,
                                          uint64_t pc) {
   std::stringstream res;
 
-#if defined (__i386__) || defined (__amd64__) || defined (__x86_64__)
+#if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \
+  && USE_UDIS86
   unsigned bits;
 # if defined(__i386__)
   bits = 32;
 # else
   bits = 64;
 # endif
-
-# if USE_UDIS86
+
   ud_t ud_obj;
-
+
   ud_init(&ud_obj);
   ud_set_input_buffer(&ud_obj, start, length);
   ud_set_mode(&ud_obj, bits);
   ud_set_pc(&ud_obj, pc);
   ud_set_syntax(&ud_obj, UD_SYN_ATT);
-
+
   res << std::setbase(16) << std::setw(bits/4);
-
+
   while (ud_disassemble(&ud_obj)) {
     res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n";
   }
-# else
-  res << "No disassembler available. See configure help for options.\n";
-# endif
-
 #else
   res << "No disassembler available. See configure help for options.\n";
 #endif
diff --git a/lib/System/Path.cpp b/lib/System/Path.cpp
index 6844530..1235257 100644
--- a/lib/System/Path.cpp
+++ b/lib/System/Path.cpp
@@ -136,26 +136,23 @@ sys::IdentifyFileType(const char *magic, unsigned length) {
 
 bool
 Path::isArchive() const {
-  if (canRead())
-    return hasMagicNumber("!<arch>\012");
-  return false;
+  return hasMagicNumber("!<arch>\012");
 }
 
 bool
 Path::isDynamicLibrary() const {
-  if (canRead()) {
-    std::string Magic;
-    if (getMagicNumber(Magic, 64))
-      switch (IdentifyFileType(Magic.c_str(),
-                               static_cast<unsigned>(Magic.length()))) {
-        default: return false;
-        case Mach_O_FixedVirtualMemorySharedLib_FileType:
-        case Mach_O_DynamicallyLinkedSharedLib_FileType:
-        case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
-        case ELF_SharedObject_FileType:
-        case COFF_FileType: return true;
-      }
-  }
+  std::string Magic;
+  if (getMagicNumber(Magic, 64))
+    switch (IdentifyFileType(Magic.c_str(),
+                             static_cast<unsigned>(Magic.length()))) {
+      default: return false;
+      case Mach_O_FixedVirtualMemorySharedLib_FileType:
+      case Mach_O_DynamicallyLinkedSharedLib_FileType:
+      case Mach_O_DynamicallyLinkedSharedLibStub_FileType:
+      case ELF_SharedObject_FileType:
+      case COFF_FileType: return true;
+    }
+
   return false;
 }
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index 74596dc..bc104a3 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -421,10 +421,8 @@ bool Path::getMagicNumber(std::string &Magic, unsigned len) const {
     return false;
   ssize_t bytes_read = ::read(fd, Buf, len);
   ::close(fd);
-  if (ssize_t(len) != bytes_read) {
-    Magic.clear();
+  if (ssize_t(len) != bytes_read)
     return false;
-  }
   Magic.assign(Buf, len);
   return true;
 }
@@ -890,14 +888,19 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
 #else
   // Okay, looks like we have to do it all by our lonesome.
   static unsigned FCounter = 0;
-  unsigned offset = path.size() + 1;
-  while ( FCounter < 999999 && exists()) {
-    sprintf(FNBuffer+offset,"%06u",++FCounter);
+  // Try to initialize with unique value.
+ if (FCounter == 0) FCounter = ((unsigned)getpid() & 0xFFFF) << 8; + char* pos = strstr(FNBuffer, "XXXXXX"); + do { + if (++FCounter > 0xFFFFFF) { + return MakeErrMsg(ErrMsg, + path + ": can't make unique filename: too many files"); + } + sprintf(pos, "%06X", FCounter); path = FNBuffer; - } - if (FCounter > 999999) - return MakeErrMsg(ErrMsg, - path + ": can't make unique filename: too many files"); + } while (exists()); + // POSSIBLE SECURITY BUG: An attacker can easily guess the name and exploit + // LLVM. #endif return false; } diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc index 358415f..67018de 100644 --- a/lib/System/Unix/Program.inc +++ b/lib/System/Unix/Program.inc @@ -310,7 +310,7 @@ Program::Wait(unsigned secondsToWait, // fact of having a handler at all causes the wait below to return with EINTR, // unlike if we used SIG_IGN. if (secondsToWait) { -#ifndef __HAIKU__ +#if !defined(__HAIKU__) && !defined(__minix) Act.sa_sigaction = 0; #endif Act.sa_handler = TimeOutHandler; diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index 9548816..1e74647 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -111,6 +111,14 @@ static void UnregisterHandlers() { } +/// RemoveFilesToRemove - Process the FilesToRemove list. This function +/// should be called with the SignalsMutex lock held. +static void RemoveFilesToRemove() { + while (!FilesToRemove.empty()) { + FilesToRemove.back().eraseFromDisk(true); + FilesToRemove.pop_back(); + } +} // SignalHandler - The signal handler that runs. static RETSIGTYPE SignalHandler(int Sig) { @@ -126,10 +134,7 @@ static RETSIGTYPE SignalHandler(int Sig) { sigprocmask(SIG_UNBLOCK, &SigMask, 0); SignalsMutex.acquire(); - while (!FilesToRemove.empty()) { - FilesToRemove.back().eraseFromDisk(true); - FilesToRemove.pop_back(); - } + RemoveFilesToRemove(); if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) { if (InterruptFunction) { @@ -153,7 +158,9 @@ static RETSIGTYPE SignalHandler(int Sig) { } void llvm::sys::RunInterruptHandlers() { - SignalHandler(SIGINT); + SignalsMutex.acquire(); + RemoveFilesToRemove(); + SignalsMutex.release(); } void llvm::sys::SetInterruptFunction(void (*IF)()) { diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc index 5a0052f..379527d 100644 --- a/lib/System/Win32/Path.inc +++ b/lib/System/Win32/Path.inc @@ -281,12 +281,6 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { // FIXME: the above set of functions don't map to Windows very well. -bool -Path::isRootDirectory() const { - size_t len = path.size(); - return len > 0 && path[len-1] == '/'; -} - StringRef Path::getDirname() const { return getDirnameCharSep(path, "/"); } diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc index a3a393c..d6db71b 100644 --- a/lib/System/Win32/Signals.inc +++ b/lib/System/Win32/Signals.inc @@ -283,7 +283,7 @@ static LONG WINAPI LLVMUnhandledExceptionFilter(LPEXCEPTION_POINTERS ep) { #ifdef _MSC_VER if (ExitOnUnhandledExceptions) - _exit(-3); + _exit(-3); #endif // Allow dialog box to pop up allowing choice to start debugger. diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index ae7ae59..14825a7 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -90,10 +90,6 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) { } } -/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model -/// operations involving sub-registers. 
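The makeUnique fallback above stamps a hex counter, seeded once from the pid, into the trailing "XXXXXX" template until a free name turns up. A standalone sketch of the same scheme (hypothetical helper, stat-based existence check; as the patch's own comment warns, the names are guessable, so mkstemp is preferable where it exists):

    #include <cstdio>
    #include <cstring>
    #include <sys/stat.h>
    #include <unistd.h>

    // Stamp successive 6-digit hex counters into a name template that
    // ends in "XXXXXX", stopping at the first unused name.
    bool makeUniqueName(char *FNBuffer) {   // buffer ends with "XXXXXX"
      static unsigned Counter = 0;
      if (Counter == 0)                     // seed once from the pid
        Counter = ((unsigned)getpid() & 0xFFFF) << 8;
      char *Pos = std::strstr(FNBuffer, "XXXXXX");
      if (!Pos) return false;
      struct stat SB;
      do {
        if (++Counter > 0xFFFFFF)           // %06X would overflow 6 digits
          return false;
        std::sprintf(Pos, "%06X", Counter); // overwrite the X's in place
      } while (stat(FNBuffer, &SB) == 0);   // loop while the name exists
      return true;
    }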
-bool ModelWithRegSequence(); - FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel); diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index e68354a..d316b13 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -520,6 +520,70 @@ namespace ARM_AM { // This is stored in two operands [regaddr, align]. The first is the // address register. The second operand is the value of the alignment // specifier to use or zero if no explicit alignment. + // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific + // instruction. + + //===--------------------------------------------------------------------===// + // NEON Modified Immediates + //===--------------------------------------------------------------------===// + // + // Several NEON instructions (e.g., VMOV) take a "modified immediate" + // vector operand, where a small immediate encoded in the instruction + // specifies a full NEON vector value. These modified immediates are + // represented here as encoded integers. The low 8 bits hold the immediate + // value; bit 12 holds the "Op" field of the instruction, and bits 11-8 hold + // the "Cmode" field of the instruction. The interfaces below treat the + // Op and Cmode values as a single 5-bit value. + + static inline unsigned createNEONModImm(unsigned OpCmode, unsigned Val) { + return (OpCmode << 8) | Val; + } + static inline unsigned getNEONModImmOpCmode(unsigned ModImm) { + return (ModImm >> 8) & 0x1f; + } + static inline unsigned getNEONModImmVal(unsigned ModImm) { + return ModImm & 0xff; + } + + /// decodeNEONModImm - Decode a NEON modified immediate value into the + /// element value and the element size in bits. (If the element size is + /// smaller than the vector, it is splatted into all the elements.) 
+  static inline uint64_t decodeNEONModImm(unsigned ModImm, unsigned &EltBits) {
+    unsigned OpCmode = getNEONModImmOpCmode(ModImm);
+    unsigned Imm8 = getNEONModImmVal(ModImm);
+    uint64_t Val = 0;
+
+    if (OpCmode == 0xe) {
+      // 8-bit vector elements
+      Val = Imm8;
+      EltBits = 8;
+    } else if ((OpCmode & 0xc) == 0x8) {
+      // 16-bit vector elements
+      unsigned ByteNum = (OpCmode & 0x6) >> 1;
+      Val = Imm8 << (8 * ByteNum);
+      EltBits = 16;
+    } else if ((OpCmode & 0x8) == 0) {
+      // 32-bit vector elements, zero with one byte set
+      unsigned ByteNum = (OpCmode & 0x6) >> 1;
+      Val = Imm8 << (8 * ByteNum);
+      EltBits = 32;
+    } else if ((OpCmode & 0xe) == 0xc) {
+      // 32-bit vector elements, one byte with low bits set
+      unsigned ByteNum = 1 + (OpCmode & 0x1);
+      Val = (Imm8 << (8 * ByteNum)) | (0xffff >> (8 * (2 - ByteNum)));
+      EltBits = 32;
+    } else if (OpCmode == 0x1e) {
+      // 64-bit vector elements
+      for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+        if ((ModImm >> ByteNum) & 1)
+          Val |= (uint64_t)0xff << (8 * ByteNum);
+      }
+      EltBits = 64;
+    } else {
+      assert(false && "Unsupported NEON immediate");
+    }
+    return Val;
+  }
 } // end namespace ARM_AM
 } // end namespace llvm
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 2528854..49c16f3 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -56,7 +56,7 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   MachineInstr *MI = MBBI;
   MachineFunction &MF = *MI->getParent()->getParent();
 
-  unsigned TSFlags = MI->getDesc().TSFlags;
+  uint64_t TSFlags = MI->getDesc().TSFlags;
   bool isPre = false;
   switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
   default: return NULL;
@@ -199,9 +199,9 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
 
 bool
 ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                        MachineBasicBlock::iterator MI,
-                                const std::vector<CalleeSavedInfo> &CSI,
-                                const TargetRegisterInfo *TRI) const {
+                                            MachineBasicBlock::iterator MI,
+                                        const std::vector<CalleeSavedInfo> &CSI,
+                                          const TargetRegisterInfo *TRI) const {
   if (CSI.empty())
     return false;
 
@@ -227,8 +227,9 @@ ARMBaseInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
 
     // Insert the spill to the stack frame. The register is killed at the spill
     //
+    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
     storeRegToStackSlot(MBB, MI, Reg, isKill,
-                        CSI[i].getFrameIdx(), CSI[i].getRegClass(), TRI);
+                        CSI[i].getFrameIdx(), RC, TRI);
   }
   return true;
 }
@@ -347,10 +348,8 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
 
 unsigned
 ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                MachineBasicBlock *FBB,
-                               const SmallVectorImpl<MachineOperand> &Cond) const {
-  // FIXME this should probably have a DebugLoc argument
-  DebugLoc dl;
-
+                               const SmallVectorImpl<MachineOperand> &Cond,
+                               DebugLoc DL) const {
   ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
   int BOpc = !AFI->isThumbFunction()
     ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
@@ -364,17 +363,17 @@ ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
 
   if (FBB == 0) {
     if (Cond.empty()) // Unconditional branch?
-      BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
+      BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
     else
-      BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
+      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
         .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
     return 1;
   }
 
   // Two-way conditional branch.
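Concretely, the modified-immediate helpers defined above round-trip like this; a checked sketch that re-declares createNEONModImm so it compiles standalone (the 0xe and 0xa OpCmode values follow the decode cases shown):

    #include <cassert>

    // Same packing as ARM_AM::createNEONModImm: OpCmode (5 bits) above Imm8.
    static unsigned createNEONModImm(unsigned OpCmode, unsigned Val) {
      return (OpCmode << 8) | Val;
    }

    int main() {
      // OpCmode 0xe: 8-bit elements, each equal to the immediate (VMOV.i8).
      unsigned ModImm = createNEONModImm(0xe, 0xAB);
      assert(ModImm == 0xEAB);
      assert(((ModImm >> 8) & 0x1f) == 0xe);   // getNEONModImmOpCmode
      assert((ModImm & 0xff) == 0xAB);         // getNEONModImmVal

      // OpCmode 0xa: (OpCmode & 0xc) == 0x8, so 16-bit elements with
      // ByteNum = (0xa & 0x6) >> 1 == 1: the byte sits in bits 15-8, so
      // decodeNEONModImm would yield Val == 0xAB00 and EltBits == 16.
      unsigned ByteNum = (0xa & 0x6) >> 1;
      assert(ByteNum == 1);
      assert((0xABu << (8 * ByteNum)) == 0xAB00);
      return 0;
    }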
- BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB) + BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()); - BuildMI(&MBB, dl, get(BOpc)).addMBB(FBB); + BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB); return 2; } @@ -487,7 +486,7 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { // Basic size info comes from the TSFlags field. const TargetInstrDesc &TID = MI->getDesc(); - unsigned TSFlags = TID.TSFlags; + uint64_t TSFlags = TID.TSFlags; unsigned Opc = MI->getOpcode(); switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) { @@ -524,11 +523,11 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { return 10; case ARM::Int_eh_sjlj_setjmp: case ARM::Int_eh_sjlj_setjmp_nofp: - return 24; + return 20; case ARM::tInt_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp: case ARM::t2Int_eh_sjlj_setjmp_nofp: - return 14; + return 12; case ARM::BR_JTr: case ARM::BR_JTm: case ARM::BR_JTadd: @@ -595,6 +594,7 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, return true; } case ARM::MOVr: + case ARM::MOVr_TC: case ARM::tMOVr: case ARM::tMOVgpr2tgpr: case ARM::tMOVtgpr2gpr: @@ -693,75 +693,44 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -bool -ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - // tGPR is used sometimes in ARM instructions that need to avoid using - // certain registers. Just treat it as GPR here. - if (DestRC == ARM::tGPRRegisterClass) - DestRC = ARM::GPRRegisterClass; - if (SrcRC == ARM::tGPRRegisterClass) - SrcRC = ARM::GPRRegisterClass; - - // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. - if (DestRC == ARM::DPR_8RegisterClass) - DestRC = ARM::DPR_VFP2RegisterClass; - if (SrcRC == ARM::DPR_8RegisterClass) - SrcRC = ARM::DPR_VFP2RegisterClass; - - // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. - if (DestRC == ARM::QPR_VFP2RegisterClass || - DestRC == ARM::QPR_8RegisterClass) - DestRC = ARM::QPRRegisterClass; - if (SrcRC == ARM::QPR_VFP2RegisterClass || - SrcRC == ARM::QPR_8RegisterClass) - SrcRC = ARM::QPRRegisterClass; - - // Allow QQPR / QQPR_VFP2 cross-class copies. - if (DestRC == ARM::QQPR_VFP2RegisterClass) - DestRC = ARM::QQPRRegisterClass; - if (SrcRC == ARM::QQPR_VFP2RegisterClass) - SrcRC = ARM::QQPRRegisterClass; - - // Disallow copies of unequal sizes. - if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize()) - return false; - - if (DestRC == ARM::GPRRegisterClass) { - if (SrcRC == ARM::SPRRegisterClass) - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg) - .addReg(SrcReg)); - else - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), - DestReg).addReg(SrcReg))); - } else { - unsigned Opc; - - if (DestRC == ARM::SPRRegisterClass) - Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS); - else if (DestRC == ARM::DPRRegisterClass) - Opc = ARM::VMOVD; - else if (DestRC == ARM::DPR_VFP2RegisterClass || - SrcRC == ARM::DPR_VFP2RegisterClass) - // Always use neon reg-reg move if source or dest is NEON-only regclass. 
- Opc = ARM::VMOVDneon; - else if (DestRC == ARM::QPRRegisterClass) - Opc = ARM::VMOVQ; - else if (DestRC == ARM::QQPRRegisterClass) - Opc = ARM::VMOVQQ; - else if (DestRC == ARM::QQQQPRRegisterClass) - Opc = ARM::VMOVQQQQ; - else - return false; - - AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg)); +void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + bool GPRDest = ARM::GPRRegClass.contains(DestReg); + bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); + + if (GPRDest && GPRSrc) { + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)))); + return; } - return true; + bool SPRDest = ARM::SPRRegClass.contains(DestReg); + bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); + + unsigned Opc; + if (SPRDest && SPRSrc) + Opc = ARM::VMOVS; + else if (GPRDest && SPRSrc) + Opc = ARM::VMOVRS; + else if (SPRDest && GPRSrc) + Opc = ARM::VMOVSR; + else if (ARM::DPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVD; + else if (ARM::QPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVQ; + else if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVQQ; + else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) + Opc = ARM::VMOVQQQQ; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg); + MIB.addReg(SrcReg, getKillRegState(KillSrc)); + if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ) + AddDefaultPred(MIB); } static const @@ -795,30 +764,34 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. 
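The copyPhysReg rewrite above replaces class-rewriting plus a boolean failure path with one opcode choice per (destination, source) register-class pair, and llvm_unreachable for impossible pairs. Reduced to a toy with hypothetical names:

    #include <cstdio>

    // Toy mirror of copyPhysReg's dispatch: one opcode per class pair, no
    // register-class munging, and no "return false" failure path.
    enum RegClass { GPR, SPR, DPR, QPR };

    const char *pickMoveOpcode(RegClass Dst, RegClass Src) {
      if (Dst == GPR && Src == GPR) return "MOVr";
      if (Dst == SPR && Src == SPR) return "VMOVS";
      if (Dst == GPR && Src == SPR) return "VMOVRS";
      if (Dst == SPR && Src == GPR) return "VMOVSR";
      if (Dst == DPR && Src == DPR) return "VMOVD";
      if (Dst == QPR && Src == QPR) return "VMOVQ";
      return 0;  // "Impossible reg-to-reg copy" in the real code
    }

    int main() {
      std::printf("%s\n", pickMoveOpcode(GPR, SPR));  // prints VMOVRS
      return 0;
    }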
- if (RC == ARM::tGPRRegisterClass) + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) RC = ARM::GPRRegisterClass; - if (RC == ARM::GPRRegisterClass) { + switch (RC->getID()) { + case ARM::GPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::SPRRegisterClass) { + break; + case ARM::SPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::DPRRegisterClass || - RC == ARM::DPR_VFP2RegisterClass || - RC == ARM::DPR_8RegisterClass) { + break; + case ARM::DPRRegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::DPR_8RegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass || - RC == ARM::QPR_8RegisterClass) { + break; + case ARM::QPRRegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::QPR_8RegClassID: // FIXME: Neon instructions should support predicates if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q)) - .addFrameIndex(FI).addImm(128) + .addFrameIndex(FI).addImm(16) .addReg(SrcReg, getKillRegState(isKill)) .addMemOperand(MMO)); } else { @@ -828,12 +801,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } - } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ + break; + case ARM::QQPRRegClassID: + case ARM::QQPR_VFP2RegClassID: if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { // FIXME: It's possible to only store part of the QQ register if the // spilled def has a sub-register index. - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST2q32)) - .addFrameIndex(FI).addImm(128); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VST1d64Q)) + .addFrameIndex(FI).addImm(16); MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); @@ -850,8 +825,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI); AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI); } - } else { - assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); + break; + case ARM::QQQQPRRegClassID: { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMD)) .addFrameIndex(FI) @@ -865,6 +840,10 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI); MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI); AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI); + break; + } + default: + llvm_unreachable("Unknown regclass!"); } } @@ -886,26 +865,30 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // tGPR is used sometimes in ARM instructions that need to avoid using // certain registers. Just treat it as GPR here. 
- if (RC == ARM::tGPRRegisterClass) + if (RC == ARM::tGPRRegisterClass || RC == ARM::tcGPRRegisterClass) RC = ARM::GPRRegisterClass; - if (RC == ARM::GPRRegisterClass) { + switch (RC->getID()) { + case ARM::GPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) .addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::SPRRegisterClass) { + break; + case ARM::SPRRegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::DPRRegisterClass || - RC == ARM::DPR_VFP2RegisterClass || - RC == ARM::DPR_8RegisterClass) { + break; + case ARM::DPRRegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::DPR_8RegClassID: AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); - } else if (RC == ARM::QPRRegisterClass || - RC == ARM::QPR_VFP2RegisterClass || - RC == ARM::QPR_8RegisterClass) { + break; + case ARM::QPRRegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::QPR_8RegClassID: if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg) - .addFrameIndex(FI).addImm(128) + .addFrameIndex(FI).addImm(16) .addMemOperand(MMO)); } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg) @@ -913,14 +896,16 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) .addMemOperand(MMO)); } - } else if (RC == ARM::QQPRRegisterClass || RC == ARM::QQPR_VFP2RegisterClass){ + break; + case ARM::QQPRRegClassID: + case ARM::QQPR_VFP2RegClassID: if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) { - MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD2q32)); + MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLD1d64Q)); MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - AddDefaultPred(MIB.addFrameIndex(FI).addImm(128).addMemOperand(MMO)); + AddDefaultPred(MIB.addFrameIndex(FI).addImm(16).addMemOperand(MMO)); } else { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) @@ -932,21 +917,25 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); } - } else { - assert(RC == ARM::QQQQPRRegisterClass && "Unknown regclass!"); - MachineInstrBuilder MIB = - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) - .addFrameIndex(FI) - .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) - .addMemOperand(MMO); - MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); - MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); - AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); + break; + case ARM::QQQQPRRegClassID: { + MachineInstrBuilder MIB = + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMD)) + .addFrameIndex(FI) + .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))) + 
.addMemOperand(MMO); + MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::Define, TRI); + MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::Define, TRI); + AddDReg(MIB, DestReg, ARM::dsub_7, RegState::Define, TRI); + break; + } + default: + llvm_unreachable("Unknown regclass!"); } } @@ -960,223 +949,6 @@ ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, return &*MIB; } -MachineInstr *ARMBaseInstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl &Ops, int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = NULL; - if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { - // If it is updating CPSR, then it cannot be folded. - if (MI->getOperand(4).getReg() == ARM::CPSR && !MI->getOperand(4).isDead()) - return NULL; - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - if (Opc == ARM::MOVr) - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::STR)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); - else // ARM::t2MOVr - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - if (Opc == ARM::MOVr) - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::LDR)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), DstSubReg) - .addFrameIndex(FI).addReg(0).addImm(0).addImm(Pred).addReg(PredReg); - else // ARM::t2MOVr - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - } else if (Opc == ARM::tMOVgpr2gpr || - Opc == ARM::tMOVtgpr2gpr || - Opc == ARM::tMOVgpr2tgpr) { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2STRi12)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::t2LDRi12)) - .addReg(DstReg, - 
RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); - } - } else if (Opc == ARM::VMOVS) { - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI) - .addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVDneon) { - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); - } - } else if (Opc == ARM::VMOVQ) { - MachineFrameInfo &MFI = *MF.getFrameInfo(); - unsigned Pred = MI->getOperand(2).getImm(); - unsigned PredReg = MI->getOperand(3).getReg(); - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - unsigned SrcSubReg = MI->getOperand(1).getSubReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - if (MFI.getObjectAlignment(FI) >= 16 && - getRegisterInfo().canRealignStack(MF)) { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VST1q)) - .addFrameIndex(FI).addImm(128) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addImm(Pred).addReg(PredReg); - } else { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTMQ)) - .addReg(SrcReg, - getKillRegState(isKill) | getUndefRegState(isUndef), - SrcSubReg) - .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) - .addImm(Pred).addReg(PredReg); - } - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned DstSubReg = MI->getOperand(0).getSubReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - if (MFI.getObjectAlignment(FI) >= 16 && - getRegisterInfo().canRealignStack(MF)) { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLD1q)) - .addReg(DstReg, - 
RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(128).addImm(Pred).addReg(PredReg); - } else { - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDMQ)) - .addReg(DstReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef), - DstSubReg) - .addFrameIndex(FI).addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4)) - .addImm(Pred).addReg(PredReg); - } - } - } - - return NewMI; -} - -MachineInstr* -ARMBaseInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - // FIXME - return 0; -} - -bool -ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const { - if (Ops.size() != 1) return false; - - unsigned Opc = MI->getOpcode(); - if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { - // If it is updating CPSR, then it cannot be folded. - return MI->getOperand(4).getReg() != ARM::CPSR || - MI->getOperand(4).isDead(); - } else if (Opc == ARM::tMOVgpr2gpr || - Opc == ARM::tMOVtgpr2gpr || - Opc == ARM::tMOVgpr2tgpr) { - return true; - } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD || - Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { - return true; - } - - // FIXME: VMOVQQ and VMOVQQQQ? - - return false; -} - /// Create a copy of a const pool value. Update CPI to the new index and return /// the label UID. static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { @@ -1211,17 +983,12 @@ reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { - if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { - DestReg = TRI->getSubReg(DestReg, SubIdx); - SubIdx = 0; - } - + const TargetRegisterInfo &TRI) const { unsigned Opcode = Orig->getOpcode(); switch (Opcode) { default: { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); + MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); MBB.insert(I, MI); break; } @@ -1237,9 +1004,6 @@ reMaterialize(MachineBasicBlock &MBB, break; } } - - MachineInstr *NewMI = prior(I); - NewMI->getOperand(0).setSubReg(SubIdx); } MachineInstr * @@ -1291,6 +1055,165 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); } +/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to +/// determine if two loads are loading from the same base address. It should +/// only return true if the base pointers are the same and the only differences +/// between the two addresses is the offset. It also returns the offsets by +/// reference. +bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, + int64_t &Offset2) const { + // Don't worry about Thumb: just ARM and Thumb2. 
+  if (Subtarget.isThumb1Only()) return false;
+
+  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
+    return false;
+
+  switch (Load1->getMachineOpcode()) {
+  default:
+    return false;
+  case ARM::LDR:
+  case ARM::LDRB:
+  case ARM::LDRD:
+  case ARM::LDRH:
+  case ARM::LDRSB:
+  case ARM::LDRSH:
+  case ARM::VLDRD:
+  case ARM::VLDRS:
+  case ARM::t2LDRi8:
+  case ARM::t2LDRDi8:
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRi12:
+  case ARM::t2LDRSHi12:
+    break;
+  }
+
+  switch (Load2->getMachineOpcode()) {
+  default:
+    return false;
+  case ARM::LDR:
+  case ARM::LDRB:
+  case ARM::LDRD:
+  case ARM::LDRH:
+  case ARM::LDRSB:
+  case ARM::LDRSH:
+  case ARM::VLDRD:
+  case ARM::VLDRS:
+  case ARM::t2LDRi8:
+  case ARM::t2LDRDi8:
+  case ARM::t2LDRSHi8:
+  case ARM::t2LDRi12:
+  case ARM::t2LDRSHi12:
+    break;
+  }
+
+  // Check if base addresses and chain operands match.
+  if (Load1->getOperand(0) != Load2->getOperand(0) ||
+      Load1->getOperand(4) != Load2->getOperand(4))
+    return false;
+
+  // Index should be Reg0.
+  if (Load1->getOperand(3) != Load2->getOperand(3))
+    return false;
+
+  // Determine the offsets.
+  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
+      isa<ConstantSDNode>(Load2->getOperand(1))) {
+    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
+    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
+    return true;
+  }
+
+  return false;
+}
+
+/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
+/// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+/// be scheduled togther. On some targets if two loads are loading from
+/// addresses in the same cache line, it's better if they are scheduled
+/// together. This function takes two integers that represent the load offsets
+/// from the common base address. It returns true if it decides it's desirable
+/// to schedule the two loads together. "NumLoads" is the number of loads that
+/// have already been scheduled after Load1.
+bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
+                                               int64_t Offset1, int64_t Offset2,
+                                               unsigned NumLoads) const {
+  // Don't worry about Thumb: just ARM and Thumb2.
+  if (Subtarget.isThumb1Only()) return false;
+
+  assert(Offset2 > Offset1);
+
+  if ((Offset2 - Offset1) / 8 > 64)
+    return false;
+
+  if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
+    return false; // FIXME: overly conservative?
+
+  // Four loads in a row should be sufficient.
+  if (NumLoads >= 3)
+    return false;
+
+  return true;
+}
+
+bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
+                                            const MachineBasicBlock *MBB,
+                                            const MachineFunction &MF) const {
+  // Debug info is never a scheduling boundary. It's necessary to be explicit
+  // due to the special treatment of IT instructions below, otherwise a
+  // dbg_value followed by an IT will result in the IT instruction being
+  // considered a scheduling hazard, which is wrong. It should be the actual
+  // instruction preceding the dbg_value instruction(s), just like it is
+  // when debug info is not present.
+  if (MI->isDebugValue())
+    return false;
+
+  // Terminators and labels can't be scheduled around.
+  if (MI->getDesc().isTerminator() || MI->isLabel())
+    return true;
+
+  // Treat the start of the IT block as a scheduling boundary, but schedule
+  // t2IT along with all instructions following it.
+  // FIXME: This is a big hammer. But the alternative is to add all potential
+  // true and anti dependencies to IT block instructions as implicit operands
+  // to the t2IT instruction. The added compile time and complexity does not
+  // seem worth it.
+  MachineBasicBlock::const_iterator I = MI;
+  // Make sure to skip any dbg_value instructions
+  while (++I != MBB->end() && I->isDebugValue())
+    ;
+  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
+    return true;
+
+  // Don't attempt to schedule around any instruction that defines
+  // a stack-oriented pointer, as it's unlikely to be profitable. This
+  // saves compile time, because it doesn't require every single
+  // stack slot reference to depend on the instruction that does the
+  // modification.
+  if (MI->definesRegister(ARM::SP))
+    return true;
+
+  return false;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const {
+  if (!NumInstrs)
+    return false;
+  if (Subtarget.getCPUString() == "generic")
+    // Generic (and overly aggressive) if-conversion limits for testing.
+    return NumInstrs <= 10;
+  else if (Subtarget.hasV7Ops())
+    return NumInstrs <= 3;
+  return NumInstrs <= 2;
+}
+
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+                    MachineBasicBlock &FMBB, unsigned NumF) const {
+  return NumT && NumF && NumT <= 2 && NumF <= 2;
+}
+
 /// getInstrPredicate - If instruction is predicated, returns its predicate
 /// condition, otherwise returns AL. It also returns the condition code
 /// register by reference.
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index b566271..89a2db7 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -116,11 +116,25 @@ namespace ARMII {
     // Thumb format
     ThumbFrm      = 24 << FormShift,
 
-    // NEON format
-    NEONFrm       = 25 << FormShift,
-    NEONGetLnFrm  = 26 << FormShift,
-    NEONSetLnFrm  = 27 << FormShift,
-    NEONDupFrm    = 28 << FormShift,
+    // Miscelleaneous format
+    MiscFrm       = 25 << FormShift,
+
+    // NEON formats
+    NGetLnFrm     = 26 << FormShift,
+    NSetLnFrm     = 27 << FormShift,
+    NDupFrm       = 28 << FormShift,
+    NLdStFrm      = 29 << FormShift,
+    N1RegModImmFrm= 30 << FormShift,
+    N2RegFrm      = 31 << FormShift,
+    NVCVTFrm      = 32 << FormShift,
+    NVDupLnFrm    = 33 << FormShift,
+    N2RegVShLFrm  = 34 << FormShift,
+    N2RegVShRFrm  = 35 << FormShift,
+    N3RegFrm      = 36 << FormShift,
+    N3RegVShFrm   = 37 << FormShift,
+    NVExtFrm      = 38 << FormShift,
+    NVMulSLFrm    = 39 << FormShift,
+    NVTBLFrm      = 40 << FormShift,
 
     //===------------------------------------------------------------------===//
     // Misc flags.
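Stripped of the SelectionDAG plumbing, the clustering policy in shouldScheduleLoadsNear above is a small pure predicate; a standalone restatement (hypothetical wantLoadsTogether, same constants):

    // Standalone restatement of the limits used above: same opcode, offsets
    // no more than 64 * 8 == 512 bytes apart, and at most three loads
    // already clustered behind the first.
    bool wantLoadsTogether(unsigned Opc1, unsigned Opc2,
                           long long Offset1, long long Offset2,
                           unsigned NumLoads) {
      if ((Offset2 - Offset1) / 8 > 64)  // too far apart to be worth pairing
        return false;
      if (Opc1 != Opc2)                  // FIXME in the patch: overly conservative
        return false;
      return NumLoads < 3;               // "four loads in a row" is the cap
    }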
@@ -213,7 +227,8 @@ public: virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; + const SmallVectorImpl &Cond, + DebugLoc DL) const; virtual bool ReverseBranchCondition(SmallVectorImpl &Cond) const; @@ -258,12 +273,10 @@ public: virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -283,29 +296,51 @@ public: const MDNode *MDPtr, DebugLoc DL) const; - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const; - virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo &TRI) const; MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const; virtual bool produceSameValue(const MachineInstr *MI0, const MachineInstr *MI1) const; + + /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to + /// determine if two loads are loading from the same base address. It should + /// only return true if the base pointers are the same and the only + /// differences between the two addresses is the offset. It also returns the + /// offsets by reference. + virtual bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, + int64_t &Offset1, int64_t &Offset2)const; + + /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to + /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should + /// be scheduled togther. On some targets if two loads are loading from + /// addresses in the same cache line, it's better if they are scheduled + /// together. This function takes two integers that represent the load offsets + /// from the common base address. It returns true if it decides it's desirable + /// to schedule the two loads together. "NumLoads" is the number of loads that + /// have already been scheduled after Load1. 
+ virtual bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, + int64_t Offset1, int64_t Offset2, + unsigned NumLoads) const; + + virtual bool isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs) const; + + virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT, + MachineBasicBlock &FMBB,unsigned NumF) const; + + virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, + unsigned NumInstrs) const { + return NumInstrs && NumInstrs == 1; + } }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 82458d2..182bd99 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -170,56 +170,6 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return STI.isTargetDarwin() ? DarwinCalleeSavedRegs : CalleeSavedRegs; } -const TargetRegisterClass* const * -ARMBaseRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - static const TargetRegisterClass * const ThumbCalleeSavedRegClasses[] = { - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::tGPRRegClass, - &ARM::tGPRRegClass,&ARM::tGPRRegClass,&ARM::tGPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - static const TargetRegisterClass * const DarwinCalleeSavedRegClasses[] = { - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - static const TargetRegisterClass * const DarwinThumbCalleeSavedRegClasses[] ={ - &ARM::GPRRegClass, &ARM::tGPRRegClass, &ARM::tGPRRegClass, - &ARM::tGPRRegClass, &ARM::tGPRRegClass, &ARM::GPRRegClass, - &ARM::GPRRegClass, &ARM::GPRRegClass, - - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, &ARM::DPRRegClass, - 0 - }; - - if (STI.isThumb1Only()) { - return STI.isTargetDarwin() - ? DarwinThumbCalleeSavedRegClasses : ThumbCalleeSavedRegClasses; - } - return STI.isTargetDarwin() - ? DarwinCalleeSavedRegClasses : CalleeSavedRegClasses; -} - BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { // FIXME: avoid re-calculating this everytime. 
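The new if-conversion hooks declared here amount to a handful of thresholds; restated as plain functions (hypothetical names, with bools standing in for the Subtarget.getCPUString() == "generic" and hasV7Ops() queries):

    // The thresholds behind the new if-conversion hooks, as plain functions.
    bool profitableToIfCvtSingle(bool IsGeneric, bool IsV7,
                                 unsigned NumInstrs) {
      if (!NumInstrs) return false;
      if (IsGeneric) return NumInstrs <= 10;  // loose limit used for testing
      return IsV7 ? NumInstrs <= 3 : NumInstrs <= 2;
    }

    bool profitableToIfCvtDiamond(unsigned NumT, unsigned NumF) {
      return NumT && NumF && NumT <= 2 && NumF <= 2;  // both sides tiny
    }

    bool profitableToDupForIfCvt(unsigned NumInstrs) {
      return NumInstrs == 1;  // the inline hook accepts exactly one instruction
    }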
@@ -352,7 +302,7 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, } bool -ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC, +ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, SmallVectorImpl &SubIndices, unsigned &NewSubIdx) const { @@ -724,6 +674,15 @@ ARMBaseRegisterInfo::estimateRSStackSizeLimit(MachineFunction &MF) const { I != E; ++I) { for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { if (!I->getOperand(i).isFI()) continue; + + // When using ADDri to get the address of a stack object, 255 is the + // largest offset guaranteed to fit in the immediate offset. + if (I->getOpcode() == ARM::ADDri) { + Limit = std::min(Limit, (1U << 8) - 1); + break; + } + + // Otherwise check the addressing mode. switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { case ARMII::AddrMode3: case ARMII::AddrModeT2_i8: @@ -765,6 +724,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, SmallVector UnspilledCS1GPRs; SmallVector UnspilledCS2GPRs; ARMFunctionInfo *AFI = MF.getInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. @@ -780,7 +740,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. const unsigned *CSRegs = getCalleeSavedRegs(); - const TargetRegisterClass* const *CSRegClasses = getCalleeSavedRegClasses(); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; @@ -798,50 +757,50 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } } - if (CSRegClasses[i] == ARM::GPRRegisterClass || - CSRegClasses[i] == ARM::tGPRRegisterClass) { - if (Spilled) { - NumGPRSpills++; + if (!ARM::GPRRegisterClass->contains(Reg)) + continue; - if (!STI.isTargetDarwin()) { - if (Reg == ARM::LR) - LRSpilled = true; - CS1Spilled = true; - continue; - } + if (Spilled) { + NumGPRSpills++; - // Keep track if LR and any of R4, R5, R6, and R7 is spilled. - switch (Reg) { - case ARM::LR: + if (!STI.isTargetDarwin()) { + if (Reg == ARM::LR) LRSpilled = true; - // Fallthrough - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - CS1Spilled = true; - break; - default: - break; - } - } else { - if (!STI.isTargetDarwin()) { - UnspilledCS1GPRs.push_back(Reg); - continue; - } + CS1Spilled = true; + continue; + } - switch (Reg) { - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - UnspilledCS1GPRs.push_back(Reg); - break; - default: - UnspilledCS2GPRs.push_back(Reg); - break; - } + // Keep track if LR and any of R4, R5, R6, and R7 is spilled. + switch (Reg) { + case ARM::LR: + LRSpilled = true; + // Fallthrough + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + CS1Spilled = true; + break; + default: + break; + } + } else { + if (!STI.isTargetDarwin()) { + UnspilledCS1GPRs.push_back(Reg); + continue; + } + + switch (Reg) { + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + UnspilledCS1GPRs.push_back(Reg); + break; + default: + UnspilledCS2GPRs.push_back(Reg); + break; } } } @@ -862,9 +821,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // offset, make sure a register (or a spill slot) is available for the // register scavenger. 
Note that if we're indexing off the frame pointer, the // effective stack size is 4 bytes larger since the FP points to the stack - // slot of the previous FP. - bool BigStack = RS && - estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF); + // slot of the previous FP. Also, if we have variable sized objects in the + // function, stack slot references will often be negative, and some of + // our instructions are positive-offset only, so conservatively consider + // that case to want a spill slot (or register) as well. + // FIXME: We could add logic to be more precise about negative offsets + // and which instructions will need a scratch register for them. Is it + // worth the effort and added fragility? + bool BigStack = + (RS && (estimateStackSize(MF) + (hasFP(MF) ? 4:0) >= + estimateRSStackSizeLimit(MF))) || MFI->hasVarSizedObjects(); bool ExtraCSSpill = false; if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) { @@ -957,7 +923,6 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // note: Thumb1 functions spill to R12, not the stack. Reserve a slot // closest to SP or frame pointer. const TargetRegisterClass *RC = ARM::GPRRegisterClass; - MachineFrameInfo *MFI = MF.getFrameInfo(); RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); @@ -1622,6 +1587,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { MachineBasicBlock::iterator MBBI = prior(MBB.end()); assert(MBBI->getDesc().isReturn() && "Can only insert epilog into returning blocks"); + unsigned RetOpcode = MBBI->getOpcode(); DebugLoc dl = MBBI->getDebugLoc(); MachineFrameInfo *MFI = MF.getFrameInfo(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -1696,6 +1662,39 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getGPRCalleeSavedArea1Size()); } + if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || + RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { + // Tail call return: adjust the stack pointer and jump to callee. + MBBI = prior(MBB.end()); + MachineOperand &JumpTarget = MBBI->getOperand(0); + + // Jump to label or value in register. + if (RetOpcode == ARM::TCRETURNdi) { + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else if (RetOpcode == ARM::TCRETURNdiND) { + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND)). + addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), + JumpTarget.getTargetFlags()); + } else if (RetOpcode == ARM::TCRETURNri) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)). + addReg(JumpTarget.getReg(), RegState::Kill); + } else if (RetOpcode == ARM::TCRETURNriND) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). + addReg(JumpTarget.getReg(), RegState::Kill); + } + + MachineInstr *NewMI = prior(MBBI); + for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) + NewMI->addOperand(MBBI->getOperand(i)); + + // Delete the pseudo instruction TCRETURN. 
+ MBB.erase(MBBI); + } + if (VARegSaveSize) emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); } diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 2c9c82d..f7ee0d5 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -69,9 +69,6 @@ public: /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; /// getMatchingSuperRegClass - Return a subclass of the specified register @@ -81,14 +78,15 @@ public: getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const; - /// canCombinedSubRegIndex - Given a register class and a list of sub-register - /// indices, return true if it's possible to combine the sub-register indices - /// into one that corresponds to a larger sub-register. Return the new sub- - /// register index by reference. Note the new index by be zero if the given - /// sub-registers combined to form the whole register. - virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC, - SmallVectorImpl &SubIndices, - unsigned &NewSubIdx) const; + /// canCombineSubRegIndices - Given a register class and a list of + /// subregister indices, return true if it's possible to combine the + /// subregister indices into one that corresponds to a larger + /// subregister. Return the new subregister index by reference. Note the + /// new index may be zero if the given subregisters can be combined to + /// form the whole register. + virtual bool canCombineSubRegIndices(const TargetRegisterClass *RC, + SmallVectorImpl &SubIndices, + unsigned &NewSubIdx) const; const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; @@ -150,8 +148,8 @@ public: virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const; virtual void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) const; virtual unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, FrameIndexValue *Value = NULL, diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index f2730fc..7895cb0 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -55,6 +55,7 @@ namespace { const std::vector *MCPEs; const std::vector *MJTEs; bool IsPIC; + bool IsThumb; void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -67,8 +68,8 @@ namespace { : MachineFunctionPass(&ID), JTI(0), II((const ARMInstrInfo *)tm.getInstrInfo()), TD(tm.getTargetData()), TM(tm), - MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} + MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_), IsThumb(false) {} /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for @@ -139,6 +140,12 @@ namespace { void emitMiscInstruction(const MachineInstr &MI); + void emitNEONLaneInstruction(const MachineInstr &MI); + void emitNEONDupInstruction(const MachineInstr &MI); + void emitNEON1RegModImmInstruction(const MachineInstr &MI); + void emitNEON2RegInstruction(const MachineInstr &MI); + void emitNEON3RegInstruction(const MachineInstr &MI); + /// getMachineOpValue - Return binary 
encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. unsigned getMachineOpValue(const MachineInstr &MI,const MachineOperand &MO); @@ -147,7 +154,8 @@ namespace { } /// getMovi32Value - Return binary encoding of operand for movw/movt. If the - /// machine operand requires relocation, record the relocation and return zero. + /// machine operand requires relocation, record the relocation and return + /// zero. unsigned getMovi32Value(const MachineInstr &MI,const MachineOperand &MO, unsigned Reloc); unsigned getMovi32Value(const MachineInstr &MI, unsigned OpIdx, @@ -193,6 +201,7 @@ bool ARMCodeEmitter::runOnMachineFunction(MachineFunction &MF) { MJTEs = 0; if (MF.getJumpTableInfo()) MJTEs = &MF.getJumpTableInfo()->getJumpTables(); IsPIC = TM.getRelocationModel() == Reloc::PIC_; + IsThumb = MF.getInfo()->isThumbFunction(); JTI->Initialize(MF, IsPIC); MMI = &getAnalysis(); MCE.setModuleInfo(MMI); @@ -347,7 +356,7 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { MCE.processDebugLoc(MI.getDebugLoc(), true); - NumEmitted++; // Keep track of the # of mi's emitted + ++NumEmitted; // Keep track of the # of mi's emitted switch (MI.getDesc().TSFlags & ARMII::FormMask) { default: { llvm_unreachable("Unhandled instruction encoding format!"); @@ -407,6 +416,23 @@ void ARMCodeEmitter::emitInstruction(const MachineInstr &MI) { case ARMII::VFPMiscFrm: emitMiscInstruction(MI); break; + // NEON instructions. + case ARMII::NGetLnFrm: + case ARMII::NSetLnFrm: + emitNEONLaneInstruction(MI); + break; + case ARMII::NDupFrm: + emitNEONDupInstruction(MI); + break; + case ARMII::N1RegModImmFrm: + emitNEON1RegModImmInstruction(MI); + break; + case ARMII::N2RegFrm: + emitNEON2RegInstruction(MI); + break; + case ARMII::N3RegFrm: + emitNEON3RegInstruction(MI); + break; } MCE.processDebugLoc(MI.getDebugLoc(), false); } @@ -1539,4 +1565,144 @@ void ARMCodeEmitter::emitMiscInstruction(const MachineInstr &MI) { emitWordLE(Binary); } +static unsigned encodeNEONRd(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegD = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegD = ARMRegisterInfo::getRegisterNumbering(RegD); + Binary |= (RegD & 0xf) << ARMII::RegRdShift; + Binary |= ((RegD >> 4) & 1) << ARMII::D_BitShift; + return Binary; +} + +static unsigned encodeNEONRn(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegN = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegN = ARMRegisterInfo::getRegisterNumbering(RegN); + Binary |= (RegN & 0xf) << ARMII::RegRnShift; + Binary |= ((RegN >> 4) & 1) << ARMII::N_BitShift; + return Binary; +} + +static unsigned encodeNEONRm(const MachineInstr &MI, unsigned OpIdx) { + unsigned RegM = MI.getOperand(OpIdx).getReg(); + unsigned Binary = 0; + RegM = ARMRegisterInfo::getRegisterNumbering(RegM); + Binary |= (RegM & 0xf); + Binary |= ((RegM >> 4) & 1) << ARMII::M_BitShift; + return Binary; +} + +/// convertNEONDataProcToThumb - Convert the ARM mode encoding for a NEON +/// data-processing instruction to the corresponding Thumb encoding. 
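// Reviewer note: a worked example for convertNEONDataProcToThumb below,
// using an assumed encoding value. For Binary = 0xf2201d40,
// UBit = (0xf2201d40 >> 24) & 1 = 0, so the result is
// 0xef000000 | (0 << 28) | 0x201d40 = 0xef201d40; a U = 1 pattern
// (0xf3xxxxxx) maps to 0xffxxxxxx instead. Only the top bits move; the
// low 24 bits carry over unchanged.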
+static unsigned convertNEONDataProcToThumb(unsigned Binary) { + assert((Binary & 0xfe000000) == 0xf2000000 && + "not an ARM NEON data-processing instruction"); + unsigned UBit = (Binary >> 24) & 1; + return 0xef000000 | (UBit << 28) | (Binary & 0xffffff); +} + +void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + + unsigned RegTOpIdx, RegNOpIdx, LnOpIdx; + const TargetInstrDesc &TID = MI.getDesc(); + if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) { + RegTOpIdx = 0; + RegNOpIdx = 1; + LnOpIdx = 2; + } else { // ARMII::NSetLnFrm + RegTOpIdx = 2; + RegNOpIdx = 0; + LnOpIdx = 3; + } + + // Set the conditional execution predicate + Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; + + unsigned RegT = MI.getOperand(RegTOpIdx).getReg(); + RegT = ARMRegisterInfo::getRegisterNumbering(RegT); + Binary |= (RegT << ARMII::RegRdShift); + Binary |= encodeNEONRn(MI, RegNOpIdx); + + unsigned LaneShift; + if ((Binary & (1 << 22)) != 0) + LaneShift = 0; // 8-bit elements + else if ((Binary & (1 << 5)) != 0) + LaneShift = 1; // 16-bit elements + else + LaneShift = 2; // 32-bit elements + + unsigned Lane = MI.getOperand(LnOpIdx).getImm() << LaneShift; + unsigned Opc1 = Lane >> 2; + unsigned Opc2 = Lane & 3; + assert((Opc1 & ~3) == 0 && "out-of-range lane number operand"); + Binary |= (Opc1 << 21); + Binary |= (Opc2 << 5); + + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEONDupInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + + // Set the conditional execution predicate + Binary |= (IsThumb ? ARMCC::AL : II->getPredicate(&MI)) << ARMII::CondShift; + + unsigned RegT = MI.getOperand(1).getReg(); + RegT = ARMRegisterInfo::getRegisterNumbering(RegT); + Binary |= (RegT << ARMII::RegRdShift); + Binary |= encodeNEONRn(MI, 0); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) { + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd. + Binary |= encodeNEONRd(MI, 0); + // Immediate fields: Op, Cmode, I, Imm3, Imm4 + unsigned Imm = MI.getOperand(1).getImm(); + unsigned Op = (Imm >> 12) & 1; + unsigned Cmode = (Imm >> 8) & 0xf; + unsigned I = (Imm >> 7) & 1; + unsigned Imm3 = (Imm >> 4) & 0x7; + unsigned Imm4 = Imm & 0xf; + Binary |= (I << 24) | (Imm3 << 16) | (Cmode << 8) | (Op << 5) | Imm4; + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd; source register in Dm. + unsigned OpIdx = 0; + Binary |= encodeNEONRd(MI, OpIdx++); + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRm(MI, OpIdx); + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + // FIXME: This does not handle VDUPfdf or VDUPfqf. + emitWordLE(Binary); +} + +void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) { + const TargetInstrDesc &TID = MI.getDesc(); + unsigned Binary = getBinaryCodeForInstr(MI); + // Destination register is encoded in Dd; source registers in Dn and Dm.
+ unsigned OpIdx = 0; + Binary |= encodeNEONRd(MI, OpIdx++); + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRn(MI, OpIdx++); + if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) + ++OpIdx; + Binary |= encodeNEONRm(MI, OpIdx); + if (IsThumb) + Binary = convertNEONDataProcToThumb(Binary); + // FIXME: This does not handle VMOVDneon or VMOVQ. + emitWordLE(Binary); +} + #include "ARMGenCodeEmitter.inc" diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 13d8b74..65a3da6 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -337,7 +337,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { if (CPChange && ++NoCPIters > 30) llvm_unreachable("Constant Island pass failed to converge!"); DEBUG(dumpBBs()); - + // Clear NewWaterList now. If we split a block for branches, it should // appear as "new water" for the next iteration of constant pool placement. NewWaterList.clear(); @@ -361,8 +361,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // After a while, this might be made debug-only, but it is not expensive. verify(MF); - // If LR has been forced spilled and no far jumps (i.e. BL) has been issued. - // Undo the spill / restore of LR if possible. + // If LR has been forced spilled and no far jump (i.e. BL) has been issued, + // undo the spill / restore of LR if possible. if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump()) MadeChange |= UndoLRSpillRestore(); @@ -407,7 +407,7 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, std::vector CPEs; CPEs.push_back(CPEntry(CPEMI, i)); CPEntries.push_back(CPEs); - NumCPEs++; + ++NumCPEs; DEBUG(errs() << "Moved CPI#" << i << " to end of function as #" << i << "\n"); } @@ -418,7 +418,8 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF, static bool BBHasFallthrough(MachineBasicBlock *MBB) { // Get the next machine basic block in the function. MachineFunction::iterator MBBI = MBB; - if (llvm::next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function. + // Can't fall off end of function. + if (llvm::next(MBBI) == MBB->getParent()->end()) return false; MachineBasicBlock *NextBB = llvm::next(MBBI); @@ -491,6 +492,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, unsigned MBBSize = 0; for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) { + if (I->isDebugValue()) + continue; // Add instruction size to MBBSize. MBBSize += TII->GetInstSizeInBytes(I); @@ -722,7 +725,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) { // correspond to anything in the source. unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B; BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB); - NumSplit++; + ++NumSplit; // Update the CFG. All succs of OrigBB are now succs of NewBB. 
while (!OrigBB->succ_empty()) { @@ -945,7 +948,7 @@ bool ARMConstantIslands::DecrementOldEntry(unsigned CPI, MachineInstr *CPEMI) { if (--CPE->RefCount == 0) { RemoveDeadCPEMI(CPEMI); CPE->CPEMI = NULL; - NumCPEs--; + --NumCPEs; return true; } return false; @@ -1246,7 +1249,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, U.CPEMI = BuildMI(NewIsland, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1)); - NumCPEs++; + ++NumCPEs; BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; // Compensate for .align 2 in thumb mode. @@ -1369,7 +1372,7 @@ ARMConstantIslands::FixUpUnconditionalBr(MachineFunction &MF, ImmBranch &Br) { BBSizes[MBB->getNumber()] += 2; AdjustBBOffsetsAfter(MBB, 2); HasFarJump = true; - NumUBrFixed++; + ++NumUBrFixed; DEBUG(errs() << " Changed B to long jump " << *MI); @@ -1402,7 +1405,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) { MachineInstr *BMI = &MBB->back(); bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB); - NumCBrFixed++; + ++NumCBrFixed; if (BMI != MI) { if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) && BMI->getOpcode() == Br.UncondBr) { @@ -1621,7 +1624,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { // constantpool tables? MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); if (MJTI == 0) return false; - + const std::vector &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; @@ -1658,15 +1661,25 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { continue; unsigned IdxReg = MI->getOperand(1).getReg(); bool IdxRegKill = MI->getOperand(1).isKill(); + + // Scan backwards to find the instruction that defines the base + // register. Due to post-RA scheduling, we can't count on it + // immediately preceding the branch instruction. MachineBasicBlock::iterator PrevI = MI; - if (PrevI == MBB->begin()) + MachineBasicBlock::iterator B = MBB->begin(); + while (PrevI != B && !PrevI->definesRegister(BaseReg)) + --PrevI; + + // If for some reason we didn't find it, we can't do anything, so + // just skip this one. + if (!PrevI->definesRegister(BaseReg)) continue; - MachineInstr *AddrMI = --PrevI; + MachineInstr *AddrMI = PrevI; bool OptOk = true; - // Examine the instruction that calculate the jumptable entry address. - // If it's not the one just before the t2BR_JT, we won't delete it, then - // it's not worth doing the optimization. + // Examine the instruction that calculates the jumptable entry address. + // Make sure it only defines the base register and kills any uses + // other than the index register. for (unsigned k = 0, eee = AddrMI->getNumOperands(); k != eee; ++k) { const MachineOperand &MO = AddrMI->getOperand(k); if (!MO.isReg() || !MO.getReg()) @@ -1683,9 +1696,14 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { if (!OptOk) continue; - // The previous instruction should be a tLEApcrel or t2LEApcrelJT, we want + // Now scan back again to find the tLEApcrel or t2LEApcrelJT instruction + // that gave us the initial base register definition. + for (--PrevI; PrevI != B && !PrevI->definesRegister(BaseReg); --PrevI) + ; + + // The instruction should be a tLEApcrel or t2LEApcrelJT; we want // to delete it as well. 
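// Reviewer note: the jump-table optimization above now needs two backward
// scans because post-RA scheduling can separate the instructions. The first
// scan finds the add that computes the entry address; the second, continuing
// below, finds the tLEApcrel/t2LEApcrelJT that materialized BaseReg. Both
// become candidates for deletion only if every operand check passes.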
- MachineInstr *LeaMI = --PrevI; + MachineInstr *LeaMI = PrevI; if ((LeaMI->getOpcode() != ARM::tLEApcrelJT && LeaMI->getOpcode() != ARM::t2LEApcrelJT) || LeaMI->getOperand(0).getReg() != BaseReg) @@ -1729,7 +1747,7 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); if (MJTI == 0) return false; - + const std::vector &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; @@ -1769,7 +1787,7 @@ AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) { MachineFunction &MF = *BB->getParent(); - // If it's the destination block is terminated by an unconditional branch, + // If the destination block is terminated by an unconditional branch, // try to move it; otherwise, create a new block following the jump // table that branches back to the actual target. This is a very simple // heuristic. FIXME: We can definitely improve it. diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 6f4eddf..3119b54 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -15,6 +15,7 @@ #define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H #include "llvm/CodeGen/MachineConstantPool.h" +#include namespace llvm { diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c87f5d7..9c62597 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -144,13 +144,15 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { MachineInstrBuilder Even = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(EvenDst, getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(EvenSrc, getKillRegState(SrcIsKill))); + .addReg(EvenDst, + getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(EvenSrc, getKillRegState(SrcIsKill))); MachineInstrBuilder Odd = AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::VMOVQ)) - .addReg(OddDst, getDefRegState(true) | getDeadRegState(DstIsDead)) - .addReg(OddSrc, getKillRegState(SrcIsKill))); + .addReg(OddDst, + getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(OddSrc, getKillRegState(SrcIsKill))); TransferImpOps(MI, Even, Odd); MI.eraseFromParent(); Modified = true; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 9baef6b..c84d3ff 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMAddressingModes.h" #include "ARMTargetMachine.h" @@ -35,11 +36,6 @@ using namespace llvm; -static cl::opt -UseRegSeq("neon-reg-sequence", cl::Hidden, - cl::desc("Use reg_sequence to model ld / st of multiple neon regs"), - cl::init(true)); - //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine /// instructions for SelectionDAG operations. @@ -147,6 +143,11 @@ private: unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); + /// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2, + /// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be + /// generated to force the table registers to be consecutive. 
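// Reviewer note: REG_SEQUENCE is how the selector says "allocate these
// values as one contiguous register tuple". VTBL/VTBX need that because the
// hardware reads its lookup table from consecutive D registers (e.g. an
// assumed pair d0/d1 for a two-register table).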
+ SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc); + /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); @@ -173,24 +174,17 @@ private: char ConstraintCode, std::vector &OutOps); - /// PairDRegs - Form a quad register from a pair of D registers. - /// + // Form pairs of consecutive S, D, or Q registers. + SDNode *PairSRegs(EVT VT, SDValue V0, SDValue V1); SDNode *PairDRegs(EVT VT, SDValue V0, SDValue V1); - - /// PairDRegs - Form a quad register pair from a pair of Q registers. - /// SDNode *PairQRegs(EVT VT, SDValue V0, SDValue V1); - /// QuadDRegs - Form a quad register pair from a quad of D registers. - /// + // Form sequences of 4 consecutive S, D, or Q registers. + SDNode *QuadSRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); SDNode *QuadDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - - /// QuadQRegs - Form 4 consecutive Q registers. - /// SDNode *QuadQRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); - /// OctoDRegs - Form 8 consecutive D registers. - /// + // Form sequences of 8 consecutive D registers. SDNode *OctoDRegs(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3, SDValue V4, SDValue V5, SDValue V6, SDValue V7); }; @@ -544,10 +538,9 @@ bool ARMDAGToDAGISel::SelectAddrModePC(SDNode *Op, SDValue N, bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset){ // FIXME dl should come from the parent load or store, not the address - DebugLoc dl = Op->getDebugLoc(); if (N.getOpcode() != ISD::ADD) { ConstantSDNode *NC = dyn_cast(N); - if (!NC || NC->getZExtValue() != 0) + if (!NC || !NC->isNullValue()) return false; Base = Offset = N; @@ -788,8 +781,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, if (N.getOpcode() == ISD::ADD) { if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); + // 8 bits. if (((RHSC & 0x3) == 0) && - ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { // 8 bits. + ((RHSC >= 0 && RHSC < 0x400) || (RHSC < 0 && RHSC > -0x400))) { Base = N.getOperand(0); OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -798,7 +792,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8s4(SDNode *Op, SDValue N, } else if (N.getOpcode() == ISD::SUB) { if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { int RHSC = (int)RHS->getZExtValue(); - if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { // 8 bits. + // 8 bits. + if (((RHSC & 0x3) == 0) && (RHSC >= 0 && RHSC < 0x400)) { Base = N.getOperand(0); OffImm = CurDAG->getTargetConstant(-RHSC, MVT::i32); return true; @@ -960,22 +955,24 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { return NULL; } +/// PairSRegs - Form a D register from a pair of S registers. +/// +SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); +} + /// PairDRegs - Form a quad register from a pair of D registers. 
/// SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) { DebugLoc dl = V0.getNode()->getDebugLoc(); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); - if (llvm::ModelWithRegSequence()) { - const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; - return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); - } - SDValue Undef = - SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0); - SDNode *Pair = CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, - VT, Undef, V0, SubReg0); - return CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, - VT, SDValue(Pair, 0), V1, SubReg1); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); } /// PairQRegs - Form 4 consecutive D registers from a pair of Q registers. @@ -988,6 +985,19 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) { return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4); } +/// QuadSRegs - Form 4 consecutive S registers. +/// +SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1, + SDValue V2, SDValue V3) { + DebugLoc dl = V0.getNode()->getDebugLoc(); + SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); + SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32); + SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32); + SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32); + const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 }; + return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8); +} + /// QuadDRegs - Form 4 consecutive D registers. /// SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1, @@ -1088,7 +1098,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, std::vector ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); - if (!llvm::ModelWithRegSequence() || NumVecs < 2) + if (NumVecs < 2) return VLd; SDValue RegSeq; @@ -1129,24 +1139,17 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, Chain = SDValue(VLd, 2 * NumVecs); // Combine the even and odd subregs to produce the result. 
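// Reviewer sketch: all of these helpers share one shape, a REG_SEQUENCE
// whose operands alternate value and subregister index. A minimal pair,
// using the names defined in this file:
//   const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
//   CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
// The register allocator then places V0 and V1 in adjacent registers.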
- if (llvm::ModelWithRegSequence()) { - if (NumVecs == 1) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); - ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); - } else { - SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, - SDValue(VLd, 0), SDValue(VLd, 1), - SDValue(VLd, 2), SDValue(VLd, 3)), 0); - SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); - SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); - ReplaceUses(SDValue(N, 0), Q0); - ReplaceUses(SDValue(N, 1), Q1); - } + if (NumVecs == 1) { + SDNode *Q = PairDRegs(VT, SDValue(VLd, 0), SDValue(VLd, 1)); + ReplaceUses(SDValue(N, 0), SDValue(Q, 0)); } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLd, 2*Vec), SDValue(VLd, 2*Vec+1)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); - } + SDValue QQ = SDValue(QuadDRegs(MVT::v4i64, + SDValue(VLd, 0), SDValue(VLd, 1), + SDValue(VLd, 2), SDValue(VLd, 3)), 0); + SDValue Q0 = CurDAG->getTargetExtractSubreg(ARM::qsub_0, dl, VT, QQ); + SDValue Q1 = CurDAG->getTargetExtractSubreg(ARM::qsub_1, dl, VT, QQ); + ReplaceUses(SDValue(N, 0), Q0); + ReplaceUses(SDValue(N, 1), Q1); } } else { // Otherwise, quad registers are loaded with two separate instructions, @@ -1169,37 +1172,27 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); - if (llvm::ModelWithRegSequence()) { - SDValue V0 = SDValue(VLdA, 0); - SDValue V1 = SDValue(VLdB, 0); - SDValue V2 = SDValue(VLdA, 1); - SDValue V3 = SDValue(VLdB, 1); - SDValue V4 = SDValue(VLdA, 2); - SDValue V5 = SDValue(VLdB, 2); - SDValue V6 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), - 0) - : SDValue(VLdA, 3); - SDValue V7 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), - 0) - : SDValue(VLdB, 3); - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, - V4, V5, V6, V7), 0); - - // Extract out the 3 / 4 Q registers. - assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, - dl, VT, RegSeq); - ReplaceUses(SDValue(N, Vec), Q); - } - } else { - // Combine the even and odd subregs to produce the result. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDNode *Q = PairDRegs(VT, SDValue(VLdA, Vec), SDValue(VLdB, Vec)); - ReplaceUses(SDValue(N, Vec), SDValue(Q, 0)); - } + SDValue V0 = SDValue(VLdA, 0); + SDValue V1 = SDValue(VLdB, 0); + SDValue V2 = SDValue(VLdA, 1); + SDValue V3 = SDValue(VLdB, 1); + SDValue V4 = SDValue(VLdA, 2); + SDValue V5 = SDValue(VLdB, 2); + SDValue V6 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) + : SDValue(VLdA, 3); + SDValue V7 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,RegVT), 0) + : SDValue(VLdB, 3); + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V0, V1, V2, V3, + V4, V5, V6, V7), 0); + + // Extract out the 3 / 4 Q registers. 
+ assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue Q = CurDAG->getTargetExtractSubreg(ARM::qsub_0+Vec, + dl, VT, RegSeq); + ReplaceUses(SDValue(N, Vec), Q); } } ReplaceUses(SDValue(N, NumVecs), Chain); @@ -1209,7 +1202,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { - assert(NumVecs >=1 && NumVecs <= 4 && "VST NumVecs out-of-range"); + assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); SDValue MemAddr, Align; @@ -1247,7 +1240,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, Ops.push_back(Align); if (is64BitVector) { - if (llvm::ModelWithRegSequence() && NumVecs >= 2) { + if (NumVecs >= 2) { SDValue RegSeq; SDValue V0 = N->getOperand(0+3); SDValue V1 = N->getOperand(1+3); @@ -1292,7 +1285,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VST1 and VST2, // storing pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - if (llvm::ModelWithRegSequence() && NumVecs == 2) { + if (NumVecs == 2) { // First extract the pair of Q registers. SDValue Q0 = N->getOperand(3); SDValue Q1 = N->getOperand(4); @@ -1330,76 +1323,48 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. - if (llvm::ModelWithRegSequence()) { - // Form the QQQQ REG_SEQUENCE. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3)); - V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3)); - } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - - SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); - - // Store the even D registers. - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); - Ops.push_back(Reg0); // post-access address offset - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, - RegVT, RegSeq)); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStA, 1); - // Store the odd D registers. - Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, - RegVT, RegSeq); - Ops[NumVecs+5] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; - } else { - Ops.push_back(Reg0); // post-access address offset - - // Store the even subregs. 
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, - N->getOperand(Vec+3))); - Ops.push_back(Pred); - Ops.push_back(Reg0); // predicate register - Ops.push_back(Chain); - unsigned Opc = QOpcodes0[OpcodeIndex]; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStA, 1); - - // Store the odd subregs. - Ops[0] = SDValue(VStA, 0); // MemAddr - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, - N->getOperand(Vec+3)); - Ops[NumVecs+5] = Chain; - Opc = QOpcodes1[OpcodeIndex]; - SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+6); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; - } + // Form the QQQQ REG_SEQUENCE. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + V[i] = CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, RegVT, + N->getOperand(Vec+3)); + V[i+1] = CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, RegVT, + N->getOperand(Vec+3)); + } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + + SDValue RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); + + // Store the even D registers. + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + Ops.push_back(Reg0); // post-access address offset + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0+Vec*2, dl, + RegVT, RegSeq)); + Ops.push_back(Pred); + Ops.push_back(Reg0); // predicate register + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStA, 1); + + // Store the odd D registers. + Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::dsub_1+Vec*2, dl, + RegVT, RegSeq); + Ops[NumVecs+5] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+6); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, @@ -1421,13 +1386,11 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, // Quad registers are handled by load/store of subregs. Find the subreg info. unsigned NumElts = 0; - int SubregIdx = 0; bool Even = false; EVT RegVT = VT; if (!is64BitVector) { RegVT = GetNEONSubregVT(VT); NumElts = RegVT.getVectorNumElements(); - SubregIdx = (Lane < NumElts) ? ARM::dsub_0 : ARM::dsub_1; Even = Lane < NumElts; } @@ -1455,35 +1418,26 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, unsigned Opc = 0; if (is64BitVector) { Opc = DOpcodes[OpcodeIndex]; - if (llvm::ModelWithRegSequence()) { - SDValue RegSeq; - SDValue V0 = N->getOperand(0+3); - SDValue V1 = N->getOperand(1+3); - if (NumVecs == 2) { - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - } else { - SDValue V2 = N->getOperand(2+3); - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : N->getOperand(3+3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } - - // Now extract the D registers back out. 
- Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, - RegSeq)); - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, - RegSeq)); - if (NumVecs > 2) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, - RegSeq)); - if (NumVecs > 3) - Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, - RegSeq)); + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); } else { - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(N->getOperand(Vec+3)); + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); } + + // Now extract the D registers back out. + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT,RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT,RegSeq)); } else { // Check if this is loading the even or odd subreg of a Q register. if (Lane < NumElts) { @@ -1493,31 +1447,24 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, Opc = QOpcodes1[OpcodeIndex]; } - if (llvm::ModelWithRegSequence()) { - SDValue RegSeq; - SDValue V0 = N->getOperand(0+3); - SDValue V1 = N->getOperand(1+3); - if (NumVecs == 2) { - RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); - } else { - SDValue V2 = N->getOperand(2+3); - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : N->getOperand(3+3); - RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); - } - - // Extract the subregs of the input vector. - unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, - RegSeq)); + SDValue RegSeq; + SDValue V0 = N->getOperand(0+3); + SDValue V1 = N->getOperand(1+3); + if (NumVecs == 2) { + RegSeq = SDValue(PairQRegs(MVT::v4i64, V0, V1), 0); } else { - // Extract the subregs of the input vector. - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops.push_back(CurDAG->getTargetExtractSubreg(SubregIdx, dl, RegVT, - N->getOperand(Vec+3))); + SDValue V2 = N->getOperand(2+3); + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : N->getOperand(3+3); + RegSeq = SDValue(QuadQRegs(MVT::v8i64, V0, V1, V2, V3), 0); } + + // Extract the subregs of the input vector. + unsigned SubIdx = Even ? ARM::dsub_0 : ARM::dsub_1; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(SubIdx+Vec*2, dl, RegVT, + RegSeq)); } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); @@ -1531,76 +1478,97 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, ResTys.push_back(MVT::Other); SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(),NumVecs+6); - if (llvm::ModelWithRegSequence()) { - // Form a REG_SEQUENCE to force register allocation. 
- SDValue RegSeq; - if (is64BitVector) { - SDValue V0 = SDValue(VLdLn, 0); - SDValue V1 = SDValue(VLdLn, 1); - if (NumVecs == 2) { - RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); - } else { - SDValue V2 = SDValue(VLdLn, 2); - // If it's a vld3, form a quad D-register but discard the last part. - SDValue V3 = (NumVecs == 3) - ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) - : SDValue(VLdLn, 3); - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); - } + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + if (is64BitVector) { + SDValue V0 = SDValue(VLdLn, 0); + SDValue V1 = SDValue(VLdLn, 1); + if (NumVecs == 2) { + RegSeq = SDValue(PairDRegs(MVT::v2i64, V0, V1), 0); } else { - // For 128-bit vectors, take the 64-bit results of the load and insert them - // as subregs into the result. - SDValue V[8]; - for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { - if (Even) { - V[i] = SDValue(VLdLn, Vec); - V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - } else { - V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - V[i+1] = SDValue(VLdLn, Vec); - } + SDValue V2 = SDValue(VLdLn, 2); + // If it's a vld3, form a quad D-register but discard the last part. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) + : SDValue(VLdLn, 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + } else { + // For 128-bit vectors, take the 64-bit results of the load and insert + // them as subregs into the result. + SDValue V[8]; + for (unsigned Vec = 0, i = 0; Vec < NumVecs; ++Vec, i+=2) { + if (Even) { + V[i] = SDValue(VLdLn, Vec); + V[i+1] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + } else { + V[i] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); + V[i+1] = SDValue(VLdLn, Vec); } - if (NumVecs == 3) - V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, RegVT), 0); - - if (NumVecs == 2) - RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); - else - RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], - V[4], V[5], V[6], V[7]), 0); } + if (NumVecs == 3) + V[6] = V[7] = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + dl, RegVT), 0); - assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); - assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); - unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; - for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - ReplaceUses(SDValue(N, Vec), - CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); - ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); - return NULL; - } - - // For a 64-bit vector load to D registers, nothing more needs to be done. - if (is64BitVector) - return VLdLn; - - // For 128-bit vectors, take the 64-bit results of the load and insert them - // as subregs into the result. 
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { - SDValue QuadVec = CurDAG->getTargetInsertSubreg(SubregIdx, dl, VT, - N->getOperand(Vec+3), - SDValue(VLdLn, Vec)); - ReplaceUses(SDValue(N, Vec), QuadVec); + if (NumVecs == 2) + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V[0], V[1], V[2], V[3]), 0); + else + RegSeq = SDValue(OctoDRegs(MVT::v8i64, V[0], V[1], V[2], V[3], + V[4], V[5], V[6], V[7]), 0); } - Chain = SDValue(VLdLn, NumVecs); - ReplaceUses(SDValue(N, NumVecs), Chain); + assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering"); + assert(ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering"); + unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, RegSeq)); + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, NumVecs)); return NULL; } +SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, + unsigned Opc) { + assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + unsigned FirstTblReg = IsExt ? 2 : 1; + + // Form a REG_SEQUENCE to force register allocation. + SDValue RegSeq; + SDValue V0 = N->getOperand(FirstTblReg + 0); + SDValue V1 = N->getOperand(FirstTblReg + 1); + if (NumVecs == 2) + RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + else { + SDValue V2 = N->getOperand(FirstTblReg + 2); + // If it's a vtbl3, form a quad D-register and leave the last part as + // an undef. + SDValue V3 = (NumVecs == 3) + ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) + : N->getOperand(FirstTblReg + 3); + RegSeq = SDValue(QuadDRegs(MVT::v4i64, V0, V1, V2, V3), 0); + } + + // Now extract the D registers back out. 
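// Reviewer note: getTargetExtractSubreg is the inverse step. Once the
// REG_SEQUENCE has pinned the table to consecutive registers, each D part
// is pulled back out with dsub_0..dsub_3 so it can be passed as an ordinary
// operand of the VTBL/VTBX machine node.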
+ SmallVector Ops; + if (IsExt) + Ops.push_back(N->getOperand(1)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_0, dl, VT, RegSeq)); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_1, dl, VT, RegSeq)); + if (NumVecs > 2) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_2, dl, VT, RegSeq)); + if (NumVecs > 3) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::dsub_3, dl, VT, RegSeq)); + + Ops.push_back(N->getOperand(FirstTblReg + NumVecs)); + Ops.push_back(getAL(CurDAG)); // predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register + return CurDAG->getMachineNode(Opc, dl, VT, Ops.data(), Ops.size()); +} + SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { if (!Subtarget->hasV6T2Ops()) @@ -1954,8 +1922,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, MVT::i32); SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (Subtarget->isThumb()) { - SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0 }; - return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 5); + SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; + return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops, 6); } else { SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG), Reg0, Reg0 }; return CurDAG->SelectNodeTo(N, ARM::RSBrs, MVT::i32, Ops, 7); @@ -2015,7 +1983,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops,4); + return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32,Ops,4); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), @@ -2029,7 +1997,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { if (Subtarget->isThumb()) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; - return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops,4); + return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32,Ops,4); } else { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), @@ -2211,6 +2179,22 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); } + case ARMISD::BUILD_VECTOR: { + EVT VecVT = N->getValueType(0); + EVT EltVT = VecVT.getVectorElementType(); + unsigned NumElts = VecVT.getVectorNumElements(); + if (EltVT.getSimpleVT() == MVT::f64) { + assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); + return PairDRegs(VecVT, N->getOperand(0), N->getOperand(1)); + } + assert(EltVT.getSimpleVT() == MVT::f32 && + "unexpected type for BUILD_VECTOR"); + if (NumElts == 2) + return PairSRegs(VecVT, N->getOperand(0), N->getOperand(1)); + assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); + return QuadSRegs(VecVT, N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3)); + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { @@ -2342,6 +2326,29 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); + switch (IntNo) { + default: + break; + + case Intrinsic::arm_neon_vtbl2: + return SelectVTBL(N, false, 2, ARM::VTBL2); + case 
Intrinsic::arm_neon_vtbl3: + return SelectVTBL(N, false, 3, ARM::VTBL3); + case Intrinsic::arm_neon_vtbl4: + return SelectVTBL(N, false, 4, ARM::VTBL4); + + case Intrinsic::arm_neon_vtbx2: + return SelectVTBL(N, true, 2, ARM::VTBX2); + case Intrinsic::arm_neon_vtbx3: + return SelectVTBL(N, true, 3, ARM::VTBX3); + case Intrinsic::arm_neon_vtbx4: + return SelectVTBL(N, true, 4, ARM::VTBX4); + } + break; + } + case ISD::CONCAT_VECTORS: return SelectConcatVector(N); } @@ -2367,9 +2374,3 @@ FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, CodeGenOpt::Level OptLevel) { return new ARMDAGToDAGISel(TM, OptLevel); } - -/// ModelWithRegSequence - Return true if isel should use REG_SEQUENCE to model -/// operations involving sub-registers. -bool llvm::ModelWithRegSequence() { - return UseRegSeq; -} diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b8126a3..98d8b85 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -12,6 +12,7 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "arm-isel" #include "ARM.h" #include "ARMAddressingModes.h" #include "ARMConstantPoolValue.h" @@ -40,6 +41,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" @@ -47,9 +49,27 @@ #include using namespace llvm; +STATISTIC(NumTailCalls, "Number of tail calls"); + +// This option should go away when tail calls fully work. +static cl::opt +EnableARMTailCalls("arm-tail-calls", cl::Hidden, + cl::desc("Generate tail calls (TEMPORARY OPTION)."), + cl::init(true)); + static cl::opt EnableARMLongCalls("arm-long-calls", cl::Hidden, - cl::desc("Generate calls via indirect call instructions."), + cl::desc("Generate calls via indirect call instructions"), + cl::init(false)); + +static cl::opt +ARMInterworking("arm-interworking", cl::Hidden, + cl::desc("Enable / disable ARM interworking (for debugging only)"), + cl::init(true)); + +static cl::opt +EnableARMCodePlacement("arm-code-placement", cl::Hidden, + cl::desc("Enable code placement pass for ARM"), cl::init(false)); static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, @@ -94,10 +114,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, } setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); - if (llvm::ModelWithRegSequence()) - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); - else - setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); @@ -393,13 +410,57 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // doesn't yet know how to not do that for SjLj. setExceptionSelectorRegister(ARM::R0); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); - - // If the subtarget does not have extract instructions, sign_extend_inreg - // needs to be expanded. Extract is available in ARM mode on v6 and up, - // and on most Thumb2 implementations. 
- if ((!Subtarget->isThumb() && !Subtarget->hasV6Ops()) - || (Subtarget->isThumb2() && !Subtarget->hasT2ExtractPack())) { + // Handle atomics directly for ARMv[67] (except for Thumb1), otherwise + // use the default expansion. + bool canHandleAtomics = + (Subtarget->hasV7Ops() || + (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())); + if (canHandleAtomics) { + // membarrier needs custom lowering; the rest are legal and handled + // normally. + setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); + } else { + // Set them all for expansion, which will force libcalls. + setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + // Since the libcalls include locking, fold in the fences + setShouldFoldAtomicFences(true); + } + // 64-bit versions are always libcalls (for now) + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); + setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); + + // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. + if (!Subtarget->hasV6Ops()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); } @@ -412,8 +473,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // We want to custom lower some of our intrinsics. 
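// Reviewer summary of the atomics block above: ARMv7, and ARMv6 outside
// Thumb1, can lower these with LDREX/STREX, so only MEMBARRIER needs custom
// code there; on older cores every atomic operation is marked Expand, which
// produces __sync_* libcalls, and setShouldFoldAtomicFences(true) folds the
// surrounding fences into those calls since the library already locks.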
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); - setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + if (Subtarget->isTargetDarwin()) { + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); + } setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); @@ -474,28 +537,14 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) else setSchedulingPreference(Sched::Hybrid); - // FIXME: If-converter should use instruction latency to determine - // profitability rather than relying on fixed limits. - if (Subtarget->getCPUString() == "generic") { - // Generic (and overly aggressive) if-conversion limits. - setIfCvtBlockSizeLimit(10); - setIfCvtDupBlockSizeLimit(2); - } else if (Subtarget->hasV7Ops()) { - setIfCvtBlockSizeLimit(3); - setIfCvtDupBlockSizeLimit(1); - } else if (Subtarget->hasV6Ops()) { - setIfCvtBlockSizeLimit(2); - setIfCvtDupBlockSizeLimit(1); - } else { - setIfCvtBlockSizeLimit(3); - setIfCvtDupBlockSizeLimit(2); - } - maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type - // Do not enable CodePlacementOpt for now: it currently runs after the - // ARMConstantIslandPass and messes up branch relaxation and placement - // of constant islands. - // benefitFromCodePlacementOpt = true; + + // On ARM arguments smaller than 4 bytes are extended, so all arguments + // are at least 4 bytes aligned. + setMinStackArgumentAlignment(4); + + if (EnableARMCodePlacement) + benefitFromCodePlacementOpt = true; } const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { @@ -537,6 +586,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; + case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; + case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; @@ -581,6 +632,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; case ARMISD::FMAX: return "ARMISD::FMAX"; case ARMISD::FMIN: return "ARMISD::FMIN"; } @@ -603,15 +655,33 @@ TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { - return getTargetMachine().getSubtarget().isThumb() ? 0 : 1; + return getTargetMachine().getSubtarget().isThumb() ? 1 : 2; } Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + unsigned NumVals = N->getNumValues(); + if (!NumVals) + return Sched::RegPressure; + + for (unsigned i = 0; i != NumVals; ++i) { EVT VT = N->getValueType(i); if (VT.isFloatingPoint() || VT.isVector()) return Sched::Latency; } + + if (!N->isMachineOpcode()) + return Sched::RegPressure; + + // Loads are scheduled for latency even if the instruction itinerary // is not available.
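// Reviewer summary of the scheduling policy assembled here: prefer
// latency-oriented scheduling for nodes producing FP or vector values, for
// loads, and for machine opcodes whose itinerary stage latency exceeds two
// cycles; everything else falls back to register-pressure scheduling.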
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); + if (TID.mayLoad()) + return Sched::Latency; + + const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData(); + if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2) + return Sched::Latency; return Sched::RegPressure; } @@ -964,11 +1034,28 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - // ARM target does not yet support tail call optimization. - isTailCall = false; + MachineFunction &MF = DAG.getMachineFunction(); + bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); + bool IsSibCall = false; + // Temporarily disable tail calls so things don't break. + if (!EnableARMTailCalls) + isTailCall = false; + if (isTailCall) { + // Check if it's really possible to do a tail call. + isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, + isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), + Outs, OutVals, Ins, DAG); + // We don't support GuaranteedTailCallOpt for ARM, only automatically + // detected sibcalls. + if (isTailCall) { + ++NumTailCalls; + IsSibCall = true; + } + } // Analyze operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; @@ -981,9 +1068,14 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); + // For tail calls, memory operands are available in our caller's stack. + if (IsSibCall) + NumBytes = 0; + // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + if (!IsSibCall) + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); @@ -996,7 +1088,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, i != e; ++i, ++realArgIdx) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[realArgIdx].Val; + SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; // Promote the value if needed. @@ -1044,7 +1136,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, } } else if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else if (!IsSibCall) { + assert(VA.isMemLoc()); MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, @@ -1059,10 +1151,32 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token chain // and flag operands which copy the outgoing args into the appropriate regs. SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); + // Tail call byval lowering might overwrite argument registers so in case of + // tail call optimization the copies to registers are lowered later.
+ if (!isTailCall) + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + + // For tail calls lower the arguments to the 'real' stack slot. + if (isTailCall) { + // Force all the incoming stack arguments to be loaded from the stack + // before any new outgoing arguments are stored to the stack, because the + // outgoing stack slots may alias the incoming argument stack slots, and + // the alias isn't otherwise explicit. This is slightly more conservative + // than necessary, because it means that each store effectively depends + // on every argument instead of just those arguments it would clobber. + + // Do not flag preceding copytoreg stuff together with the following stuff. + InFlag = SDValue(); + for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + RegsToPass[i].second, InFlag); + InFlag = Chain.getValue(1); + } + InFlag = SDValue(); } // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every @@ -1071,7 +1185,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, bool isDirect = false; bool isARMFunc = false; bool isLocalARMFunc = false; - MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); if (EnableARMLongCalls) { @@ -1117,7 +1230,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, getTargetMachine().getRelocationModel() != Reloc::Static; isARMFunc = !Subtarget->isThumb() || isStub; // ARM call to a local ARM function is predicable. - isLocalARMFunc = !Subtarget->isThumb() && !isExt; + isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); // tBX takes a register source operand. if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); @@ -1134,7 +1247,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); } else - Callee = DAG.getTargetGlobalAddress(GV, getPointerTy()); + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { isDirect = true; bool isStub = Subtarget->isTargetDarwin() && @@ -1171,11 +1284,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) : ARMISD::CALL_NOLINK; } - if (CallOpc == ARMISD::CALL_NOLINK && !Subtarget->isThumb1Only()) { - // implicit def LR - LR mustn't be allocated as GRP:$dst of CALL_NOLINK - Chain = DAG.getCopyToReg(Chain, dl, ARM::LR, DAG.getUNDEF(MVT::i32),InFlag); - InFlag = Chain.getValue(1); - } std::vector<SDValue> Ops; Ops.push_back(Chain); @@ -1189,9 +1297,13 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (InFlag.getNode()) Ops.push_back(InFlag); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); + if (isTailCall) + return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); + + // Returns a chain and a flag for retval copy to use. - Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag), - &Ops[0], Ops.size()); + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), @@ -1205,10 +1317,203 @@ dl, DAG, InVals); }
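As a rough illustration (not from this patch) of what the ARMISD::TC_RETURN path changes at the machine level, assuming a trivial forwarding call:

// Sketch: the sibcall path emits a branch within the caller's frame
// instead of a linked call plus a separate return.
extern int callee(int);
int caller(int x) {
  return callee(x);   // sibcall:   b callee  @ TAILCALL  (no frame, no pop)
}                     // otherwise: push {lr}; bl callee; pop {pc}

The TC_RETURN node carries the same operands as the call it replaces; the TCRETURNdi/TCRETURNri pseudos defined later in this patch expand it to the b/bx forms.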
+/// MatchingStackOffset - Return true if the given stack call argument is +/// already available in the same position (relatively) of the caller's +/// incoming argument stack. +static +bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, + MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, + const ARMInstrInfo *TII) { + unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; + int FI = INT_MAX; + if (Arg.getOpcode() == ISD::CopyFromReg) { + unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); + if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) + return false; + MachineInstr *Def = MRI->getVRegDef(VR); + if (!Def) + return false; + if (!Flags.isByVal()) { + if (!TII->isLoadFromStackSlot(Def, FI)) + return false; + } else { + return false; + } + } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { + if (Flags.isByVal()) + // ByVal argument is passed in as a pointer but it's now being + // dereferenced. e.g. + // define @foo(%struct.X* %A) { + // tail call @bar(%struct.X* byval %A) + // } + return false; + SDValue Ptr = Ld->getBasePtr(); + FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); + if (!FINode) + return false; + FI = FINode->getIndex(); + } else + return false; + + assert(FI != INT_MAX); + if (!MFI->isFixedObjectIndex(FI)) + return false; + return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); +}
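A hypothetical example (function names invented) of the case MatchingStackOffset accepts: under the AAPCS the first four integer arguments travel in r0-r3, so the fifth lives in the caller's incoming argument area, and forwarding it unchanged reuses the caller's own fixed stack slot:

// Sketch: 'e' is reloaded from a fixed frame index and passed at the same
// relative offset, so no outgoing store is needed and the sibcall survives.
extern int take5(int a, int b, int c, int d, int e);
int forward5(int a, int b, int c, int d, int e) {
  return take5(a, b, c, d, e);
}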
+/// IsEligibleForTailCallOptimization - Check whether the call is eligible +/// for tail call optimization. Targets which want to do tail call +/// optimization should implement this function. +bool +ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const { + const Function *CallerF = DAG.getMachineFunction().getFunction(); + CallingConv::ID CallerCC = CallerF->getCallingConv(); + bool CCMatch = CallerCC == CalleeCC; + + // Look for obvious safe cases to perform tail call optimization that do not + // require ABI changes. This is what gcc calls sibcall. + + // Do not sibcall optimize vararg calls unless the call site passes no + // arguments. + if (isVarArg && !Outs.empty()) + return false; + + // Also avoid sibcall optimization if either caller or callee uses struct + // return semantics. + if (isCalleeStructRet || isCallerStructRet) + return false; + + // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: + // emitEpilogue is not ready for them. + // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take + // LR. This means if we need to reload LR, it takes an extra instruction, + // which outweighs the value of the tail call; but here we don't know yet + // whether LR is going to be used. Probably the right approach is to + // generate the tail call here and turn it back into CALL/RET in + // emitEpilogue if LR is used. + if (Subtarget->isThumb1Only()) + return false; + + // For the moment, we can only do this to functions defined in this + // compilation, or to indirect calls. A Thumb branch (B) to an ARM function, + // or vice versa, is not easily fixed up by the linker, unlike BL. + // (We could do this by loading the address of the callee into a register; + // that is an extra instruction over the direct call and burns a register + // as well, so is not likely to be a win.) + + // It might be safe to remove this restriction on non-Darwin. + + // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, + // but we need to make sure there are enough registers; the only valid + // registers are the 4 used for parameters. We don't currently do this + // case. + if (isa<ExternalSymbolSDNode>(Callee)) + return false; + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + const GlobalValue *GV = G->getGlobal(); + if (GV->isDeclaration() || GV->isWeakForLinker()) + return false; + } + + // If the calling conventions do not match, then we'd better make sure the + // results are returned in the same way as what the caller expects. + if (!CCMatch) { + SmallVector<CCValAssign, 16> RVLocs1; + CCState CCInfo1(CalleeCC, false, getTargetMachine(), + RVLocs1, *DAG.getContext()); + CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); + + SmallVector<CCValAssign, 16> RVLocs2; + CCState CCInfo2(CallerCC, false, getTargetMachine(), + RVLocs2, *DAG.getContext()); + CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); + + if (RVLocs1.size() != RVLocs2.size()) + return false; + for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { + if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) + return false; + if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) + return false; + if (RVLocs1[i].isRegLoc()) { + if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) + return false; + } else { + if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) + return false; + } + } + } + + // If the callee takes no arguments then go on to check the results of the + // call. + if (!Outs.empty()) { + // Check if stack adjustment is needed. For now, do not do this if any + // argument is passed on the stack. + SmallVector<CCValAssign, 16> ArgLocs; + CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), + ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeCallOperands(Outs, + CCAssignFnForNode(CalleeCC, false, isVarArg)); + if (CCInfo.getNextStackOffset()) { + MachineFunction &MF = DAG.getMachineFunction(); + + // Check if the arguments are already laid out in the same way as the + // caller's fixed stack objects. + MachineFrameInfo *MFI = MF.getFrameInfo(); + const MachineRegisterInfo *MRI = &MF.getRegInfo(); + const ARMInstrInfo *TII = + ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); + for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); + i != e; + ++i, ++realArgIdx) { + CCValAssign &VA = ArgLocs[i]; + EVT RegVT = VA.getLocVT(); + SDValue Arg = OutVals[realArgIdx]; + ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; + if (VA.getLocInfo() == CCValAssign::Indirect) + return false; + if (VA.needsCustom()) { + // f64 and vector types are split into multiple registers or + // register/stack-slot combinations. The types will not match + // the registers; give up on memory f64 refs until we figure + // out what to do about this. + if (!VA.isRegLoc()) + return false; + if (!ArgLocs[++i].isRegLoc()) + return false; + if (RegVT == MVT::v2f64) { + if (!ArgLocs[++i].isRegLoc()) + return false; + if (!ArgLocs[++i].isRegLoc()) + return false; + } + } else if (!VA.isRegLoc()) { + if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, + MFI, MRI, TII)) + return false; + } + } + } + } + + return true; +}
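To make the checks above concrete, a small hypothetical pair (illustrative only, not from the patch):

// Eligible: matching calling conventions, callee defined in this unit,
// no struct return, and the argument fits in a register.
static int add1(int x) { return x + 1; }
int f(int x) { return add1(x); }   // can be emitted as a sibcall

// Ineligible: the callee returns a large struct via sret, which trips
// the isCalleeStructRet check above.
struct Big { int v[8]; };
extern Big makeBig();
Big g() { return makeBig(); }      // stays a normal call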
SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. @@ -1239,7 +1544,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue Arg = Outs[realRVLocIdx].Val; + SDValue Arg = OutVals[realRVLocIdx]; switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); @@ -1477,7 +1782,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, // pair. This is always cheaper. if (Subtarget->useMovt()) { return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, - DAG.getTargetGlobalAddress(GV, PtrVT)); + DAG.getTargetGlobalAddress(GV, dl, PtrVT)); } else { SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); @@ -1552,9 +1857,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - SDValue Val = Subtarget->isThumb() ? - DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::SP, MVT::i32) : - DAG.getConstant(0, MVT::i32); + SDValue Val = DAG.getConstant(0, MVT::i32); return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), Op.getOperand(1), Val); } @@ -1568,8 +1871,7 @@ ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) - const { + const ARMSubtarget *Subtarget) const { unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); DebugLoc dl = Op.getDebugLoc(); switch (IntNo) { @@ -1597,7 +1899,6 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, PseudoSourceValue::getConstantPool(), 0, false, false, 0); - SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1609,25 +1910,21 @@ } static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *Subtarget) { + const ARMSubtarget *Subtarget) { DebugLoc dl = Op.getDebugLoc(); SDValue Op5 = Op.getOperand(5); - SDValue Res; unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); - if (isDeviceBarrier) { - if (Subtarget->hasV7Ops()) - Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0)); - else - Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(0, MVT::i32)); - } else { - if (Subtarget->hasV7Ops()) - Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); - else - Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(0, MVT::i32)); - } - return Res; + // v6 and v7 can both handle barriers directly, but need to be handled a + // bit differently.
Thumb1 and pre-v6 ARM mode use a libcall instead and should + // never get here. + unsigned Opc = isDeviceBarrier ? ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; + if (Subtarget->hasV7Ops()) + return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); + else if (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only()) + return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(0, MVT::i32)); + assert(0 && "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); + return SDValue(); } static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { @@ -1712,7 +2009,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue ArgValue2; if (NextVA.isMemLoc()) { MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true, false); + int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1768,8 +2065,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, VA = ArgLocs[++i]; // skip ahead to next loc SDValue ArgValue2; if (VA.isMemLoc()) { - int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, PseudoSourceValue::getFixedStack(FI), 0, @@ -1836,8 +2132,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1868,7 +2163,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, AFI->setVarArgsFrameIndex( MFI->CreateFixedObject(VARegSaveSize, ArgOffset + VARegSaveSize - VARegSize, - true, false)); + true)); SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy()); @@ -1884,8 +2179,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, - PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), 0, - false, false, 0); + PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()), + 0, false, false, 0); MemOps.push_back(Store); FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, DAG.getConstant(4, getPointerTy())); @@ -1895,8 +2190,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack.
- AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, - true, false)); + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); } return Chain; } @@ -1978,9 +2272,44 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); } +static bool canBitcastToInt(SDNode *Op) { + return Op->hasOneUse() && + ISD::isNormalLoad(Op) && + Op->getValueType(0) == MVT::f32; +} + +static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) { + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) + return DAG.getLoad(MVT::i32, Op.getDebugLoc(), + Ld->getChain(), Ld->getBasePtr(), + Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); + + llvm_unreachable("Unknown VFP cmp argument!"); +} + /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. -static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) { +SDValue +ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, + DebugLoc dl) const { + if (UnsafeFPMath && FiniteOnlyFPMath() && + (CC == ISD::SETEQ || CC == ISD::SETOEQ || + CC == ISD::SETNE || CC == ISD::SETUNE) && + canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) { + // If unsafe fp math optimization is enabled and there are no other uses of + // the CMP operands, and the condition code is EQ or NE, we can optimize it + // to an integer comparison. + if (CC == ISD::SETOEQ) + CC = ISD::SETEQ; + else if (CC == ISD::SETUNE) + CC = ISD::SETNE; + LHS = bitcastToInt(LHS, DAG); + RHS = bitcastToInt(RHS, DAG); + return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); + } + SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
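The bitcast path above leans on IEEE-754 bit patterns: with unsafe, finite-only FP math in effect, two f32 values loaded from memory compare equal exactly when their bit patterns do, so the VFP compare plus FMSTAT flag transfer can be replaced by a plain integer CMP. A standalone sketch of the idea (illustration only, not the compiler code):

#include <cstdint>
#include <cstring>

// Equality of two finite, non-NaN floats as an i32 compare. The memcpy
// mirrors what the DAG transformation does: re-issue the f32 load as an
// i32 load of the same address.
bool feq_as_int(float a, float b) {
  std::uint32_t ia, ib;
  std::memcpy(&ia, &a, sizeof ia);
  std::memcpy(&ib, &b, sizeof ib);
  return ia == ib;   // treats -0.0f != +0.0f, hence "unsafe" math only
}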
@@ -2010,13 +2339,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl); SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - ARMCC, CCR, Cmp); + ARMCC, CCR, Cmp); if (CondCode2 != ARMCC::AL) { SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. - SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl); Result = DAG.getNode(ARMISD::CMOV, dl, VT, Result, TrueVal, ARMCC2, CCR, Cmp2); } @@ -2043,8 +2372,8 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; @@ -2132,7 +2461,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(Opc, dl, VT, Op); } -static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Implement fcopysign with a fabs and a conditional fneg. SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); @@ -2140,8 +2469,10 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); - SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); + SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); + SDValue Cmp = getVFPCmp(Tmp1, FP0, + ISD::SETLT, ARMCC, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } @@ -2206,7 +2537,8 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(1, MVT::i32)); - return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); + return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT, + DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); } // Turn f64->i64 into VMOVRRD. @@ -2516,76 +2848,149 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { return Result; } -/// isVMOVSplat - Check if the specified splat value corresponds to an immediate -/// VMOV instruction, and if so, return the constant being splatted. -static SDValue isVMOVSplat(uint64_t SplatBits, uint64_t SplatUndef, - unsigned SplatBitSize, SelectionDAG &DAG) { +/// isNEONModifiedImm - Check if the specified splat value corresponds to a +/// valid vector constant for a NEON instruction with a "modified immediate" +/// operand (e.g., VMOV). If so, return either the constant being +/// splatted or the encoded value, depending on the DoEncode parameter. +static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, + unsigned SplatBitSize, SelectionDAG &DAG, + bool isVMOV, bool DoEncode) { + unsigned OpCmode, Imm; + EVT VT; + + // SplatBitSize is set to the smallest size that splats the vector, so a + // zero vector will always have SplatBitSize == 8. However, NEON modified + // immediate instructions other than VMOV do not support the 8-bit encoding + // of a zero vector, and the default encoding of zero is supposed to be the + // 32-bit version. + if (SplatBits == 0) + SplatBitSize = 32; + switch (SplatBitSize) { case 8: - // Any 1-byte value is OK. + // Any 1-byte value is OK. Op=0, Cmode=1110. assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); - return DAG.getTargetConstant(SplatBits, MVT::i8); + OpCmode = 0xe; + Imm = SplatBits; + VT = MVT::i8; + break; case 16: // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. - if ((SplatBits & ~0xff) == 0 || - (SplatBits & ~0xff00) == 0) - return DAG.getTargetConstant(SplatBits, MVT::i16); - break; + VT = MVT::i16; + if ((SplatBits & ~0xff) == 0) { + // Value = 0x00nn: Op=x, Cmode=100x. + OpCmode = 0x8; + Imm = SplatBits; + break; + } + if ((SplatBits & ~0xff00) == 0) { + // Value = 0xnn00: Op=x, Cmode=101x. + OpCmode = 0xa; + Imm = SplatBits >> 8; + break; + } + return SDValue(); case 32: // NEON's 32-bit VMOV supports splat values where: // * only one byte is nonzero, or // * the least significant byte is 0xff and the second byte is nonzero, or // * the least significant 2 bytes are 0xff and the third is nonzero.
- if ((SplatBits & ~0xff) == 0 || - (SplatBits & ~0xff00) == 0 || - (SplatBits & ~0xff0000) == 0 || - (SplatBits & ~0xff000000) == 0) - return DAG.getTargetConstant(SplatBits, MVT::i32); + VT = MVT::i32; + if ((SplatBits & ~0xff) == 0) { + // Value = 0x000000nn: Op=x, Cmode=000x. + OpCmode = 0; + Imm = SplatBits; + break; + } + if ((SplatBits & ~0xff00) == 0) { + // Value = 0x0000nn00: Op=x, Cmode=001x. + OpCmode = 0x2; + Imm = SplatBits >> 8; + break; + } + if ((SplatBits & ~0xff0000) == 0) { + // Value = 0x00nn0000: Op=x, Cmode=010x. + OpCmode = 0x4; + Imm = SplatBits >> 16; + break; + } + if ((SplatBits & ~0xff000000) == 0) { + // Value = 0xnn000000: Op=x, Cmode=011x. + OpCmode = 0x6; + Imm = SplatBits >> 24; + break; + } if ((SplatBits & ~0xffff) == 0 && - ((SplatBits | SplatUndef) & 0xff) == 0xff) - return DAG.getTargetConstant(SplatBits | 0xff, MVT::i32); + ((SplatBits | SplatUndef) & 0xff) == 0xff) { + // Value = 0x0000nnff: Op=x, Cmode=1100. + OpCmode = 0xc; + Imm = SplatBits >> 8; + SplatBits |= 0xff; + break; + } if ((SplatBits & ~0xffffff) == 0 && - ((SplatBits | SplatUndef) & 0xffff) == 0xffff) - return DAG.getTargetConstant(SplatBits | 0xffff, MVT::i32); + ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { + // Value = 0x00nnffff: Op=x, Cmode=1101. + OpCmode = 0xd; + Imm = SplatBits >> 16; + SplatBits |= 0xffff; + break; + } // Note: there are a few 32-bit splat values (specifically: 00ffff00, // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not // VMOV.I32. A (very) minor optimization would be to replicate the value // and fall through here to test for a valid 64-bit splat. But then the // caller would also need to check and handle the change in size. - break; + return SDValue(); case 64: { // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. + if (!isVMOV) + return SDValue(); uint64_t BitMask = 0xff; uint64_t Val = 0; + unsigned ImmMask = 1; + Imm = 0; for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { - if (((SplatBits | SplatUndef) & BitMask) == BitMask) + if (((SplatBits | SplatUndef) & BitMask) == BitMask) { Val |= BitMask; - else if ((SplatBits & BitMask) != 0) + Imm |= ImmMask; + } else if ((SplatBits & BitMask) != 0) { return SDValue(); + } BitMask <<= 8; + ImmMask <<= 1; } - return DAG.getTargetConstant(Val, MVT::i64); + // Op=1, Cmode=1110. + OpCmode = 0x1e; + SplatBits = Val; + VT = MVT::i64; + break; } default: - llvm_unreachable("unexpected size for isVMOVSplat"); - break; + llvm_unreachable("unexpected size for isNEONModifiedImm"); + return SDValue(); } - return SDValue(); + if (DoEncode) { + unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); + return DAG.getTargetConstant(EncodedVal, MVT::i32); + } + return DAG.getTargetConstant(SplatBits, VT); }
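A worked example of the encoding above, with hypothetical values: splatting 0x4100 across a v8i16 takes the 0xnn00 form, so OpCmode is 0xa and the payload byte is 0x41, and createNEONModImm packs them as (OpCmode << 8) | Imm:

// Sketch of the packing performed by ARM_AM::createNEONModImm for a
// v8i16 splat of 0x4100 (Value = 0xnn00: Op=x, Cmode=101x).
unsigned encodedExample() {
  unsigned OpCmode = 0xa, Imm = 0x41;
  return (OpCmode << 8) | Imm;   // == 0xa41; printed as vmov.i16 d0, #0x4100
}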
-/// getVMOVImm - If this is a build_vector of constants which can be -/// formed by using a VMOV instruction of the specified element size, -/// return the constant being splatted. The ByteSize field indicates the -/// number of bytes of each element [1248]. -SDValue ARM::getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { +/// getNEONModImm - If this is a valid vector constant for a NEON instruction +/// with a "modified immediate" operand (e.g., VMOV) of the specified element +/// size, return the encoded value for that immediate. The ByteSize field +/// indicates the number of bytes of each element [1248]. +SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV, + SelectionDAG &DAG) { BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); APInt SplatBits, SplatUndef; unsigned SplatBitSize; @@ -2597,8 +3002,8 @@ SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { if (SplatBitSize > ByteSize * 8) return SDValue(); - return isVMOVSplat(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), - SplatBitSize, DAG); + return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), + SplatBitSize, DAG, isVMOV, true); } static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, @@ -2838,8 +3243,10 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { bool HasAnyUndefs; if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { if (SplatBitSize <= 64) { - SDValue Val = isVMOVSplat(SplatBits.getZExtValue(), - SplatUndef.getZExtValue(), SplatBitSize, DAG); + // Check if an immediate VMOV works. + SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), + SplatUndef.getZExtValue(), + SplatBitSize, DAG, true, false); if (Val.getNode()) return BuildSplat(Val, VT, DAG, dl); } @@ -2883,21 +3290,17 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ARMISD::VDUP, dl, VT, Value); // Vectors with 32- or 64-bit elements can be built by directly assigning - // the subregisters. + // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands + // will be legalized. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. EVT EltVT = EVT::getFloatingPointVT(EltSize); EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); - SDValue Val = DAG.getUNDEF(VecVT); - for (unsigned i = 0; i < NumElts; ++i) { - SDValue Elt = Op.getOperand(i); - if (Elt.getOpcode() == ISD::UNDEF) - continue; - Elt = DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Elt); - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, Elt, - DAG.getConstant(i, MVT::i32)); - } + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumElts; ++i) + Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i))); + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); } @@ -2934,7 +3337,9 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, bool ReverseVEXT; unsigned Imm, WhichResult; - return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + return (EltSize >= 32 || + ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isVREVMask(M, VT, 64) || isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16) || @@ -3032,59 +3437,62 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // of the same type so that they get CSEd properly. SVN->getMask(ShuffleMask); - if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { - int Lane = SVN->getSplatIndex(); - // If this is undef splat, generate it via "just" vdup, if possible. + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (EltSize <= 32) { + if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { + int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible.
+ if (Lane == -1) Lane = 0; + + if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + } + return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, + DAG.getConstant(Lane, MVT::i32)); } - return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, - DAG.getConstant(Lane, MVT::i32)); - } - bool ReverseVEXT; - unsigned Imm; - if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { - if (ReverseVEXT) - std::swap(V1, V2); - return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, - DAG.getConstant(Imm, MVT::i32)); - } - - if (isVREVMask(ShuffleMask, VT, 64)) - return DAG.getNode(ARMISD::VREV64, dl, VT, V1); - if (isVREVMask(ShuffleMask, VT, 32)) - return DAG.getNode(ARMISD::VREV32, dl, VT, V1); - if (isVREVMask(ShuffleMask, VT, 16)) - return DAG.getNode(ARMISD::VREV16, dl, VT, V1); - - // Check for Neon shuffles that modify both input vectors in place. - // If both results are used, i.e., if there are two shuffles with the same - // source operands and with masks corresponding to both results of one of - // these operations, DAG memoization will ensure that a single node is - // used for both shuffles. - unsigned WhichResult; - if (isVTRNMask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), - V1, V2).getValue(WhichResult); - if (isVUZPMask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), - V1, V2).getValue(WhichResult); - if (isVZIPMask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), - V1, V2).getValue(WhichResult); + bool ReverseVEXT; + unsigned Imm; + if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { + if (ReverseVEXT) + std::swap(V1, V2); + return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, + DAG.getConstant(Imm, MVT::i32)); + } - if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), - V1, V1).getValue(WhichResult); - if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), - V1, V1).getValue(WhichResult); - if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) - return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), - V1, V1).getValue(WhichResult); + if (isVREVMask(ShuffleMask, VT, 64)) + return DAG.getNode(ARMISD::VREV64, dl, VT, V1); + if (isVREVMask(ShuffleMask, VT, 32)) + return DAG.getNode(ARMISD::VREV32, dl, VT, V1); + if (isVREVMask(ShuffleMask, VT, 16)) + return DAG.getNode(ARMISD::VREV16, dl, VT, V1); + + // Check for Neon shuffles that modify both input vectors in place. + // If both results are used, i.e., if there are two shuffles with the same + // source operands and with masks corresponding to both results of one of + // these operations, DAG memoization will ensure that a single node is + // used for both shuffles. 
+ unsigned WhichResult; + if (isVTRNMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVUZPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + if (isVZIPMask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V2).getValue(WhichResult); + + if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) + return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), + V1, V1).getValue(WhichResult); + } // If the shuffle is not directly supported and it has 4 elements, use // the PerfectShuffle-generated table to synthesize it from other shuffles. @@ -3108,8 +3516,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); } - // Implement shuffles with 32- or 64-bit elements as subreg copies. - unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. if (EltSize >= 32) { // Do the expansion with floating-point types, since that is what the VFP // registers are defined to use, and since i64 is not legal. @@ -3117,17 +3524,17 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); - SDValue Val = DAG.getUNDEF(VecVT); + SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < NumElts; ++i) { if (ShuffleMask[i] < 0) - continue; - SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - ShuffleMask[i] < (int)NumElts ? V1 : V2, - DAG.getConstant(ShuffleMask[i] & (NumElts-1), - MVT::i32)); - Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Val, - Elt, DAG.getConstant(i, MVT::i32)); + Ops.push_back(DAG.getUNDEF(EltVT)); + else + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + ShuffleMask[i] < (int)NumElts ? V1 : V2, + DAG.getConstant(ShuffleMask[i] & (NumElts-1), + MVT::i32))); } + SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); } @@ -3277,7 +3684,12 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, MF->insert(It, loop1MBB); MF->insert(It, loop2MBB); MF->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... @@ -3315,7 +3727,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // ... BB = exitMBB; - MF->DeleteMachineInstr(MI); // The instruction is gone now. + MI->eraseFromParent(); // The instruction is gone now.
return BB; } @@ -3358,7 +3770,12 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); MF->insert(It, loopMBB); MF->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = MF->getRegInfo(); unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); @@ -3403,7 +3820,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // ... BB = exitMBB; - MF->DeleteMachineInstr(MI); // The instruction is gone now. + MI->eraseFromParent(); // The instruction is gone now. return BB; } @@ -3488,22 +3905,21 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) - .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) + .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -3516,11 +3932,12 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII->get(ARM::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII->get(ARM::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -3541,7 +3958,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) ? 
ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; - BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP) + BuildMI(*BB, MI, dl, TII->get(CopyOpc), ARM::SP) .addReg(SrcReg, getKillRegState(SrcIsKill)); } @@ -3573,7 +3990,7 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, NeedPred = true; NeedCC = true; NeedOp3 = true; break; } - MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP); + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(OpOpc), ARM::SP); if (OpOpc == ARM::tAND) AddDefaultT1CC(MIB); MIB.addReg(ARM::SP); @@ -3589,10 +4006,10 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; - BuildMI(BB, dl, TII->get(CopyOpc)) + BuildMI(*BB, MI, dl, TII->get(CopyOpc)) .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) .addReg(ARM::SP); - MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } } @@ -3893,7 +4310,8 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { // Narrowing shifts require an immediate right shift. if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) break; - llvm_unreachable("invalid shift count for narrowing vector shift intrinsic"); + llvm_unreachable("invalid shift count for narrowing vector shift " + "intrinsic"); default: llvm_unreachable("unhandled vector shift"); @@ -4156,14 +4574,13 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { if (!Subtarget->hasV6Ops()) // Pre-v6 does not support unaligned mem access. return false; - else { - // v6+ may or may not support unaligned mem access depending on the system - // configuration. - // FIXME: This is pretty conservative. Should we provide cmdline option to - // control the behaviour? - if (!Subtarget->isTargetDarwin()) - return false; - } + + // v6+ may or may not support unaligned mem access depending on the system + // configuration. + // FIXME: This is pretty conservative. Should we provide cmdline option to + // control the behaviour? + if (!Subtarget->isTargetDarwin()) + return false; switch (VT.getSimpleVT().SimpleTy) { default: @@ -4619,7 +5036,7 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } } if (StringRef("{cc}").equals_lower(Constraint)) - return std::make_pair(0U, ARM::CCRRegisterClass); + return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } @@ -4669,7 +5086,6 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, /// vector. If it is invalid, don't add anything to Ops. void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Constraint, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { SDValue Result(0, 0); @@ -4818,8 +5234,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, Ops.push_back(Result); return; } - return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, - Ops, DAG); + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } bool diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 9c7517c..3a38669 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -70,6 +70,8 @@ namespace llvm { EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. + TC_RETURN, // Tail call return pseudo. + THREAD_POINTER, DYN_ALLOC, // Dynamic allocation on the stack. @@ -133,6 +135,13 @@ namespace llvm { VUZP, // unzip (deinterleave) VTRN, // transpose + // Operands of the standard BUILD_VECTOR node are not legalized, which + // is fine if BUILD_VECTORs are always lowered to shuffles or other + // operations, but for ARM some BUILD_VECTORs are legal as-is and their + // operands need to be legalized. Define an ARM-specific version of + // BUILD_VECTOR for this purpose. + BUILD_VECTOR, + // Floating-point max and min: FMAX, FMIN @@ -141,11 +150,12 @@ namespace llvm { /// Define some predicates that are used for node matching. namespace ARM { - /// getVMOVImm - If this is a build_vector of constants which can be - /// formed by using a VMOV instruction of the specified element size, - /// return the constant being splatted. The ByteSize field indicates the - /// number of bytes of each element [1248]. - SDValue getVMOVImm(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); + /// getNEONModImm - If this is a valid vector constant for a NEON + /// instruction with a "modified immediate" operand (e.g., VMOV) of the + /// specified element size, return the encoded value for that immediate. + /// The ByteSize field indicates the number of bytes of each element [1248]. + SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV, + SelectionDAG &DAG); /// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be /// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd) @@ -189,9 +199,9 @@ namespace llvm { bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; /// isLegalICmpImmediate - Return true if the specified immediate is legal - /// icmp immediate, that is the target has icmp instructions which can compare - /// a register against the immediate without having to materialize the - /// immediate into a register. + /// icmp immediate, that is the target has icmp instructions which can + /// compare a register against the immediate without having to materialize + /// the immediate into a register. virtual bool isLegalICmpImmediate(int64_t Imm) const; /// getPreIndexedAddressParts - returns true by value, base pointer and @@ -232,7 +242,6 @@ namespace llvm { /// being processed is 'm'. 
virtual void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector<SDValue> &Ops, SelectionDAG &DAG) const; @@ -282,7 +291,8 @@ namespace llvm { SDValue &Root, SelectionDAG &DAG, DebugLoc dl) const; - CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; + CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, + bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, DebugLoc dl, SelectionDAG &DAG, const CCValAssign &VA, @@ -303,6 +313,7 @@ namespace llvm { SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; @@ -327,18 +338,34 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const; + /// IsEligibleForTailCallOptimization - Check whether the call is eligible + /// for tail call optimization. Targets which want to do tail call + /// optimization should implement this function. + bool IsEligibleForTailCallOptimization(SDValue Callee, + CallingConv::ID CalleeCC, + bool isVarArg, + bool isCalleeStructRet, + bool isCallerStructRet, + const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, + const SmallVectorImpl<ISD::InputArg> &Ins, + SelectionDAG& DAG) const; virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, + const SmallVectorImpl<SDValue> &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; + SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index d487df1..ac568e7 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -50,27 +50,23 @@ def VFPLdStMulFrm : Format<22>; def VFPMiscFrm : Format<23>; def ThumbFrm : Format<24>; - -def NEONFrm : Format<25>; -def NEONGetLnFrm : Format<26>; -def NEONSetLnFrm : Format<27>; -def NEONDupFrm : Format<28>; - -def MiscFrm : Format<29>; -def ThumbMiscFrm : Format<30>; - -def NLdStFrm : Format<31>; -def N1RegModImmFrm : Format<32>; -def N2RegFrm : Format<33>; -def NVCVTFrm : Format<34>; -def NVDupLnFrm : Format<35>; -def N2RegVShLFrm : Format<36>; -def N2RegVShRFrm : Format<37>; -def N3RegFrm : Format<38>; -def N3RegVShFrm : Format<39>; -def NVExtFrm : Format<40>; -def NVMulSLFrm : Format<41>; -def NVTBLFrm : Format<42>; +def MiscFrm : Format<25>; + +def NGetLnFrm : Format<26>; +def NSetLnFrm : Format<27>; +def NDupFrm : Format<28>; +def NLdStFrm : Format<29>; +def N1RegModImmFrm: Format<30>; +def N2RegFrm : Format<31>; +def NVCVTFrm : Format<32>; +def NVDupLnFrm : Format<33>; +def N2RegVShLFrm : Format<34>; +def N2RegVShRFrm : Format<35>; +def N3RegFrm : Format<36>; +def N3RegVShFrm : Format<37>; +def NVExtFrm : Format<38>; +def NVMulSLFrm : Format<39>; +def NVTBLFrm : Format<40>;
// Misc flags. @@ -1653,17 +1649,17 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, class NVGetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> - : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONGetLnFrm, itin, + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NGetLnFrm, itin, opc, dt, asm, pattern>; class NVSetLane<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> - : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONSetLnFrm, itin, + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NSetLnFrm, itin, opc, dt, asm, pattern>; class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, list<dag> pattern> - : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin, + : NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NDupFrm, itin, opc, dt, asm, pattern>; // Vector Duplicate Lane (from scalar to all elements) diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 85f6b40..ba228ff 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -63,7 +63,7 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const { void ARMInstrInfo:: reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo &TRI) const { DebugLoc dl = Orig->getDebugLoc(); unsigned Opcode = Orig->getOpcode(); switch (Opcode) { diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index d4199d1..4563ffe 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -35,7 +35,7 @@ public: void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo &TRI) const; /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index f3156d9..c73e204 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -53,6 +53,8 @@ def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>; def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + // Node definitions. def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>; def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>; @@ -117,6 +119,9 @@ def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6, def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; +def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. // @@ -858,13 +863,13 @@ def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo, IIC_iALUi, "adr$p\t$dst, #$label", []>; +} // neverHasSideEffects def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), Pseudo, IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []> { let Inst{25} = 1; } -} // neverHasSideEffects //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -1026,6 +1031,74 @@ let isCall = 1, } } +// Tail calls. + +let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { + // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, + D27, D28, D29, D30, D31, PC], + Uses = [SP] in { + def TCRETURNdi : AInoP<(outs), (ins i32imm:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>; + + def TCRETURNri : AInoP<(outs), (ins tcGPR:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsDarwin]>; + + def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b\t$dst @ TAILCALL", + []>, Requires<[IsDarwin]>; + + def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b.w\t$dst @ TAILCALL", + []>, Requires<[IsDarwin]>; + + def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops), + BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", + []>, Requires<[IsDarwin]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0b1110; + } + } + + // Non-Darwin versions (the difference is R9). + let Defs = [R0, R1, R2, R3, R12, + D0, D1, D2, D3, D4, D5, D6, D7, + D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, + D27, D28, D29, D30, D31, PC], + Uses = [SP] in { + def TCRETURNdiND : AInoP<(outs), (ins i32imm:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>; + + def TCRETURNriND : AInoP<(outs), (ins tcGPR:$dst, variable_ops), + Pseudo, IIC_Br, + "@TC_RETURN","\t$dst", []>, Requires<[IsNotDarwin]>; + + def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b\t$dst @ TAILCALL", + []>, Requires<[IsARM, IsNotDarwin]>; + + def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), + IIC_Br, "b.w\t$dst @ TAILCALL", + []>, Requires<[IsThumb, IsNotDarwin]>; + + def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops), + BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", + []>, Requires<[IsNotDarwin]> { + let Inst{7-4} = 0b0001; + let Inst{19-8} = 0b111111111111; + let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0b1110; + } + } +} + let isBranch = 1, isTerminator = 1 in { // B is "predicable" since it can be xformed into a Bcc. let isBarrier = 1 in { @@ -1397,6 +1470,14 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, let Inst{25} = 0; } +// A version for the smaller set of tail call registers. 
+let neverHasSideEffects = 1 in +def MOVr_TC : AsI1<0b1101, (outs tcGPR:$dst), (ins tcGPR:$src), DPFrm, + IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP { + let Inst{11-4} = 0b00000000; + let Inst{25} = 0; +} + def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { @@ -2530,31 +2611,30 @@ let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, - D31 ] in { + D31 ], hasSideEffects = 1, isBarrier = 1 in { def Int_eh_sjlj_setjmp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" - "add\t$val, pc, #8\n\t" - "str\t$val, [$src, #+4]\n\t" - "mov\tr0, #0\n\t" - "add\tpc, pc, #0\n\t" - "mov\tr0, #1 ${:comment} eh_setjmp end", "", + "add\t$val, pc, #8\t${:comment} eh_setjmp begin\n\t" + "str\t$val, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, HasVFP2]>; } let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], + hasSideEffects = 1, isBarrier = 1 in { def Int_eh_sjlj_setjmp_nofp : XI<(outs), (ins GPR:$src, GPR:$val), AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, NoItinerary, - "str\tsp, [$src, #+8] ${:comment} eh_setjmp begin\n\t" - "add\t$val, pc, #8\n\t" - "str\t$val, [$src, #+4]\n\t" - "mov\tr0, #0\n\t" - "add\tpc, pc, #0\n\t" - "mov\tr0, #1 ${:comment} eh_setjmp end", "", + "add\t$val, pc, #8\n ${:comment} eh_setjmp begin\n\t" + "str\t$val, [$src, #+4]\n\t" + "mov\tr0, #0\n\t" + "add\tpc, pc, #0\n\t" + "mov\tr0, #1 ${:comment} eh_setjmp end", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, GPR:$val))]>, Requires<[IsARM, NoVFP]>; } @@ -2621,6 +2701,24 @@ def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), // TODO: add,sub,and, 3-instr forms? 
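A note on the TC_RETURN patterns that follow: a call in tail position reuses the caller's frame, so codegen can emit a plain branch (the TAILJMP forms above) instead of a bl plus epilogue-return pair. The source-level shape of the transformation, as a hedged illustration in plain C++ (our example, not LLVM code):

#include <cstdio>

static unsigned gcdRec(unsigned A, unsigned B) {
  if (B == 0) return A;
  return gcdRec(B, A % B);   // tail call: can become a plain branch, no new frame
}

static unsigned gcdLoop(unsigned A, unsigned B) {
  while (B != 0) {           // what the tail call effectively becomes once lowered
    unsigned T = A % B;
    A = B;
    B = T;
  }
  return A;
}

int main() {
  std::printf("%u %u\n", gcdRec(48, 18), gcdLoop(48, 18)); // prints: 6 6
  return 0;
}

The same reasoning explains the tcGPR operand class defined later in this patch: by the time the branch executes, the callee-saved registers have been restored, so the target address has to live in a caller-clobbered register.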
+// Tail calls +def : ARMPat<(ARMtcret tcGPR:$dst), + (TCRETURNri tcGPR:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), + (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), + (TCRETURNdi texternalsym:$dst)>, Requires<[IsDarwin]>; + +def : ARMPat<(ARMtcret tcGPR:$dst), + (TCRETURNriND tcGPR:$dst)>, Requires<[IsNotDarwin]>; + +def : ARMPat<(ARMtcret (i32 tglobaladdr:$dst)), + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; + +def : ARMPat<(ARMtcret (i32 texternalsym:$dst)), + (TCRETURNdiND texternalsym:$dst)>, Requires<[IsNotDarwin]>; // Direct calls def : ARMPat<(ARMcall texternalsym:$func), (BL texternalsym:$func)>, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 197ec16..a84315f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -98,17 +98,8 @@ def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; // NEON operand definitions //===----------------------------------------------------------------------===// -def h8imm : Operand { - let PrintMethod = "printHex8ImmOperand"; -} -def h16imm : Operand { - let PrintMethod = "printHex16ImmOperand"; -} -def h32imm : Operand { - let PrintMethod = "printHex32ImmOperand"; -} -def h64imm : Operand { - let PrintMethod = "printHex64ImmOperand"; +def nModImm : Operand { + let PrintMethod = "printNEONModImmOperand"; } //===----------------------------------------------------------------------===// @@ -812,11 +803,6 @@ def DSubReg_f64_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + N->getZExtValue(), MVT::i32); }]>; -def DSubReg_f64_other_reg : SDNodeXFormgetTargetConstant(ARM::dsub_0 + (1 - N->getZExtValue()), - MVT::i32); -}]>; // Extract S sub-registers of Q/D registers. def SSubReg_f32_reg : SDNodeXForm; // For disassembly only. defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$dst, $src, #0">; + "$dst, $src, #0">; // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, @@ -2834,73 +2820,70 @@ def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src), // VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm. def VMOV_get_imm8 : SDNodeXForm; def vmovImm8 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 1, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0; }], VMOV_get_imm8>; // VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm. def VMOV_get_imm16 : SDNodeXForm; def vmovImm16 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 2, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0; }], VMOV_get_imm16>; // VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm. def VMOV_get_imm32 : SDNodeXForm; def vmovImm32 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 4, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0; }], VMOV_get_imm32>; // VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm. def VMOV_get_imm64 : SDNodeXForm; def vmovImm64 : PatLeaf<(build_vector), [{ - return ARM::getVMOVImm(N, 8, *CurDAG).getNode() != 0; + return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0; }], VMOV_get_imm64>; -// Note: Some of the cmode bits in the following VMOV instructions need to -// be encoded based on the immed values. 
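The vmovImm8/16/32/64 leaves above now call ARM::getNEONModImm (renamed from getVMOVImm), which only has a chance of encoding the constant when the 64-bit lane pattern is a splat of the element width. A rough sketch of that precondition; isSplat is our own helper, not an LLVM API:

#include <cstdint>
#include <cstdio>

// True if the 64-bit lane value is the same ByteWidth-byte pattern repeated.
static bool isSplat(uint64_t V, unsigned ByteWidth) {
  unsigned Bits = ByteWidth * 8;
  if (Bits >= 64)
    return true;                     // a single 64-bit element is trivially a splat
  uint64_t Mask = (1ULL << Bits) - 1;
  uint64_t Elt = V & Mask;
  for (unsigned Shift = Bits; Shift < 64; Shift += Bits)
    if (((V >> Shift) & Mask) != Elt)
      return false;
  return true;
}

int main() {
  std::printf("%d\n", isSplat(0x00FF00FF00FF00FFULL, 2)); // 1: an i16 splat
  std::printf("%d\n", isSplat(0x0123456789ABCDEFULL, 4)); // 0: the two halves differ
  return 0;
}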
- let isReMaterializable = 1 in { def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), - (ins h8imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), - (ins h8imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i8", "$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; -def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), - (ins h16imm:$SIMM), IIC_VMOVImm, +def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; -def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), - (ins h16imm:$SIMM), IIC_VMOVImm, +def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i16", "$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; -def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), - (ins h32imm:$SIMM), IIC_VMOVImm, +def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; -def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), - (ins h32imm:$SIMM), IIC_VMOVImm, +def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst), + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i32", "$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), - (ins h64imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), - (ins h64imm:$SIMM), IIC_VMOVImm, + (ins nModImm:$SIMM), IIC_VMOVImm, "vmov", "i64", "$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; } // isReMaterializable @@ -3122,17 +3105,6 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; -def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), - (INSERT_SUBREG QPR:$src, - (i64 (EXTRACT_SUBREG QPR:$src, - (DSubReg_f64_reg imm:$lane))), - (DSubReg_f64_other_reg imm:$lane))>; -def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)), - (INSERT_SUBREG QPR:$src, - (f64 (EXTRACT_SUBREG QPR:$src, - (DSubReg_f64_reg imm:$lane))), - (DSubReg_f64_other_reg imm:$lane))>; - // VMOVN : Vector Narrowing Move defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn", "i", int_arm_neon_vmovn>; @@ -3319,22 +3291,16 @@ let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 - DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "", []>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 - DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", 
"", []>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTB4, - "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, - DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; + "vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "", []>; } // hasExtraSrcRegAllocReq = 1 // VTBX : Vector Table Extension @@ -3348,23 +3314,18 @@ let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2, - "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2 - DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; + "vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst", []>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTBX3, - "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, - DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; + "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", + "$orig = $dst", []>; def VTBX4 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4, "vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", - "$orig = $dst", - [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, - DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; + "$orig = $dst", []>; } // hasExtraSrcRegAllocReq = 1 //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 40f924b..bc0790d 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -894,11 +894,11 @@ def tLEApcrel : T1I<(outs tGPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, "adr$p\t$dst, #$label", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 +} // neverHasSideEffects def tLEApcrelJT : T1I<(outs tGPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, "adr$p\t$dst, #${label}_${id}", []>, T1Encoding<{1,0,1,0,0,?}>; // A6.2 & A8.6.10 -} // neverHasSideEffects //===----------------------------------------------------------------------===// // TLS Instructions @@ -923,18 +923,18 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. -// The current SP is passed in $val, and we reuse the reg as a scratch. +// $val is a scratch register for our use. 
 let Defs =
-  [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in {
+  [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ], hasSideEffects = 1,
+  isBarrier = 1 in {
   def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
                               AddrModeNone, SizeSpecial, NoItinerary,
-                              "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n"
-                              "\tmov\t$val, pc\n"
-                              "\tadds\t$val, #7\n"
-                              "\tstr\t$val, [$src, #4]\n"
-                              "\tmovs\tr0, #0\n"
-                              "\tb\t1f\n"
-                              "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n"
+                              "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t"
+                              "adds\t$val, #7\n\t"
+                              "str\t$val, [$src, #4]\n\t"
+                              "movs\tr0, #0\n\t"
+                              "b\t1f\n\t"
+                              "movs\tr0, #1\t${:comment} end eh.setjmp\n\t"
                               "1:", "",
                               [(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
 }
@@ -1037,7 +1037,8 @@ def : T1Pat<(i32 imm0_255_comp:$src),
 // scheduling.
 let isReMaterializable = 1 in
 def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
-                  NoItinerary, "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+                  NoItinerary,
+                  "${:comment} ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
                   [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
                                               imm:$cp))]>,
                Requires<[IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index b91c089..4692f2a 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -637,8 +637,7 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
 multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
   def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc,
               ".w\t$dst, $src",
-              [(set GPR:$dst, (opnode GPR:$src))]>,
-              Requires<[HasT2ExtractPack]> {
+              [(set GPR:$dst, (opnode GPR:$src))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0100;
     let Inst{22-20} = opcod;
@@ -649,8 +648,7 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
   }
   def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
                   opc, ".w\t$dst, $src, ror $rot",
-                  [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]>,
-                  Requires<[HasT2ExtractPack]> {
+                  [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> {
     let Inst{31-27} = 0b11111;
     let Inst{26-23} = 0b0100;
     let Inst{22-20} = opcod;
@@ -661,8 +659,8 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
   }
 }
 
-// SXTB16 and UXTB16 do not need the .w qualifier.
-multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
+// UXTB16 - Requires T2ExtractPack, does not need the .w qualifier.
+multiclass T2I_unary_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
   def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc,
               "\t$dst, $src",
               [(set GPR:$dst, (opnode GPR:$src))]>,
@@ -689,9 +687,9 @@ multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> {
   }
 }
 
-// DO variant - disassembly only, no pattern
-
-multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
+// SXTB16 - Requires T2ExtractPack, does not need the .w qualifier, no pattern
+// supported yet.
+multiclass T2I_unary_rrot_sxtb16 opcod, string opc> { def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, opc, "\t$dst, $src", []> { let Inst{31-27} = 0b11111; @@ -787,6 +785,7 @@ def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p), IIC_iALUi, let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } +} // neverHasSideEffects def t2LEApcrelJT : T2XI<(outs GPR:$dst), (ins i32imm:$label, nohash_imm:$id, pred:$p), IIC_iALUi, "adr$p.w\t$dst, #${label}_${id}", []> { @@ -798,7 +797,6 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst), let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; } -} // neverHasSideEffects // ADD r, sp, {so_imm|i12} def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), @@ -1330,7 +1328,7 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb", UnOpFrag<(sext_inreg node:$Src, i8)>>; defm t2SXTH : T2I_unary_rrot<0b000, "sxth", UnOpFrag<(sext_inreg node:$Src, i16)>>; -defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">; +defm t2SXTB16 : T2I_unary_rrot_sxtb16<0b010, "sxtb16">; defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>; @@ -1347,13 +1345,13 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; defm t2UXTH : T2I_unary_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16", +defm t2UXTB16 : T2I_unary_rrot_uxtb16<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 24)>; + (t2UXTB16r_rot GPR:$Src, 24)>, Requires<[HasT2ExtractPack]>; def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), - (t2UXTB16r_rot GPR:$Src, 8)>; + (t2UXTB16r_rot GPR:$Src, 8)>, Requires<[HasT2ExtractPack]>; defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab", BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; @@ -2389,37 +2387,36 @@ let isCall = 1, // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. -// The current SP is passed in $val, and we reuse the reg as a scratch. +// $val is a scratch register for our use. 
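Back to the UXTB16 patterns a few hunks up: the two T2Pats are now gated on HasT2ExtractPack, and the ror #8 form is easy to sanity-check, since zero-extending bytes 0 and 2 of a right-rotated-by-8 value is exactly (x >> 8) & 0x00FF00FF. A small self-checking model (plain C++, helper names ours):

#include <cassert>
#include <cstdint>

static uint32_t ror32(uint32_t X, unsigned R) {
  R &= 31;
  return R ? (X >> R) | (X << (32 - R)) : X;
}

// UXTB16: zero-extend bytes 0 and 2 of the (optionally rotated) source
// into two 16-bit lanes.
static uint32_t uxtb16(uint32_t X, unsigned Rot) {
  return ror32(X, Rot) & 0x00FF00FF;
}

int main() {
  const uint32_t Tests[] = {0x12345678u, 0xDEADBEEFu, 0u, ~0u};
  for (uint32_t X : Tests)
    assert(((X >> 8) & 0x00FF00FF) == uxtb16(X, 8)); // the srl-by-8 T2Pat above
  return 0;
}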
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, - D31 ] in { + D31 ], hasSideEffects = 1, isBarrier = 1 in { def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" - "\tmov\t$val, pc\n" - "\tadds\t$val, #7\n" - "\tstr\t$val, [$src, #4]\n" - "\tmovs\tr0, #0\n" - "\tb\t1f\n" - "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" + "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" + "adds\t$val, #7\n\t" + "str\t$val, [$src, #4]\n\t" + "movs\tr0, #0\n\t" + "b\t1f\n\t" + "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, HasVFP2]>; } let Defs = - [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ] in { + [ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR ], + hasSideEffects = 1, isBarrier = 1 in { def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins GPR:$src, tGPR:$val), AddrModeNone, SizeSpecial, NoItinerary, - "str\t$val, [$src, #8]\t${:comment} begin eh.setjmp\n" - "\tmov\t$val, pc\n" - "\tadds\t$val, #7\n" - "\tstr\t$val, [$src, #4]\n" - "\tmovs\tr0, #0\n" - "\tb\t1f\n" - "\tmovs\tr0, #1\t${:comment} end eh.setjmp\n" + "mov\t$val, pc\t${:comment} begin eh.setjmp\n\t" + "adds\t$val, #7\n\t" + "str\t$val, [$src, #4]\n\t" + "movs\tr0, #0\n\t" + "b\t1f\n\t" + "movs\tr0, #1\t${:comment} end eh.setjmp\n\t" "1:", "", [(set R0, (ARMeh_sjlj_setjmp GPR:$src, tGPR:$val))]>, Requires<[IsThumb2, NoVFP]>; @@ -2529,6 +2526,7 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br, // IT block +let Defs = [ITSTATE] in def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask), AddrModeNone, Size2Bytes, IIC_iALUx, "it$mask\t$cc", "", []> { @@ -2691,7 +2689,8 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), // scheduling. let canFoldAsLoad = 1, isReMaterializable = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), - NoItinerary, "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + NoItinerary, + "${:comment} ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), imm:$cp))]>, Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 54474cf..84c23e1 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -255,25 +255,25 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, // Between half-precision and single-precision. For disassembly only. 
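On the VCVTB/VCVTT pairs below: the B and T variants differ only in which 16-bit half of the 32-bit S register carries the half-precision value, bottom (bits 15:0) for VCVTB and top (bits 31:16) for VCVTT. A toy extraction model (ours, not the converter itself):

#include <cstdint>
#include <cstdio>

static uint16_t bottomHalf(uint32_t S) { return uint16_t(S & 0xFFFF); } // VCVTB
static uint16_t topHalf(uint32_t S)    { return uint16_t(S >> 16); }    // VCVTT

int main() {
  uint32_t S = 0x3C004000; // top = 0x3C00 (f16 1.0), bottom = 0x4000 (f16 2.0)
  std::printf("vcvtb reads %#x, vcvtt reads %#x\n", bottomHalf(S), topHalf(S));
  return 0;
}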
-def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTBSH: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f32_to_f16 SPR:$a), (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; -def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTBHS: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; def : ARMPat<(f16_to_f32 GPR:$a), (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; -def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTTSH: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$dst, $a", [/* For disassembly only; pattern left blank */]>; -def VCVTTHS : ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), +def VCVTTHS: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$dst, $a", [/* For disassembly only; pattern left blank */]>; diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index ff332b7..f5d9eff 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -143,7 +143,8 @@ namespace llvm { JumpTableId2AddrMap[JTI] = Addr; } - /// getPCLabelAddr - Retrieve the address of the PC label of the specified id. + /// getPCLabelAddr - Retrieve the address of the PC label of the + /// specified id. intptr_t getPCLabelAddr(unsigned Id) const { DenseMap::const_iterator I = PCLabelMap.find(Id); assert(I != PCLabelMap.end()); diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 8585c1e..f80e316 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -74,11 +74,14 @@ namespace { private: struct MemOpQueueEntry { int Offset; + unsigned Reg; + bool isKill; unsigned Position; MachineBasicBlock::iterator MBBI; bool Merged; - MemOpQueueEntry(int o, int p, MachineBasicBlock::iterator i) - : Offset(o), Position(p), MBBI(i), Merged(false) {} + MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, + MachineBasicBlock::iterator i) + : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; typedef SmallVector MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; @@ -128,30 +131,30 @@ namespace { static int getLoadStoreMultipleOpcode(int Opcode) { switch (Opcode) { case ARM::LDR: - NumLDMGened++; + ++NumLDMGened; return ARM::LDM; case ARM::STR: - NumSTMGened++; + ++NumSTMGened; return ARM::STM; case ARM::t2LDRi8: case ARM::t2LDRi12: - NumLDMGened++; + ++NumLDMGened; return ARM::t2LDM; case ARM::t2STRi8: case ARM::t2STRi12: - NumSTMGened++; + ++NumSTMGened; return ARM::t2STM; case ARM::VLDRS: - NumVLDMGened++; + ++NumVLDMGened; return ARM::VLDMS; case ARM::VSTRS: - NumVSTMGened++; + ++NumVSTMGened; return ARM::VSTMS; case ARM::VLDRD: - NumVLDMGened++; + ++NumVLDMGened; return ARM::VLDMD; case ARM::VSTRD: - NumVSTMGened++; + ++NumVSTMGened; return ARM::VSTMD; default: llvm_unreachable("Unhandled opcode!"); } @@ -264,45 +267,59 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on // success. 
-void ARMLoadStoreOpt:: -MergeOpsUpdate(MachineBasicBlock &MBB, - MemOpQueue &memOps, - unsigned memOpsBegin, - unsigned memOpsEnd, - unsigned insertAfter, - int Offset, - unsigned Base, - bool BaseKill, - int Opcode, - ARMCC::CondCodes Pred, - unsigned PredReg, - unsigned Scratch, - DebugLoc dl, - SmallVector &Merges) { +void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB, + MemOpQueue &memOps, + unsigned memOpsBegin, unsigned memOpsEnd, + unsigned insertAfter, int Offset, + unsigned Base, bool BaseKill, + int Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, + DebugLoc dl, + SmallVector &Merges) { // First calculate which of the registers should be killed by the merged // instruction. - SmallVector, 8> Regs; const unsigned insertPos = memOps[insertAfter].Position; + + SmallSet UnavailRegs; + SmallSet KilledRegs; + DenseMap Killer; + for (unsigned i = 0; i < memOpsBegin; ++i) { + if (memOps[i].Position < insertPos && memOps[i].isKill) { + unsigned Reg = memOps[i].Reg; + if (memOps[i].Merged) + UnavailRegs.insert(Reg); + else { + KilledRegs.insert(Reg); + Killer[Reg] = i; + } + } + } + for (unsigned i = memOpsEnd, e = memOps.size(); i != e; ++i) { + if (memOps[i].Position < insertPos && memOps[i].isKill) { + unsigned Reg = memOps[i].Reg; + KilledRegs.insert(Reg); + Killer[Reg] = i; + } + } + + SmallVector, 8> Regs; for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { - const MachineOperand &MO = memOps[i].MBBI->getOperand(0); - unsigned Reg = MO.getReg(); - bool isKill = MO.isKill(); + unsigned Reg = memOps[i].Reg; + if (UnavailRegs.count(Reg)) + // Register is killed before and it's not easy / possible to update the + // kill marker on already merged instructions. Abort. + return; // If we are inserting the merged operation after an unmerged operation that // uses the same register, make sure to transfer any kill flag. - for (unsigned j = memOpsEnd, e = memOps.size(); !isKill && j != e; ++j) - if (memOps[j].PositiongetOperand(0); - if (MOJ.getReg() == Reg && MOJ.isKill()) - isKill = true; - } - + bool isKill = memOps[i].isKill || KilledRegs.count(Reg); Regs.push_back(std::make_pair(Reg, isKill)); } // Try to do the merge. MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI; - Loc++; + ++Loc; if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Regs)) return; @@ -311,13 +328,13 @@ MergeOpsUpdate(MachineBasicBlock &MBB, Merges.push_back(prior(Loc)); for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) { // Remove kill flags from any unmerged memops that come before insertPos. - if (Regs[i-memOpsBegin].second) - for (unsigned j = memOpsEnd, e = memOps.size(); j != e; ++j) - if (memOps[j].PositiongetOperand(0); - if (MOJ.getReg() == Regs[i-memOpsBegin].first && MOJ.isKill()) - MOJ.setIsKill(false); - } + if (Regs[i-memOpsBegin].second) { + unsigned Reg = Regs[i-memOpsBegin].first; + if (KilledRegs.count(Reg)) { + unsigned j = Killer[Reg]; + memOps[j].MBBI->getOperand(0).setIsKill(false); + } + } MBB.erase(memOps[i].MBBI); memOps[i].Merged = true; } @@ -517,8 +534,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } // Try merging with the previous instruction. 
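Before the base-update hunks below, a note on the MergeOpsUpdate rewrite above: it now precomputes, per register, whether an earlier unmerged memop holds the kill (transferable, and cleared later via the Killer map) or whether the kill sits in an already-merged instruction (not updatable, so the merge is abandoned). A stripped-down sketch of that decision with standard containers; MemOp's fields mirror MemOpQueueEntry, everything else is ours:

#include <cstdio>
#include <vector>

struct MemOp {
  unsigned Position; // program order
  unsigned Reg;      // register loaded or stored
  bool isKill;       // is this the last use of Reg?
  bool Merged;       // already folded into an earlier LDM/STM?
};

// A kill on an earlier, still-unmerged memop can be transferred to the merged
// instruction (and cleared at its old site); a kill buried in an already-merged
// instruction cannot be updated, so the caller should abort the merge.
enum class KillState { NotKilled, TransferKill, Abort };

static KillState killStateFor(const std::vector<MemOp> &Ops,
                              unsigned InsertPos, unsigned Reg) {
  for (const MemOp &Op : Ops) {
    if (Op.Position >= InsertPos || Op.Reg != Reg || !Op.isKill)
      continue;
    return Op.Merged ? KillState::Abort : KillState::TransferKill;
  }
  return KillState::NotKilled;
}

int main() {
  std::vector<MemOp> Ops = {{0, 1, true, false},   // r1 killed, still unmerged
                            {1, 2, true, true}};   // r2 killed, already merged
  std::printf("%d %d %d\n",
              (int)killStateFor(Ops, 2, 1),   // 1: transfer the kill flag
              (int)killStateFor(Ops, 2, 2),   // 2: abort the merge
              (int)killStateFor(Ops, 2, 3));  // 0: no kill to worry about
  return 0;
}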
-  if (MBBI != MBB.begin()) {
+  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+  if (MBBI != BeginMBBI) {
     MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+      --PrevMBBI;
     if (isAM4) {
       if (Mode == ARM_AM::ia &&
           isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
@@ -541,8 +561,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
   }
 
   // Try merging with the next instruction.
-  if (!DoMerge && MBBI != MBB.end()) {
+  MachineBasicBlock::iterator EndMBBI = MBB.end();
+  if (!DoMerge && MBBI != EndMBBI) {
     MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+      ++NextMBBI;
     if (isAM4) {
       if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
           isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
@@ -669,8 +692,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
   unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
 
   // Try merging with the previous instruction.
-  if (MBBI != MBB.begin()) {
+  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
+  if (MBBI != BeginMBBI) {
     MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
+    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
+      --PrevMBBI;
     if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
       DoMerge = true;
       AddSub = ARM_AM::sub;
@@ -685,8 +711,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
 
   // Try merging with the next instruction.
-  if (!DoMerge && MBBI != MBB.end()) {
+  MachineBasicBlock::iterator EndMBBI = MBB.end();
+  if (!DoMerge && MBBI != EndMBBI) {
     MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
+    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
+      ++NextMBBI;
     if (!isAM5 &&
         isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
       DoMerge = true;
@@ -759,18 +788,21 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
 /// isMemoryOp - Returns true if instruction is a memory operations (that this
 /// pass is capable of operating on).
 static bool isMemoryOp(const MachineInstr *MI) {
-  if (MI->hasOneMemOperand()) {
-    const MachineMemOperand *MMO = *MI->memoperands_begin();
+  // When no memory operands are present, conservatively assume unaligned,
+  // volatile, unfoldable.
+  if (!MI->hasOneMemOperand())
+    return false;
 
-    // Don't touch volatile memory accesses - we may be changing their order.
-    if (MMO->isVolatile())
-      return false;
+  const MachineMemOperand *MMO = *MI->memoperands_begin();
 
-    // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
-    // not.
-    if (MMO->getAlignment() < 4)
-      return false;
-  }
+  // Don't touch volatile memory accesses - we may be changing their order.
+  if (MMO->isVolatile())
+    return false;
+
+  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+  // not.
+  if (MMO->getAlignment() < 4)
+    return false;
 
   // str could probably be eliminated entirely, but for now we just want
   // to avoid making a mess of it.
@@ -898,6 +930,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
   if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
     return false;
 
+  MachineBasicBlock::iterator NewBBI = MBBI;
   bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
   bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
   bool EvenDeadKill = isLd ?
@@ -942,6 +975,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, getKillRegState(OddDeadKill) | getUndefRegState(OddUndef)); ++NumSTRD2STM; } + NewBBI = llvm::prior(MBBI); } else { // Split into two instructions. assert((!isT2 || !OffReg) && @@ -962,14 +996,15 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, OddReg, OddDeadKill, false, BaseReg, false, BaseUndef, OffReg, false, OffUndef, Pred, PredReg, TII, isT2); + NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, false, BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, Pred, PredReg, TII, isT2); } else { if (OddReg == EvenReg && EvenDeadKill) { - // If the two source operands are the same, the kill marker is probably - // on the first one. e.g. + // If the two source operands are the same, the kill marker is + // probably on the first one. e.g. // t2STRDi8 %R5, %R5, %R9, 0, 14, %reg0 EvenDeadKill = false; OddDeadKill = true; @@ -978,6 +1013,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, OffReg, false, OffUndef, Pred, PredReg, TII, isT2); + NewBBI = llvm::prior(MBBI); InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill, OddUndef, BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, @@ -989,8 +1025,9 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, ++NumSTRD2STR; } - MBBI = prior(MBBI); MBB.erase(MI); + MBBI = NewBBI; + return true; } return false; } @@ -1023,6 +1060,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { if (isMemOp) { int Opcode = MBBI->getOpcode(); unsigned Size = getLSMultipleTransferSize(MBBI); + const MachineOperand &MO = MBBI->getOperand(0); + unsigned Reg = MO.getReg(); + bool isKill = MO.isDef() ? false : MO.isKill(); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg); @@ -1044,8 +1084,8 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { CurrSize = Size; CurrPred = Pred; CurrPredReg = PredReg; - MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI)); + ++NumMemOps; Advance = true; } else { if (Clobber) { @@ -1057,15 +1097,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // No need to match PredReg. // Continue adding to the queue. if (Offset > MemOps.back().Offset) { - MemOps.push_back(MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; } else { for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) { if (Offset < I->Offset) { - MemOps.insert(I, MemOpQueueEntry(Offset, Position, MBBI)); - NumMemOps++; + MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill, + Position, MBBI)); + ++NumMemOps; Advance = true; break; } else if (Offset == I->Offset) { @@ -1078,7 +1120,12 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { } } - if (Advance) { + if (MBBI->isDebugValue()) { + ++MBBI; + if (MBBI == E) + // Reach the end of the block, try merging the memory instructions. + TryMerge = true; + } else if (Advance) { ++Position; ++MBBI; if (MBBI == E) @@ -1279,7 +1326,7 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, // some day. 
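The hunks above and the IsSafeAndProfitableToMove change below all follow one idiom: step over DBG_VALUE instructions while scanning neighbours or counting positions, so that compiling with debug info cannot change which loads and stores get merged. Reduced to standard iterators (a sketch over our own toy types; isDebugValue mirrors MachineInstr's query):

#include <cstdio>
#include <iterator>
#include <list>

struct Inst { bool IsDebugValue; int Id; };
typedef std::list<Inst>::iterator Iter;

// Mirrors "while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue()) --PrevMBBI;".
// The caller guarantees I != Begin.
static Iter prevRealInst(Iter I, Iter Begin) {
  Iter Prev = std::prev(I);
  while (Prev != Begin && Prev->IsDebugValue)
    --Prev;
  return Prev;
}

int main() {
  std::list<Inst> MBB = {{false, 0}, {true, 1}, {true, 2}, {false, 3}};
  Iter I = std::prev(MBB.end());                         // the instruction Id 3
  std::printf("%d\n", prevRealInst(I, MBB.begin())->Id); // prints 0, skipping 1 and 2
  return 0;
}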
SmallSet AddedRegPressure; while (++I != E) { - if (MemOps.count(&*I)) + if (I->isDebugValue() || MemOps.count(&*I)) continue; const TargetInstrDesc &TID = I->getDesc(); if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) @@ -1411,7 +1458,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, std::sort(Ops.begin(), Ops.end(), OffsetCompare()); // The loads / stores of the same base are in order. Scan them from first to - // last and check for the followins: + // last and check for the following: // 1. Any def of base. // 2. Any gaps. while (Ops.size() > 1) { @@ -1474,7 +1521,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, } else { // This is the new location for the loads / stores. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; - while (InsertPos != MBB->end() && MemOps.count(InsertPos)) + while (InsertPos != MBB->end() + && (MemOps.count(InsertPos) || InsertPos->isDebugValue())) ++InsertPos; // If we are moving a pair of loads / stores, see if it makes sense @@ -1562,7 +1610,9 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { break; } - MI2LocMap[MI] = Loc++; + if (!MI->isDebugValue()) + MI2LocMap[MI] = ++Loc; + if (!isMemoryOp(MI)) continue; unsigned PredReg = 0; diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index 0134276..7e57a1c 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -88,6 +88,9 @@ class ARMFunctionInfo : public MachineFunctionInfo { /// VarArgsFrameIndex - FrameIndex for start of varargs area. int VarArgsFrameIndex; + /// HasITBlocks - True if IT blocks have been inserted. + bool HasITBlocks; + public: ARMFunctionInfo() : isThumb(false), @@ -97,7 +100,8 @@ public: FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0), - JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0), + HasITBlocks(false) {} explicit ARMFunctionInfo(MachineFunction &MF) : isThumb(MF.getTarget().getSubtarget().isThumb()), @@ -108,7 +112,8 @@ public: GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), GPRCS1Frames(32), GPRCS2Frames(32), DPRCSFrames(32), SpilledCSRegs(MF.getTarget().getRegisterInfo()->getNumRegs()), - JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0) {} + JumpTableUId(0), ConstPoolEntryUId(0), VarArgsFrameIndex(0), + HasITBlocks(false) {} bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -229,6 +234,9 @@ public: int getVarArgsFrameIndex() const { return VarArgsFrameIndex; } void setVarArgsFrameIndex(int Index) { VarArgsFrameIndex = Index; } + + bool hasITBlocks() const { return HasITBlocks; } + void setHasITBlocks(bool h) { HasITBlocks = h; } }; } // End llvm namespace diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 6beca8b..d020f3c 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -153,11 +153,11 @@ def Q15 : ARMReg<15, "q15", [D30, D31]>; // Pseudo 256-bit registers to represent pairs of Q registers. These should // never be present in the emitted code. -// These are used for NEON load / store instructions, e.g. vld4, vst3. 
-// NOTE: It's possible to define more QQ registers since technical the -// starting D register number doesn't have to be multiple of 4. e.g. -// D1, D2, D3, D4 would be a legal quad. But that would make the sub-register -// stuffs very messy. +// These are used for NEON load / store instructions, e.g., vld4, vst3. +// NOTE: It's possible to define more QQ registers since technically the +// starting D register number doesn't have to be multiple of 4, e.g., +// D1, D2, D3, D4 would be a legal quad, but that would make the subregister +// stuff very messy. let SubRegIndices = [qsub_0, qsub_1] in { let CompositeIndices = [(dsub_2 qsub_1, dsub_0), (dsub_3 qsub_1, dsub_1), (ssub_4 qsub_1, ssub_0), (ssub_5 qsub_1, ssub_1), @@ -183,7 +183,8 @@ let CompositeIndices = [(qsub_2 qqsub_1, qsub_0), (qsub_3 qqsub_1, qsub_1), (ssub_8 qqsub_1, ssub_0), (ssub_9 qqsub_1, ssub_1), (ssub_10 qqsub_1, ssub_2), (ssub_11 qqsub_1, ssub_3), (ssub_12 qqsub_1, ssub_4), (ssub_13 qqsub_1, ssub_5), - (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in { + (ssub_14 qqsub_1, ssub_6), (ssub_15 qqsub_1, ssub_7)] in +{ def QQQQ0 : ARMReg<0, "qqqq0", [QQ0, QQ1]>; def QQQQ1 : ARMReg<1, "qqqq1", [QQ2, QQ3]>; } @@ -196,9 +197,9 @@ def QQQQ3 : ARMReg<3, "qqqq3", [QQ6, QQ7]>; } // Current Program Status Register. -def CPSR : ARMReg<0, "cpsr">; - -def FPSCR : ARMReg<1, "fpscr">; +def CPSR : ARMReg<0, "cpsr">; +def FPSCR : ARMReg<1, "fpscr">; +def ITSTATE : ARMReg<2, "itstate">; // Register classes. // @@ -348,6 +349,73 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> { }]; } +// For tail calls, we can't use callee-saved registers, as they are restored +// to the saved value before the tail call, which would clobber a call address. +// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of +// this class and the preceding one(!) This is what we want. +def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> { + let MethodProtos = [{ + iterator allocation_order_begin(const MachineFunction &MF) const; + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + // R9 is available. + static const unsigned ARM_GPR_R9_TC[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R9, ARM::R12 }; + // R9 is not available. + static const unsigned ARM_GPR_NOR9_TC[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3, + ARM::R12 }; + + // For Thumb1 mode, we don't want to allocate hi regs at all, as we + // don't know how to spill them. If we make our prologue/epilogue code + // smarter at some point, we can go back to using the above allocation + // orders for the Thumb1 instructions that know how to use hi regs. + static const unsigned THUMB_GPR_AO_TC[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; + + tcGPRClass::iterator + tcGPRClass::allocation_order_begin(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget(); + if (Subtarget.isThumb1Only()) + return THUMB_GPR_AO_TC; + if (Subtarget.isTargetDarwin()) { + if (Subtarget.isR9Reserved()) + return ARM_GPR_NOR9_TC; + else + return ARM_GPR_R9_TC; + } else + // R9 is either callee-saved or reserved; can't use it. 
+ return ARM_GPR_NOR9_TC; + } + + tcGPRClass::iterator + tcGPRClass::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const ARMSubtarget &Subtarget = TM.getSubtarget(); + GPRClass::iterator I; + + if (Subtarget.isThumb1Only()) { + I = THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned)); + return I; + } + + if (Subtarget.isTargetDarwin()) { + if (Subtarget.isR9Reserved()) + I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); + else + I = ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)); + } else + // R9 is either callee-saved or reserved; can't use it. + I = ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned)); + return I; + } + }]; +} + + // Scalar single precision floating point register class.. def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22, @@ -479,4 +547,3 @@ def QQQQPR : RegisterClass<"ARM", [v8i64], // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; - diff --git a/lib/Target/ARM/ARMScheduleA8.td b/lib/Target/ARM/ARMScheduleA8.td index bbfc0b2..282abca 100644 --- a/lib/Target/ARM/ARMScheduleA8.td +++ b/lib/Target/ARM/ARMScheduleA8.td @@ -1,10 +1,10 @@ //=- ARMScheduleA8.td - ARM Cortex-A8 Scheduling Definitions -*- tablegen -*-=// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the ARM Cortex A8 processors. @@ -32,50 +32,50 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData]>, // // Binary Instructions that produce a result - InstrItinData], [2, 2]>, - InstrItinData], [2, 2, 2]>, - InstrItinData], [2, 2, 1]>, - InstrItinData], [2, 2, 1, 1]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 2, 2]>, + InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 2, 1, 1]>, // // Unary Instructions that produce a result - InstrItinData], [2, 2]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1, 1]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, // // Compare instructions - InstrItinData], [2]>, - InstrItinData], [2, 2]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1, 1]>, + InstrItinData], [2]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, // // Move instructions, unconditional - InstrItinData], [1]>, - InstrItinData], [1, 1]>, - InstrItinData], [1, 1]>, - InstrItinData], [1, 1, 1]>, + InstrItinData], [1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1, 1]>, // // Move instructions, conditional - InstrItinData], [2]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1]>, - InstrItinData], [2, 1, 1]>, + InstrItinData], [2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, // Integer multiply pipeline // Result written in E5, but that is relative to the last cycle of multicycle, // so we use 6 for those cases // InstrItinData], [5, 1, 1]>, - InstrItinData, + InstrItinData, InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData, + InstrItinData, InstrStage<2, [A8_Pipe0]>], [6, 1, 1]>, - InstrItinData, + InstrItinData, InstrStage<2, [A8_Pipe0]>], [6, 1, 1, 4]>, - InstrItinData, + InstrItinData, InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, - InstrItinData, + 
InstrItinData, InstrStage<3, [A8_Pipe0]>], [6, 6, 1, 1]>, - + // Integer load pipeline // // loads have an extra cycle of latency, but are fully pipelined @@ -166,7 +166,7 @@ def CortexA8Itineraries : ProcessorItineraries< InstrStage<2, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0]>]>, - + // Branch // // no delay slots, so the latency of a branch is unimportant @@ -276,14 +276,14 @@ def CortexA8Itineraries : ProcessorItineraries< // // Single-precision FP Load // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + InstrItinData, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, // // Double-precision FP Load // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + InstrItinData, InstrStage<1, [A8_Pipe0], 0>, InstrStage<1, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -292,7 +292,7 @@ def CortexA8Itineraries : ProcessorItineraries< // // FP Load Multiple // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + InstrItinData, InstrStage<2, [A8_Pipe0], 0>, InstrStage<2, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -301,14 +301,14 @@ def CortexA8Itineraries : ProcessorItineraries< // // Single-precision FP Store // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + InstrItinData, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, // // Double-precision FP Store // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + InstrItinData, InstrStage<1, [A8_Pipe0], 0>, InstrStage<1, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -317,7 +317,7 @@ def CortexA8Itineraries : ProcessorItineraries< // // FP Store Multiple // use A8_Issue to enforce the 1 load/store per cycle limit - InstrItinData, + InstrItinData, InstrStage<2, [A8_Pipe0], 0>, InstrStage<2, [A8_Pipe1]>, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, @@ -329,35 +329,35 @@ def CortexA8Itineraries : ProcessorItineraries< // // VLD1 // FIXME: We don't model this instruction properly - InstrItinData, + InstrItinData, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, // // VLD2 // FIXME: We don't model this instruction properly - InstrItinData, + InstrItinData, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 1]>, // // VLD3 // FIXME: We don't model this instruction properly - InstrItinData, + InstrItinData, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 1]>, // // VLD4 // FIXME: We don't model this instruction properly - InstrItinData, + InstrItinData, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>], [2, 2, 2, 2, 1]>, // // VST // FIXME: We don't model this instruction properly - InstrItinData, + InstrItinData, InstrStage<1, [A8_Pipe0, A8_Pipe1]>, InstrStage<1, [A8_LdSt0], 0>, InstrStage<1, [A8_NLSPipe]>]>, @@ -600,7 +600,7 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, - InstrStage<2, [A8_NLSPipe]>], [4, 2, 2, 3, 3, 1]>, + InstrStage<2, [A8_NLSPipe]>],[4, 2, 2, 3, 3, 1]>, // // VTBX InstrItinData, @@ -610,9 +610,9 @@ def CortexA8Itineraries : ProcessorItineraries< InstrItinData, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, - InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 1]>, + InstrStage<2, 
[A8_NLSPipe]>],[4, 1, 2, 2, 3, 1]>, InstrItinData, InstrStage<1, [A8_NLSPipe]>, InstrStage<1, [A8_NPipe], 0>, - InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> + InstrStage<2, [A8_NLSPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 75320d9..df2f896 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -1,10 +1,10 @@ //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines the itinerary class data for the ARM Cortex A9 processors. @@ -16,7 +16,6 @@ // Reference Manual". // // Functional units -def A9_Issue : FuncUnit; // issue def A9_Pipe0 : FuncUnit; // pipeline 0 def A9_Pipe1 : FuncUnit; // pipeline 1 def A9_LSPipe : FuncUnit; // LS pipe @@ -27,7 +26,121 @@ def A9_DRegsN : FuncUnit; // FP register set, NEON side // Dual issue pipeline represented by A9_Pipe0 | A9_Pipe1 // def CortexA9Itineraries : ProcessorItineraries< - [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1, A9_Issue], [ + [A9_NPipe, A9_DRegsN, A9_DRegsVFP, A9_LSPipe, A9_Pipe0, A9_Pipe1], [ + // Two fully-pipelined integer ALU pipelines + // FIXME: There are no operand latencies for these instructions at all! + // + // Move instructions, unconditional + InstrItinData], [1]>, + InstrItinData], [1, 1]>, + InstrItinData], [1, 1]>, + InstrItinData], [2, 2, 1]>, + // + // No operand cycles + InstrItinData]>, + // + // Binary Instructions that produce a result + InstrItinData], [2, 2]>, + InstrItinData], [2, 2, 2]>, + InstrItinData], [2, 2, 1]>, + InstrItinData], [2, 2, 1, 1]>, + // + // Unary Instructions that produce a result + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, + // + // Compare instructions + InstrItinData], [2]>, + InstrItinData], [2, 2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, + // + // Move instructions, conditional + InstrItinData], [2]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1]>, + InstrItinData], [2, 1, 1]>, + + // Integer multiply pipeline + // + InstrItinData, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>, + InstrItinData, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>, + InstrItinData, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1]>, + InstrItinData, + InstrStage<2, [A9_Pipe0]>], [4, 1, 1, 2]>, + InstrItinData, + InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>, + InstrItinData, + InstrStage<3, [A9_Pipe0]>], [4, 5, 1, 1]>, + // Integer load pipeline + // FIXME: The timings are some rough approximations + // + // Immediate offset + InstrItinData, + InstrStage<1, [A9_LSPipe]>], [3, 1]>, + // + // Register offset + InstrItinData, + InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>, + // + // Scaled register offset + InstrItinData, + InstrStage<2, [A9_LSPipe]>], [4, 1, 1]>, + // + // Immediate offset with update + InstrItinData, + InstrStage<2, [A9_LSPipe]>], [3, 2, 1]>, + // + // Register offset with update + InstrItinData, + InstrStage<2, [A9_LSPipe]>], [3, 2, 1, 1]>, + // + // Scaled register offset with update + InstrItinData, + InstrStage<2, [A9_LSPipe]>], [4, 3, 1, 1]>, + // + // Load multiple + InstrItinData, + InstrStage<1, [A9_LSPipe]>]>, + + // Integer store pipeline + /// + // Immediate offset + InstrItinData, + InstrStage<1, [A9_LSPipe]>], [3, 1]>, + // 
+ // Register offset + InstrItinData, + InstrStage<1, [A9_LSPipe]>], [3, 1, 1]>, + // + // Scaled register offset + InstrItinData, + InstrStage<2, [A9_LSPipe]>], [3, 1, 1]>, + // + // Immediate offset with update + InstrItinData, + InstrStage<1, [A9_LSPipe]>], [2, 3, 1]>, + // + // Register offset with update + InstrItinData, + InstrStage<1, [A9_LSPipe]>], [2, 3, 1, 1]>, + // + // Scaled register offset with update + InstrItinData, + InstrStage<2, [A9_LSPipe]>], [3, 3, 1, 1]>, + // + // Store multiple + InstrItinData, + InstrStage<1, [A9_LSPipe]>]>, + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData]>, + // VFP and NEON shares the same register file. This means that every VFP // instruction should wait for full completion of the consecutive NEON // instruction and vice-versa. We model this behavior with two artificial FUs: @@ -39,8 +152,8 @@ def CortexA9Itineraries : ProcessorItineraries< // register file writeback!). // Every NEON instruction does the same but with FUs swapped. // - // Since the reserved FU cannot be acquired this models precisly "cross-domain" - // stalls. + // Since the reserved FU cannot be acquired, this models precisely + // "cross-domain" stalls. // VFP // Issue through integer pipeline, and execute in NEON unit. @@ -48,21 +161,21 @@ def CortexA9Itineraries : ProcessorItineraries< // FP Special Register to Integer Register File Move InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>]>, // // Single-precision FP Unary InstrItinData, // Extra latency cycles since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Double-precision FP Unary InstrItinData, // Extra latency cycles since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // @@ -70,124 +183,124 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 4 cycles InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Double-precision FP Compare InstrItinData, // Extra latency cycles since wbck is 4 cycles InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Single to Double FP Convert InstrItinData, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double to Single FP Convert InstrItinData, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Single to Half FP Convert InstrItinData, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Half to Single FP Convert InstrItinData, InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Single-Precision FP to Integer Convert InstrItinData, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], 
[4, 1]>, // // Double-Precision FP to Integer Convert InstrItinData, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Integer to Single-Precision FP Convert InstrItinData, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Integer to Double-Precision FP Convert InstrItinData, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Single-precision FP ALU InstrItinData, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Double-precision FP ALU InstrItinData, InstrStage<5, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Single-precision FP Multiply InstrItinData, InstrStage<6, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [5, 1, 1]>, // // Double-precision FP Multiply InstrItinData, InstrStage<7, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 1, 1]>, // // Single-precision FP MAC InstrItinData, InstrStage<9, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [8, 0, 1, 1]>, // // Double-precision FP MAC InstrItinData, InstrStage<10, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [9, 0, 1, 1]>, // // Single-precision FP DIV InstrItinData, InstrStage<16, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<10, [A9_NPipe]>], [15, 1, 1]>, // // Double-precision FP DIV InstrItinData, InstrStage<26, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<20, [A9_NPipe]>], [25, 1, 1]>, // // Single-precision FP SQRT InstrItinData, InstrStage<18, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<13, [A9_NPipe]>], [17, 1]>, + InstrStage<1, [A9_Pipe1]>, + InstrStage<13, [A9_NPipe]>], [17, 1]>, // // Double-precision FP SQRT InstrItinData, InstrStage<33, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<28, [A9_NPipe]>], [32, 1]>, // @@ -195,92 +308,79 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra 1 latency cycle since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Integer to Double-precision Move InstrItinData, // Extra 1 latency cycle since wbck is 2 cycles InstrStage<3, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1, 1]>, // // Single-precision to Integer Move InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [1, 1]>, // // Double-precision to Integer Move InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, 
[A9_NPipe]>], [1, 1, 1]>, // // Single-precision FP Load - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Double-precision FP Load - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // FP Load Multiple - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Single-precision FP Store - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Double-precision FP Store - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // FP Store Multiple - // use A9_Issue to enforce the 1 load/store per cycle limit InstrItinData, InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // NEON // Issue through integer pipeline, and execute in NEON unit. - // FIXME: Neon pipeline and LdSt unit are multiplexed. + // FIXME: Neon pipeline and LdSt unit are multiplexed. // Add some syntactic sugar to model this! 
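The itinerary entries above all follow one recipe: each InstrStage<N, [FUs], TimeInc, Kind> entry says how many cycles a stage occupies a functional unit, which units can serve it, how long after this stage the next one may start (a TimeInc of 0, as in the A9_Pipe1/A9_LSPipe pairs, lets two stages begin in the same cycle), and whether the unit is only reserved, never acquired, as with the A9_DRegsN/A9_DRegsVFP stages that create the cross-domain stalls described earlier. A rough C++ rendering of those semantics, handy for sanity-checking the latency numbers in this file, is sketched below; the names are invented for illustration and this is not the LLVM scheduler itself.

    #include <vector>

    // Invented mirror of InstrStage<Cycles, [Units], TimeInc, Kind>.
    struct StageSketch {
      unsigned Cycles;              // cycles this stage occupies its unit
      std::vector<unsigned> Units;  // candidate functional units
      int TimeInc;                  // -1: next stage starts after Cycles
                                    //  0: next stage starts the same cycle
      bool Reserved;                // unit is blocked but never acquired
    };

    // Invented mirror of InstrItinData<Class, [stages], [operand cycles]>;
    // the trailing list such as [6, 2, 2] gives the cycle at which each
    // machine operand is defined or read.
    struct ItinSketch {
      std::vector<StageSketch> Stages;
      std::vector<unsigned> OperandCycles;
    };

    // One plausible reading of an itinerary's overall latency is the sum
    // of its stage occupancies, which is why the Reserved stages above are
    // padded to one cycle past writeback ("Extra latency cycles since
    // wbck is N cycles").
    unsigned stageLatency(const ItinSketch &I) {
      unsigned N = 0;
      for (unsigned i = 0, e = I.Stages.size(); i != e; ++i)
        N += I.Stages[i].Cycles;
      return N;
    }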
// VLD1 // FIXME: We don't model this instruction properly InstrItinData, InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // VLD2 @@ -288,9 +388,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, // // VLD3 @@ -298,9 +397,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>], [2, 2, 2, 1]>, // // VLD4 @@ -308,9 +406,8 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>], [2, 2, 2, 2, 1]>, // // VST @@ -318,121 +415,120 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Issue], 0>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<1, [A9_LSPipe], 0>, + InstrStage<1, [A9_Pipe1], 0>, + InstrStage<1, [A9_LSPipe]>, InstrStage<1, [A9_NPipe]>]>, // // Double-register Integer Unary InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2]>, // // Quad-register Integer Unary InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2]>, // // Double-register Integer Q-Unary InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Quad-register Integer CountQ-Unary InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1]>, // // Double-register Integer Binary InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Binary InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Double-register Integer Subtract InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + 
InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, // // Quad-register Integer Subtract InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 1]>, // // Double-register Integer Shift InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, // // Quad-register Integer Shift InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 1, 1]>, // // Double-register Integer Shift (4 cycle) InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Quad-register Integer Shift (4 cycle) InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 1, 1]>, // // Double-register Integer Binary (4 cycle) InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, // // Quad-register Integer Binary (4 cycle) InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 2]>, // // Double-register Integer Subtract (4 cycle) InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, // // Quad-register Integer Subtract (4 cycle) InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [4, 2, 1]>, // @@ -440,7 +536,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3, 2, 2]>, // // Quad-register Integer Count @@ -449,35 +545,35 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [4, 2, 2]>, // // Double-register Absolute Difference and Accumulate InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 3, 2, 1]>, // // Quad-register Absolute Difference and Accumulate InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, // // Double-register Integer Pair Add 
Long InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 3, 1]>, // // Quad-register Integer Pair Add Long InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 3, 1]>, // @@ -485,14 +581,14 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 2, 2]>, // // Quad-register Integer Multiply (.8, .16) InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 2, 2]>, // @@ -500,56 +596,56 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 2, 1]>, // // Quad-register Integer Multiply (.32) InstrItinData, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [9, 2, 1]>, // // Double-register Integer Multiply-Accumulate (.8, .16) InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [6, 3, 2, 2]>, // // Double-register Integer Multiply-Accumulate (.32) InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 3, 2, 1]>, // // Quad-register Integer Multiply-Accumulate (.8, .16) InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [7, 3, 2, 2]>, // // Quad-register Integer Multiply-Accumulate (.32) InstrItinData, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [9, 3, 2, 1]>, // // Move Immediate InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [3]>, // // Double-register Permute Move InstrItinData, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_LSPipe]>], [2, 1]>, // // Quad-register Permute Move @@ -558,42 +654,42 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // FIXME: all latencies are arbitrary, no information is available InstrStage<4, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1]>, // // Integer to Single-precision Move InstrItinData, // FIXME: all 
latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Integer to Double-precision Move InstrItinData, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, // // Single-precision to Integer Move InstrItinData, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1]>, // // Double-precision to Integer Move InstrItinData, // FIXME: all latencies are arbitrary, no information is available InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 2, 1]>, // // Integer to Lane Move InstrItinData, // FIXME: all latencies are arbitrary, no information is available InstrStage<4, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, // @@ -601,7 +697,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [5, 2]>, // // Quad-register FP Unary @@ -610,7 +706,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 2]>, // // Double-register FP Binary @@ -619,7 +715,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [5, 2, 2]>, // // Quad-register FP Binary @@ -630,14 +726,14 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 8 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, // // Double-register FP Multiply-Accumulate InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 3, 2, 1]>, // // Quad-register FP Multiply-Accumulate @@ -646,28 +742,28 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4, [A9_NPipe]>], [8, 4, 2, 1]>, // // Double-register Reciprocal Step InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [6, 2, 2]>, // // Quad-register Reciprocal Step InstrItinData, // Extra latency cycles since wbck is 9 cycles InstrStage<10, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<4,
[A9_NPipe]>], [8, 2, 2]>, // // Double-register Permute InstrItinData, // Extra latency cycles since wbck is 6 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 2, 1, 1]>, // // Quad-register Permute @@ -676,7 +772,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 3, 1, 1]>, // // Quad-register Permute (3 cycle issue) @@ -685,7 +781,7 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_LSPipe]>], [4, 4, 1, 1]>, // @@ -693,57 +789,57 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<7, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<1, [A9_NPipe]>], [2, 1, 1]>, // // Quad-register VEXT InstrItinData, // Extra latency cycles since wbck is 9 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1, 1]>, // // VTB InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 2, 1]>, InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 2, 2, 1]>, InstrItinData, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 1]>, InstrItinData, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_NPipe]>], [4, 2, 2, 3, 3, 1]>, // // VTBX InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1, 2, 1]>, InstrItinData, // Extra latency cycles since wbck is 7 cycles InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<2, [A9_NPipe]>], [3, 1, 2, 2, 1]>, InstrItinData, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, + InstrStage<1, [A9_Pipe1]>, InstrStage<3, [A9_NPipe]>], [4, 1, 2, 2, 3, 1]>, InstrItinData, // Extra latency cycles since wbck is 8 cycles InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<1, [A9_Pipe0, A9_Pipe1]>, - InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> + InstrStage<1, [A9_Pipe1]>, + InstrStage<2, [A9_NPipe]>], [4, 1, 2, 2, 3, 3, 1]> ]>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index f813022..08b560c 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -16,7 +16,7 @@ // Functional Units def V6_Pipe : FuncUnit; // pipeline -// Scheduling information 
derived from "ARM1176JZF-S Technical Reference Manual". +// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual" // def ARMV6Itineraries : ProcessorItineraries< [V6_Pipe], [ diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index b4a9252..09203f9 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -60,8 +60,10 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? - std::string("e-p:32:32-f64:32:32-i64:32:32-n32") : - std::string("e-p:32:32-f64:64:64-i64:64:64-n32")), + std::string("e-p:32:32-f64:32:32-i64:32:32-" + "v128:32:128-v64:32:64-n32") : + std::string("e-p:32:32-f64:64:64-i64:64:64-" + "v128:64:128-v64:64:64-n32")), TLInfo(*this), TSInfo(*this) { } @@ -74,9 +76,11 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:32-i64:32:32-" - "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") : + "i16:16:32-i8:8:32-i1:8:32-" + "v128:32:128-v64:32:64-a:0:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" - "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")), + "i16:16:32-i8:8:32-i1:8:32-" + "v128:64:128-v64:64:64-a:0:32-n32")), TLInfo(*this), TSInfo(*this) { } @@ -98,6 +102,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); + return true; } @@ -115,21 +120,20 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, // proper scheduling. PM.add(createARMExpandPseudoPass()); - return true; -} - -bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None) { if (!Subtarget.isThumb1Only()) PM.add(createIfConverterPass()); } - - if (Subtarget.isThumb2()) { + if (Subtarget.isThumb2()) PM.add(createThumb2ITBlockPass()); + + return true; +} + +bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (Subtarget.isThumb2()) PM.add(createThumb2SizeReductionPass()); - } PM.add(createARMConstantIslandPass()); return true; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index bfa89c4..8415d1a 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -425,7 +425,7 @@ bool ARMAsmParser::ParseMemory(OwningPtr &Op) { const AsmToken &NextTok = Parser.getTok(); if (NextTok.isNot(AsmToken::EndOfStatement)) { if (NextTok.isNot(AsmToken::Comma)) - return Error(NextTok.getLoc(), "',' expected"); + return Error(NextTok.getLoc(), "',' expected"); Parser.Lex(); // Eat comma token. 
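The substantive part of the ARMTargetMachine hunk above is the data-layout change: the APCS and AAPCS strings now spell out vector alignment, v128:32:128-v64:32:64 for APCS and v128:64:128-v64:64:64 for AAPCS, instead of leaving vectors to the 128-bit default. A quick way to see what those fields mean is to hand the new string to the 2010-era TargetData class and query a vector type; the sketch below is illustrative and not part of the patch.

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Target/TargetData.h"

    using namespace llvm;

    // Under "v128:64:128" a 128-bit vector such as <4 x i32> gets an ABI
    // alignment of 64 bits (8 bytes) and a preferred alignment of 128 bits
    // (16 bytes); under the APCS "v128:32:128" the ABI alignment drops to
    // 4 bytes.
    int main() {
      LLVMContext Ctx;
      TargetData TD("e-p:32:32-f64:64:64-i64:64:64-v128:64:128-v64:64:64-n32");
      const VectorType *V4i32 = VectorType::get(Type::getInt32Ty(Ctx), 4);
      bool OK = TD.getABITypeAlignment(V4i32) == 8 &&
                TD.getPrefTypeAlignment(V4i32) == 16;
      return OK ? 0 : 1;
    }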
if(ParseMemoryOffsetReg(Negative, OffsetRegShifted, ShiftType, ShiftAmount, Offset, OffsetIsReg, OffsetRegNum, @@ -488,7 +488,7 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, const AsmToken &Tok = Parser.getTok(); if (ParseShift(ShiftType, ShiftAmount, E)) - return Error(Tok.getLoc(), "shift expected"); + return Error(Tok.getLoc(), "shift expected"); OffsetRegShifted = true; } } @@ -665,7 +665,6 @@ bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.push_back(Op.take()); - SMLoc Loc = Parser.getTok().getLoc(); if (getLexer().isNot(AsmToken::EndOfStatement)) { // Read the first operand. @@ -763,15 +762,10 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) { if (Tok.isNot(AsmToken::Identifier)) return Error(L, "unexpected token in .syntax directive"); const StringRef &Mode = Tok.getString(); - bool unified_syntax; - if (Mode == "unified" || Mode == "UNIFIED") { + if (Mode == "unified" || Mode == "UNIFIED") Parser.Lex(); - unified_syntax = true; - } - else if (Mode == "divided" || Mode == "DIVIDED") { + else if (Mode == "divided" || Mode == "DIVIDED") Parser.Lex(); - unified_syntax = false; - } else return Error(L, "unrecognized syntax mode in .syntax directive"); @@ -791,15 +785,10 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) { if (Tok.isNot(AsmToken::Integer)) return Error(L, "unexpected token in .code directive"); int64_t Val = Parser.getTok().getIntVal(); - bool thumb_mode; - if (Val == 16) { + if (Val == 16) Parser.Lex(); - thumb_mode = true; - } - else if (Val == 32) { + else if (Val == 32) Parser.Lex(); - thumb_mode = false; - } else return Error(L, "invalid operand to .code directive"); diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index d95efdb..6a40cf3 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -175,23 +175,8 @@ namespace { raw_ostream &O); void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); - - void printHex8ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); - } - void printHex16ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); - } - void printHex32ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); - } - void printHex64ImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); - } + void printNEONModImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O); virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, @@ -322,7 +307,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned DRegLo = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_0); unsigned DRegHi = TM.getRegisterInfo()->getSubReg(Reg, ARM::dsub_1); O << '{' - << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) + << getRegisterName(DRegLo) << ", " << getRegisterName(DRegHi) << '}'; } else if (Modifier && strcmp(Modifier, "lane") == 0) { unsigned RegNum = ARMRegisterInfo::getRegisterNumbering(Reg); @@ -617,8 +602,12 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op, O << "[" << getRegisterName(MO1.getReg()); if (MO2.getImm()) { + unsigned Align = MO2.getImm(); + assert((Align == 8 || Align == 
16 || Align == 32) && + "unexpected NEON load/store alignment"); + Align <<= 3; // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << MO2.getImm(); + O << ", :" << Align; } O << "]"; } @@ -1039,6 +1028,14 @@ void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum, } } +void ARMAsmPrinter::printNEONModImmOperand(const MachineInstr *MI, int OpNum, + raw_ostream &O) { + unsigned EncodedImm = MI->getOperand(OpNum).getImm(); + unsigned EltBits; + uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); + O << "#0x" << utohexstr(Val); +} + bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { @@ -1064,20 +1061,10 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, printOperand(MI, OpNum, O); return false; case 'Q': - if (TM.getTargetData()->isLittleEndian()) - break; - // Fallthrough case 'R': - if (TM.getTargetData()->isBigEndian()) - break; - // Fallthrough - case 'H': // Write second word of DI / DF reference. - // Verify that this operand has two consecutive registers. - if (!MI->getOperand(OpNum).isReg() || - OpNum+1 == MI->getNumOperands() || - !MI->getOperand(OpNum+1).isReg()) - return true; - ++OpNum; // Return the high-part. + case 'H': + report_fatal_error("llvm does not support 'Q', 'R', and 'H' modifiers!"); + return true; } } @@ -1384,11 +1371,11 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { } else if (MO.isGlobal()) { MCSymbol *Symbol = MCInstLowering.GetGlobalAddressSymbol(MO); const MCSymbolRefExpr *SymRef1 = - MCSymbolRefExpr::Create(Symbol, - MCSymbolRefExpr::VK_ARM_LO16, OutContext); + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_LO16, OutContext); const MCSymbolRefExpr *SymRef2 = - MCSymbolRefExpr::Create(Symbol, - MCSymbolRefExpr::VK_ARM_HI16, OutContext); + MCSymbolRefExpr::Create(Symbol, + MCSymbolRefExpr::VK_ARM_HI16, OutContext); V1 = MCOperand::CreateExpr(SymRef1); V2 = MCOperand::CreateExpr(SymRef2); } else { diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index 2b94b76..170819a 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -779,22 +779,10 @@ void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, O << '#' << MI->getOperand(OpNum).getImm(); } -void ARMInstPrinter::printHex8ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); -} - -void ARMInstPrinter::printHex16ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); -} - -void ARMInstPrinter::printHex32ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); -} - -void ARMInstPrinter::printHex64ImmOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); +void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + unsigned EncodedImm = MI->getOperand(OpNum).getImm(); + unsigned EltBits; + uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); + O << "#0x" << utohexstr(Val); } diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index be0b7c1..ddf5047 100644 --- 
a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -104,10 +104,7 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex8ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex16ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex32ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printHex64ImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printNEONModImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPCLabel(const MCInst *MI, unsigned OpNum, raw_ostream &O); // FIXME: Implement. diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index 29e66e1..0df3466 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_target(ARMCodeGen NEONPreAllocPass.cpp Thumb1InstrInfo.cpp Thumb1RegisterInfo.cpp + Thumb2HazardRecognizer.cpp Thumb2ITBlockPass.cpp Thumb2InstrInfo.cpp Thumb2RegisterInfo.cpp diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index adb7795..a07ff28 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -34,7 +34,7 @@ /// Uses and Defs by this instr. For the Uses part, the pred:$p operand is /// defined with two components: /// -/// def pred { // Operand PredicateOperand +/// def pred { // Operand PredicateOperand /// ValueType Type = OtherVT; /// string PrintMethod = "printPredicateOperand"; /// string AsmOperandLowerMethod = ?; @@ -54,7 +54,7 @@ /// /// For the Defs part, in the simple case of only cc_out:$s, we have: /// -/// def cc_out { // Operand OptionalDefOperand +/// def cc_out { // Operand OptionalDefOperand /// ValueType Type = OtherVT; /// string PrintMethod = "printSBitModifierOperand"; /// string AsmOperandLowerMethod = ?; @@ -765,7 +765,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, || Opcode == ARM::SMC || Opcode == ARM::SVC) && "Unexpected Opcode"); - assert(NumOps >= 1 && OpInfo[0].RegClass == 0 && "Reg operand expected"); + assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected"); int Imm32 = 0; if (Opcode == ARM::SMC) { @@ -1106,7 +1106,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && (OpInfo[OpIdx+1].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+2].RegClass == 0) && + (OpInfo[OpIdx+2].RegClass < 0) && "Expect 3 reg operands"); // Register-controlled shifts have Inst{7} = 0 and Inst{4} = 1. @@ -1201,7 +1201,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass == 0) && + (OpInfo[OpIdx+1].RegClass < 0) && "Expect 1 reg operand followed by 1 imm operand"); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? 
ARM_AM::add : ARM_AM::sub; @@ -1323,7 +1323,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass == 0) && + (OpInfo[OpIdx+1].RegClass < 0) && "Expect 1 reg operand followed by 1 imm operand"); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; @@ -1494,7 +1494,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // If there is still an operand info left which is an immediate operand, add // an additional imm5 LSL/ASR operand. - if (ThreeReg && OpInfo[OpIdx].RegClass == 0 + if (ThreeReg && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Extract the 5-bit immediate field Inst{11-7}. unsigned ShiftAmt = (insn >> ARMII::ShiftShift) & 0x1F; @@ -1540,7 +1540,7 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // If there is still an operand info left which is an immediate operand, add // an additional rotate immediate operand. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Extract the 2-bit rotate field Inst{11-10}. unsigned rot = (insn >> ARMII::ExtRotImmShift) & 3; @@ -1725,7 +1725,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, "Tied to operand expected"); MI.addOperand(MI.getOperand(0)); - assert(OpInfo[2].RegClass == 0 && !OpInfo[2].isPredicate() && + assert(OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() && !OpInfo[2].isOptionalDef() && "Imm operand expected"); MI.addOperand(MCOperand::CreateImm(fbits)); @@ -1984,7 +1984,7 @@ static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; // Extract/decode the f64/f32 immediate. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // The asm syntax specifies the before-expanded . // Not VFPExpandImm(slice(insn,19,16) << 4 | slice(insn, 3, 0), @@ -2077,42 +2077,12 @@ static unsigned decodeLaneIndex(uint32_t insn) { // imm3 = Inst{18-16}, imm4 = Inst{3-0} // Ref: Table A7-15 Modified immediate values for Advanced SIMD instructions. static uint64_t decodeN1VImm(uint32_t insn, ElemSize esize) { + unsigned char op = (insn >> 5) & 1; unsigned char cmode = (insn >> 8) & 0xF; unsigned char Imm8 = ((insn >> 24) & 1) << 7 | ((insn >> 16) & 7) << 4 | (insn & 0xF); - uint64_t Imm64 = 0; - - switch (esize) { - case ESize8: - Imm64 = Imm8; - break; - case ESize16: - Imm64 = Imm8 << 8*(cmode >> 1 & 1); - break; - case ESize32: { - if (cmode == 12) - Imm64 = (Imm8 << 8) | 0xFF; - else if (cmode == 13) - Imm64 = (Imm8 << 16) | 0xFFFF; - else { - // Imm8 to be shifted left by how many bytes... 
- Imm64 = Imm8 << 8*(cmode >> 1 & 3); - } - break; - } - case ESize64: { - for (unsigned i = 0; i < 8; ++i) - if ((Imm8 >> i) & 1) - Imm64 |= (uint64_t)0xFF << 8*i; - break; - } - default: - assert(0 && "Unreachable code!"); - return 0; - } - - return Imm64; + return (op << 12) | (cmode << 8) | Imm8; } // A8.6.339 VMUL, VMULL (by scalar) @@ -2303,7 +2273,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); + OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? @@ -2320,7 +2290,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, "Reg operand expected"); RegClass = OpInfo[OpIdx].RegClass; - while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { + while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( getRegisterEnum(B, RegClass, Rd, UseDRegPair(Opcode)))); @@ -2329,7 +2299,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } // Handle possible lane index. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn))); ++OpIdx; @@ -2340,7 +2310,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, // possible TIED_TO DPR/QPR's (ignored), then possible lane index. RegClass = OpInfo[0].RegClass; - while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { + while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { MI.addOperand(MCOperand::CreateReg( getRegisterEnum(B, RegClass, Rd, UseDRegPair(Opcode)))); @@ -2355,7 +2325,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && - OpInfo[OpIdx + 1].RegClass == 0 && "Addrmode #6 Operands expected"); + OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? @@ -2366,7 +2336,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; } - while (OpIdx < NumOps && OpInfo[OpIdx].RegClass == RegClass) { + while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) { assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 && "Tied to operand expected"); MI.addOperand(MCOperand::CreateReg(0)); @@ -2374,7 +2344,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, } // Handle possible lane index. 
- if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { MI.addOperand(MCOperand::CreateImm(decodeLaneIndex(insn))); ++OpIdx; @@ -2438,7 +2408,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, assert(NumOps >= 2 && (OpInfo[0].RegClass == ARM::DPRRegClassID || OpInfo[0].RegClass == ARM::QPRRegClassID) && - (OpInfo[1].RegClass == 0) && + (OpInfo[1].RegClass < 0) && "Expect 1 reg operand followed by 1 imm operand"); // Qd/Dd = Inst{22:15-12} => NEON Rd @@ -2552,7 +2522,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn, } // Add the imm operand, if required. - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { unsigned imm = 0xFFFFFFFF; @@ -2632,7 +2602,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, decodeNEONRm(insn)))); ++OpIdx; - assert(OpInfo[OpIdx].RegClass == 0 && "Imm operand expected"); + assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected"); // Add the imm operand. @@ -2762,7 +2732,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, getRegisterEnum(B, OpInfo[OpIdx].RegClass, m))); ++OpIdx; - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Add the imm operand. unsigned Imm = 0; @@ -2869,15 +2839,9 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } -static bool DisassembleNEONFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - assert(0 && "Unreachable code!"); - return false; -} - // Vector Get Lane (move scalar to ARM core register) Instructions. // VGETLNi32, VGETLNs16, VGETLNs8, VGETLNu16, VGETLNu8: Rt Dn index -static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; @@ -2887,7 +2851,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(TID.getNumDefs() == 1 && NumOps >= 3 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::DPRRegClassID && - OpInfo[2].RegClass == 0 && + OpInfo[2].RegClass < 0 && "Expect >= 3 operands with one dst operand"); ElemSize esize = @@ -2911,7 +2875,7 @@ static bool DisassembleNEONGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Set Lane (move ARM core register to scalar) Instructions. 
// VSETLNi16, VSETLNi32, VSETLNi8: Dd Dd (TIED_TO) Rt index -static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; @@ -2923,7 +2887,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, OpInfo[1].RegClass == ARM::DPRRegClassID && TID.getOperandConstraint(1, TOI::TIED_TO) != -1 && OpInfo[2].RegClass == ARM::GPRRegClassID && - OpInfo[3].RegClass == 0 && + OpInfo[3].RegClass < 0 && "Expect >= 3 operands with one dst operand"); ElemSize esize = @@ -2950,7 +2914,7 @@ static bool DisassembleNEONSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Vector Duplicate Instructions (from ARM core register to all elements). // VDUP8d, VDUP16d, VDUP32d, VDUP8q, VDUP16q, VDUP32q: Qd/Dd Rt -static bool DisassembleNEONDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, +static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; @@ -3090,13 +3054,6 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return false; } -static bool DisassembleThumbMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, - unsigned short NumOps, unsigned &NumOpsAdded, BO) { - - assert(0 && "Unexpected thumb misc. instruction!"); - return false; -} - /// FuncPtrs - FuncPtrs maps ARMFormat to its corresponding DisassembleFP. /// We divide the disassembly task into different categories, with each one /// corresponding to a specific instruction encoding format. There could be @@ -3128,12 +3085,10 @@ static const DisassembleFP FuncPtrs[] = { &DisassembleVFPLdStMulFrm, &DisassembleVFPMiscFrm, &DisassembleThumbFrm, - &DisassembleNEONFrm, - &DisassembleNEONGetLnFrm, - &DisassembleNEONSetLnFrm, - &DisassembleNEONDupFrm, &DisassembleMiscFrm, - &DisassembleThumbMiscFrm, + &DisassembleNGetLnFrm, + &DisassembleNSetLnFrm, + &DisassembleNDupFrm, // VLD and VST (including one lane) Instructions. &DisassembleNLdSt, @@ -3233,7 +3188,8 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, // a pair of TargetOperandInfos with isPredicate() property. if (NumOpsRemaining >= 2 && OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && - OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) + OpInfo[Idx].RegClass < 0 && + OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) { // If we are inside an IT block, get the IT condition bits maintained via // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). @@ -3265,7 +3221,8 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, // a pair of TargetOperandInfos with isPredicate() property. if (NumOpsRemaining >= 2 && OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() && - OpInfo[Idx].RegClass == 0 && OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) + OpInfo[Idx].RegClass < 0 && + OpInfo[Idx+1].RegClass == ARM::CCRRegClassID) { // If we are inside an IT block, get the IT condition bits maintained via // ARMBasicMCBuilder::ITState[7:0], through ARMBasicMCBuilder::GetITCond(). 
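The decodeN1VImm change above is worth restating: the disassembler used to expand the NEON modified immediate itself (the deleted switch), and now just repacks the fields as (op << 12) | (cmode << 8) | Imm8 so a single shared routine, ARM_AM::decodeNEONModImm, can expand it for the printers as well. For reference, here is the deleted expansion rewritten as a standalone helper with invented names; it mirrors the removed code, not the new ARM_AM routine.

    #include <cstdint>

    enum ElemSizeSketch { ESize8, ESize16, ESize32, ESize64 };

    // Expand a packed op:cmode:imm8 modified immediate to the element
    // value, per Table A7-15 of the ARM ARM (Advanced SIMD modified
    // immediates), following the per-size cases deleted above.
    uint64_t expandNEONModImm(unsigned Packed, ElemSizeSketch ESize) {
      unsigned cmode = (Packed >> 8) & 0xF;
      unsigned Imm8 = Packed & 0xFF;
      switch (ESize) {
      case ESize8:
        return Imm8;
      case ESize16:                 // imm8 placed in byte 0 or byte 1
        return (uint64_t)Imm8 << (8 * ((cmode >> 1) & 1));
      case ESize32:
        if (cmode == 12)            // "one-filled" form imm8:0xFF
          return ((uint64_t)Imm8 << 8) | 0xFF;
        if (cmode == 13)            // "one-filled" form imm8:0xFFFF
          return ((uint64_t)Imm8 << 16) | 0xFFFF;
        return (uint64_t)Imm8 << (8 * ((cmode >> 1) & 3));
      case ESize64: {               // each imm8 bit selects a 0xFF byte
        uint64_t Imm64 = 0;
        for (unsigned i = 0; i != 8; ++i)
          if ((Imm8 >> i) & 1)
            Imm64 |= (uint64_t)0xFF << (8 * i);
        return Imm64;
      }
      }
      return 0; // unreachable for well-formed input
    }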
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index b1d90df..7d21256 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -137,25 +137,25 @@ static inline void setSlice(uint32_t &Bits, unsigned From, unsigned To, /// Various utilities for checking the target specific flags. /// A unary data processing instruction doesn't have an Rn operand. -static inline bool isUnaryDP(unsigned TSFlags) { +static inline bool isUnaryDP(uint64_t TSFlags) { return (TSFlags & ARMII::UnaryDP); } /// This four-bit field describes the addressing mode used. /// See also ARMBaseInstrInfo.h. -static inline unsigned getAddrMode(unsigned TSFlags) { +static inline unsigned getAddrMode(uint64_t TSFlags) { return (TSFlags & ARMII::AddrModeMask); } /// {IndexModePre, IndexModePost} /// Only valid for load and store ops. /// See also ARMBaseInstrInfo.h. -static inline unsigned getIndexMode(unsigned TSFlags) { +static inline unsigned getIndexMode(uint64_t TSFlags) { return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; } /// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList. -static inline bool isPrePostLdSt(unsigned TSFlags) { +static inline bool isPrePostLdSt(uint64_t TSFlags) { return (TSFlags & ARMII::IndexModeMask) != 0; } diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 4b2e308..4b7a0bf 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -395,7 +395,7 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, getT1tRm(insn)))); } else { - assert(OpInfo[OpIdx].RegClass == 0 && + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn) @@ -531,7 +531,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn, if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass == 0 && + (OpInfo[1].RegClass < 0 && !OpInfo[1].isPredicate() && !OpInfo[1].isOptionalDef()) && "Invalid arguments"); @@ -598,7 +598,7 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, assert(OpIdx < NumOps && "More operands expected"); - if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() && + if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { MI.addOperand(MCOperand::CreateImm(Imm5 ? 
getT1Imm5(insn) : 0)); @@ -632,7 +632,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID && - (OpInfo[2].RegClass == 0 && + (OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() && !OpInfo[2].isOptionalDef()) && "Invalid arguments"); @@ -658,7 +658,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn, if (!OpInfo) return false; assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass == 0 && + (OpInfo[1].RegClass < 0 && !OpInfo[1].isPredicate() && !OpInfo[1].isOptionalDef()) && "Invalid arguments"); @@ -685,7 +685,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps >= 3 && OpInfo[0].RegClass == ARM::tGPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID && - (OpInfo[2].RegClass == 0 && + (OpInfo[2].RegClass < 0 && !OpInfo[2].isPredicate() && !OpInfo[2].isOptionalDef()) && "Invalid arguments"); @@ -761,7 +761,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, // Predicate operands are handled elsewhere. if (NumOps == 2 && OpInfo[0].isPredicate() && OpInfo[1].isPredicate() && - OpInfo[0].RegClass == 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) { + OpInfo[0].RegClass < 0 && OpInfo[1].RegClass == ARM::CCRRegClassID) { return true; } @@ -808,7 +808,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn, } assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && - (OpInfo[1].RegClass==0 || OpInfo[1].RegClass==ARM::tGPRRegClassID) + (OpInfo[1].RegClass < 0 || OpInfo[1].RegClass==ARM::tGPRRegClassID) && "Expect >=2 operands"); // Add the destination operand. @@ -913,7 +913,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; - assert(NumOps == 3 && OpInfo[0].RegClass == 0 && + assert(NumOps == 3 && OpInfo[0].RegClass < 0 && OpInfo[1].isPredicate() && OpInfo[2].RegClass == ARM::CCRRegClassID && "Exactly 3 operands expected"); @@ -939,7 +939,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; if (!OpInfo) return false; - assert(NumOps == 1 && OpInfo[0].RegClass == 0 && "1 imm operand expected"); unsigned Imm11 = getT1Imm11(insn); @@ -1239,7 +1239,7 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID && OpInfo[2].RegClass == ARM::GPRRegClassID - && OpInfo[3].RegClass == 0 + && OpInfo[3].RegClass < 0 && "Expect >= 4 operands and first 3 as reg operands"); // Add the operands. @@ -1322,8 +1322,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, assert(NumOps == 4 && OpInfo[0].RegClass == ARM::GPRRegClassID && OpInfo[1].RegClass == ARM::GPRRegClassID - && OpInfo[2].RegClass == 0 - && OpInfo[3].RegClass == 0 + && OpInfo[2].RegClass < 0 + && OpInfo[3].RegClass < 0 && "Exactly 4 operands expected and first two as reg operands"); // Only need to populate the src reg operand.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -1375,7 +1375,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if (NumOps == OpIdx) return true; - if (OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { if (Thumb2ShiftOpcode(Opcode)) @@ -1440,7 +1440,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, } // The modified immediate operand should come next. - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 && + assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1555,7 +1555,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, ++OpIdx; } - assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1772,7 +1772,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); } else { - assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); int Offset = 0; @@ -1792,7 +1792,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, } ++OpIdx; - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 && + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Fills in the shift amount for t2PLDs, t2PLDWs, t2PLIs. MI.addOperand(MCOperand::CreateImm(slice(insn, 5, 4))); @@ -1818,7 +1818,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == 0 && + OpInfo[1].RegClass < 0 && "Expect >= 2 operands, first as reg, and second as imm operand"); // Build the register operand, followed by the (+/-)imm12 immediate. @@ -1930,7 +1930,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, ++OpIdx; } - assert(OpInfo[OpIdx].RegClass == 0 && !OpInfo[OpIdx].isPredicate() + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1981,7 +1981,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, decodeRm(insn)))); ++OpIdx; - if (OpIdx < NumOps && OpInfo[OpIdx].RegClass == 0 + if (OpIdx < NumOps && OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { // Add the rotation amount immediate. MI.addOperand(MCOperand::CreateImm(decodeRotate(insn))); diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 0a4400c..bbdd3c7 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -105,8 +105,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { unsigned MOReg = MO.getReg(); Defs[MOReg] = MI; - // Catch subregs as well. - for (const unsigned *R = TRI->getSubRegisters(MOReg); *R; ++R) + // Catch aliases as well. 
+ for (const unsigned *R = TRI->getAliasSet(MOReg); *R; ++R) Defs[*R] = MI; } } diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index a725898..f67717c 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -407,7 +407,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, "expected a virtual register"); // Extracting from a Q or QQ register. MachineInstr *DefMI = MRI->getVRegDef(VirtReg); - if (!DefMI || !DefMI->isExtractSubreg()) + if (!DefMI || !DefMI->isCopy() || !DefMI->getOperand(1).getSubReg()) return false; VirtReg = DefMI->getOperand(1).getReg(); if (LastSrcReg && LastSrcReg != VirtReg) @@ -418,7 +418,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, RC != ARM::QQPRRegisterClass && RC != ARM::QQQQPRRegisterClass) return false; - unsigned SubIdx = DefMI->getOperand(2).getImm(); + unsigned SubIdx = DefMI->getOperand(1).getSubReg(); if (LastSubIdx) { if (LastSubIdx != SubIdx-Stride) return false; @@ -434,22 +434,21 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI, // FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is // currently required for correctness. e.g. - // %reg1041; = REG_SEQUENCE %reg1040, 5, %reg1035, 6 + // %reg1041 = REG_SEQUENCE %reg1040, 5, %reg1035, 6 // %reg1042 = EXTRACT_SUBREG %reg1041, 6 // %reg1043 = EXTRACT_SUBREG %reg1041, 5 // VST1q16 %reg1025, 0, %reg1043, %reg1042, - // reg1025 and reg1043 should be replaced with reg1041:6 and reg1041:5 + // reg1042 and reg1043 should be replaced with reg1041:6 and reg1041:5 // respectively. // We need to change how we model uses of REG_SEQUENCE. for (unsigned R = 0; R < NumRegs; ++R) { MachineOperand &MO = MI->getOperand(FirstOpnd + R); unsigned OldReg = MO.getReg(); MachineInstr *DefMI = MRI->getVRegDef(OldReg); - assert(DefMI->isExtractSubreg()); + assert(DefMI->isCopy()); MO.setReg(LastSrcReg); MO.setSubReg(SubIds[R]); - if (R != 0) - MO.setIsKill(false); + MO.setIsKill(false); // Delete the EXTRACT_SUBREG if its result is now dead. if (MRI->use_empty(OldReg)) DefMI->eraseFromParent(); @@ -467,43 +466,9 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) { unsigned FirstOpnd, NumRegs, Offset, Stride; if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride)) continue; - if (llvm::ModelWithRegSequence() && - FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) + if (FormsRegSequence(MI, FirstOpnd, NumRegs, Offset, Stride)) continue; - - MachineBasicBlock::iterator NextI = llvm::next(MBBI); - for (unsigned R = 0; R < NumRegs; ++R) { - MachineOperand &MO = MI->getOperand(FirstOpnd + R); - assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand"); - unsigned VirtReg = MO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "expected a virtual register"); - - // For now, just assign a fixed set of adjacent registers. - // This leaves plenty of room for future improvements. - static const unsigned NEONDRegs[] = { - ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7 - }; - MO.setReg(NEONDRegs[Offset + R * Stride]); - - if (MO.isUse()) { - // Insert a copy from VirtReg. - TII->copyRegToReg(MBB, MBBI, MO.getReg(), VirtReg, - ARM::DPRRegisterClass, ARM::DPRRegisterClass, - DebugLoc()); - if (MO.isKill()) { - MachineInstr *CopyMI = prior(MBBI); - CopyMI->findRegisterUseOperand(VirtReg)->setIsKill(); - } - MO.setIsKill(); - } else if (MO.isDef() && !MO.isDead()) { - // Add a copy to VirtReg. 
- TII->copyRegToReg(MBB, NextI, VirtReg, MO.getReg(), - ARM::DPRRegisterClass, ARM::DPRRegisterClass, - DebugLoc()); - } - } + llvm_unreachable("expected a REG_SEQUENCE"); } return Modified; diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index fae84d4..af630ac 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -33,64 +33,24 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const { return 0; } -bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC == ARM::GPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); - return true; - } - } else if (DestRC == ARM::tGPRRegisterClass) { - if (SrcRC == ARM::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg); - return true; - } else if (SrcRC == ARM::tGPRRegisterClass) { - BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg); - return true; - } - } - - return false; -} - -bool Thumb1InstrInfo:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const { - if (Ops.size() != 1) return false; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - switch (Opc) { - default: break; - case ARM::tMOVr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVgpr2gpr: { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - !isARMLowRegister(SrcReg)) - // tSpill cannot take a high register operand. - return false; - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg) && - !isARMLowRegister(DstReg)) - // tRestore cannot target a high register operand. 
- return false; - } - return true; - } - } - - return false; +void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + bool tDest = ARM::tGPRRegClass.contains(DestReg); + bool tSrc = ARM::tGPRRegClass.contains(SrcReg); + unsigned Opc = ARM::tMOVgpr2gpr; + if (tDest && tSrc) + Opc = ARM::tMOVr; + else if (tSrc) + Opc = ARM::tMOVtgpr2gpr; + else if (tDest) + Opc = ARM::tMOVgpr2tgpr; + + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + assert(ARM::GPRRegClass.contains(DestReg, SrcReg) && + "Thumb1 can only copy GPR registers"); } void Thumb1InstrInfo:: @@ -175,10 +135,10 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, isKill = false; } - if (isKill) { + if (isKill) MBB.addLiveIn(Reg); - MIB.addReg(Reg, RegState::Kill); - } + + MIB.addReg(Reg, getKillRegState(isKill)); } return true; } @@ -221,46 +181,3 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, return true; } - -MachineInstr *Thumb1InstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, - const SmallVectorImpl &Ops, int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = NULL; - switch (Opc) { - default: break; - case ARM::tMOVr: - case ARM::tMOVtgpr2gpr: - case ARM::tMOVgpr2tgpr: - case ARM::tMOVgpr2gpr: { - if (OpNum == 0) { // move -> store - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && - !isARMLowRegister(SrcReg)) - // tSpill cannot take a high register operand. - break; - NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0)); - } else { // move -> load - unsigned DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg) && - !isARMLowRegister(DstReg)) - // tRestore cannot target a high register operand. 
- break; - bool isDead = MI->getOperand(0).isDead(); - NewMI = AddDefaultPred(BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore)) - .addReg(DstReg, - RegState::Define | getDeadRegState(isDead)) - .addFrameIndex(FI).addImm(0)); - } - break; - } - } - - return NewMI; -} diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index c937296..555135a 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -46,12 +46,10 @@ public: const std::vector &CSI, const TargetRegisterInfo *TRI) const; - bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -64,20 +62,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const; - - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FrameIndex) const; - - MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - return 0; - } }; } diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 2f635fe..39b70b4 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -68,21 +68,6 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); } -const TargetRegisterClass* -Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const { - if (isARMLowRegister(Reg)) - return ARM::tGPRRegisterClass; - switch (Reg) { - default: - break; - case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: - case ARM::R12: case ARM::SP: case ARM::LR: case ARM::PC: - return ARM::GPRRegisterClass; - } - - return TargetRegisterInfo::getPhysicalRegisterRegClass(Reg, VT); -} - bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); @@ -410,6 +395,8 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB, // before that instead and adjust the UseMI. bool done = false; for (MachineBasicBlock::iterator II = I; !done && II != UseMI ; ++II) { + if (II->isDebugValue()) + continue; // If this instruction affects R12, adjust our restore point. for (unsigned i = 0, e = II->getNumOperands(); i != e; ++i) { const MachineOperand &MO = II->getOperand(i); diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 4eca367..9a0308af 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -38,9 +38,6 @@ public: unsigned PredReg = 0) const; /// Code Generation virtual methods... - const TargetRegisterClass * - getPhysicalRegisterRegClass(unsigned Reg, EVT VT = MVT::Other) const; - bool hasReservedCallFrame(MachineFunction &MF) const; void eliminateCallFramePseudoInstr(MachineFunction &MF, @@ -51,7 +48,8 @@ public: // could not be handled directly in MI. 
int rewriteFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int Offset, - unsigned MOVOpc, unsigned ADDriOpc, unsigned SUBriOpc) const; + unsigned MOVOpc, unsigned ADDriOpc, + unsigned SUBriOpc) const; bool saveScavengerRegister(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.cpp b/lib/Target/ARM/Thumb2HazardRecognizer.cpp new file mode 100644 index 0000000..172908d --- /dev/null +++ b/lib/Target/ARM/Thumb2HazardRecognizer.cpp @@ -0,0 +1,53 @@ +//===-- Thumb2HazardRecognizer.cpp - Thumb2 postra hazard recognizer ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "ARM.h" +#include "Thumb2HazardRecognizer.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/ScheduleDAG.h" +using namespace llvm; + +ScheduleHazardRecognizer::HazardType +Thumb2HazardRecognizer::getHazardType(SUnit *SU) { + if (ITBlockSize) { + MachineInstr *MI = SU->getInstr(); + if (!MI->isDebugValue() && MI != ITBlockMIs[ITBlockSize-1]) + return Hazard; + } + + return PostRAHazardRecognizer::getHazardType(SU); +} + +void Thumb2HazardRecognizer::Reset() { + ITBlockSize = 0; + PostRAHazardRecognizer::Reset(); +} + +void Thumb2HazardRecognizer::EmitInstruction(SUnit *SU) { + MachineInstr *MI = SU->getInstr(); + unsigned Opcode = MI->getOpcode(); + if (ITBlockSize) { + --ITBlockSize; + } else if (Opcode == ARM::t2IT) { + unsigned Mask = MI->getOperand(1).getImm(); + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + ITBlockSize = 4 - NumTZ; + MachineBasicBlock::iterator I = MI; + for (unsigned i = 0; i < ITBlockSize; ++i) { + // Advance to the next instruction, skipping any dbg_value instructions. + do { + ++I; + } while (I->isDebugValue()); + ITBlockMIs[ITBlockSize-1-i] = &*I; + } + } + + PostRAHazardRecognizer::EmitInstruction(SU); +} diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h new file mode 100644 index 0000000..4726658 --- /dev/null +++ b/lib/Target/ARM/Thumb2HazardRecognizer.h @@ -0,0 +1,40 @@ +//===-- Thumb2HazardRecognizer.h - Thumb2 Hazard Recognizers ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines hazard recognizers for scheduling Thumb2 functions on +// ARM processors. +// +//===----------------------------------------------------------------------===// + +#ifndef THUMB2HAZARDRECOGNIZER_H +#define THUMB2HAZARDRECOGNIZER_H + +#include "llvm/CodeGen/PostRAHazardRecognizer.h" + +namespace llvm { + +class MachineInstr; + +class Thumb2HazardRecognizer : public PostRAHazardRecognizer { + unsigned ITBlockSize; // No. of MIs in current IT block yet to be scheduled. 
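// Illustrative sketch (editor's addition, not part of the patch): a valid
// t2IT mask operand always has a trailing terminator bit set, so the number
// of instructions the block covers is 4 minus the mask's trailing zero count;
// this mirrors the CountTrailingZeros_32 computation performed by
// EmitInstruction() in Thumb2HazardRecognizer.cpp above.
static unsigned itBlockLength(unsigned Mask) {
  unsigned NumTZ = 0;               // position of the terminator bit
  while (NumTZ < 4 && !(Mask & (1u << NumTZ)))
    ++NumTZ;
  return 4 - NumTZ;                 // 0b1000 -> 1 instruction, 0b0001 -> 4
}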
+  MachineInstr *ITBlockMIs[4];
+
+public:
+  Thumb2HazardRecognizer(const InstrItineraryData &ItinData) :
+    PostRAHazardRecognizer(ItinData) {}
+
+  virtual HazardType getHazardType(SUnit *SU);
+  virtual void Reset();
+  virtual void EmitInstruction(SUnit *SU);
+};
+
+
+} // end namespace llvm
+
+#endif // THUMB2HAZARDRECOGNIZER_H
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index f36d4ef..cd15bbe 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -14,17 +14,23 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
-STATISTIC(NumITs, "Number of IT blocks inserted");
+STATISTIC(NumITs,        "Number of IT blocks inserted");
+STATISTIC(NumMovedInsts, "Number of predicated instructions moved");
 
 namespace {
-  struct Thumb2ITBlockPass : public MachineFunctionPass {
+  class Thumb2ITBlockPass : public MachineFunctionPass {
+    bool PreRegAlloc;
+
+  public:
     static char ID;
     Thumb2ITBlockPass() : MachineFunctionPass(&ID) {}
 
     const Thumb2InstrInfo *TII;
+    const TargetRegisterInfo *TRI;
     ARMFunctionInfo *AFI;
 
     virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -34,61 +40,167 @@ namespace {
     }
 
   private:
-    bool InsertITBlocks(MachineBasicBlock &MBB);
+    bool MoveCopyOutOfITBlock(MachineInstr *MI,
+                              ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+                              SmallSet &Defs,
+                              SmallSet &Uses);
+    bool InsertITInstructions(MachineBasicBlock &MBB);
   };
   char Thumb2ITBlockPass::ID = 0;
 }
 
-static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
-  unsigned Opc = MI->getOpcode();
-  if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
-    return ARMCC::AL;
-  return llvm::getInstrPredicate(MI, PredReg);
+/// TrackDefUses - Track which registers are defined and used by
+/// instructions in the IT block. This also tracks "dependencies", i.e. uses
+/// in the IT block that are defined before the IT instruction.
+static void TrackDefUses(MachineInstr *MI,
+                         SmallSet &Defs,
+                         SmallSet &Uses,
+                         const TargetRegisterInfo *TRI) {
+  SmallVector LocalDefs;
+  SmallVector LocalUses;
+
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg || Reg == ARM::ITSTATE || Reg == ARM::SP)
+      continue;
+    if (MO.isUse())
+      LocalUses.push_back(Reg);
+    else
+      LocalDefs.push_back(Reg);
+  }
+
+  for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) {
+    unsigned Reg = LocalUses[i];
+    Uses.insert(Reg);
+    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+         *Subreg; ++Subreg)
+      Uses.insert(*Subreg);
+  }
+
+  for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
+    unsigned Reg = LocalDefs[i];
+    Defs.insert(Reg);
+    for (const unsigned *Subreg = TRI->getSubRegisters(Reg);
+         *Subreg; ++Subreg)
+      Defs.insert(*Subreg);
+    if (Reg == ARM::CPSR)
+      continue;
+  }
+}
+
+bool
+Thumb2ITBlockPass::MoveCopyOutOfITBlock(MachineInstr *MI,
+                                        ARMCC::CondCodes CC, ARMCC::CondCodes OCC,
+                                        SmallSet &Defs,
+                                        SmallSet &Uses) {
+  unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+  if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+    assert(SrcSubIdx == 0 && DstSubIdx == 0 &&
+           "Sub-register indices still around?");
+    // llvm models selects as two-address instructions. That means a copy
+    // is inserted before a t2MOVccr, etc. If the copy is scheduled in
+    // between selects we would end up creating multiple IT blocks.
+
+    // First check if it's safe to move it.
+    if (Uses.count(DstReg) || Defs.count(SrcReg))
+      return false;
+
+    // Then peek at the next instruction to see if it's predicated on CC or OCC.
+    // If not, then there is nothing to be gained by moving the copy.
+    MachineBasicBlock::iterator I = MI; ++I;
+    MachineBasicBlock::iterator E = MI->getParent()->end();
+    while (I != E && I->isDebugValue())
+      ++I;
+    if (I != E) {
+      unsigned NPredReg = 0;
+      ARMCC::CondCodes NCC = llvm::getITInstrPredicate(I, NPredReg);
+      if (NCC == CC || NCC == OCC)
+        return true;
+    }
+  }
+  return false;
 }
 
-bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
+bool Thumb2ITBlockPass::InsertITInstructions(MachineBasicBlock &MBB) {
   bool Modified = false;
+  SmallSet Defs;
+  SmallSet Uses;
 
   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
   while (MBBI != E) {
     MachineInstr *MI = &*MBBI;
     DebugLoc dl = MI->getDebugLoc();
     unsigned PredReg = 0;
-    ARMCC::CondCodes CC = getPredicate(MI, PredReg);
-
+    ARMCC::CondCodes CC = llvm::getITInstrPredicate(MI, PredReg);
     if (CC == ARMCC::AL) {
       ++MBBI;
       continue;
     }
 
+    Defs.clear();
+    Uses.clear();
+    TrackDefUses(MI, Defs, Uses, TRI);
+
     // Insert an IT instruction.
     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(ARM::t2IT))
       .addImm(CC);
+
+    // Add implicit use of ITSTATE to IT block instructions.
+    MI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+                                             true/*isImp*/, false/*isKill*/));
+
+    MachineInstr *LastITMI = MI;
+    MachineBasicBlock::iterator InsertPos = MIB;
     ++MBBI;
 
-    // Finalize IT mask.
+    // Form IT block.
     ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
     unsigned Mask = 0, Pos = 3;
     // Branches, including tricky ones like LDM_RET, need to end an IT
     // block so check the instruction we just put in the block.
-    while (MBBI != E && Pos &&
-           (!MI->getDesc().isBranch() && !MI->getDesc().isReturn())) {
+    for (; MBBI != E && Pos &&
+           (!MI->getDesc().isBranch() && !MI->getDesc().isReturn()) ; ++MBBI) {
+      if (MBBI->isDebugValue())
+        continue;
+
       MachineInstr *NMI = &*MBBI;
       MI = NMI;
-      DebugLoc ndl = NMI->getDebugLoc();
+
       unsigned NPredReg = 0;
-      ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
-      if (NCC == CC || NCC == OCC)
+      ARMCC::CondCodes NCC = llvm::getITInstrPredicate(NMI, NPredReg);
+      if (NCC == CC || NCC == OCC) {
         Mask |= (NCC & 1) << Pos;
-      else
+        // Add implicit use of ITSTATE.
+        NMI->addOperand(MachineOperand::CreateReg(ARM::ITSTATE, false/*isDef*/,
+                                                  true/*isImp*/, false/*isKill*/));
+        LastITMI = NMI;
+      } else {
+        if (NCC == ARMCC::AL &&
+            MoveCopyOutOfITBlock(NMI, CC, OCC, Defs, Uses)) {
+          --MBBI;
+          MBB.remove(NMI);
+          MBB.insert(InsertPos, NMI);
+          ++NumMovedInsts;
+          continue;
+        }
        break;
+      }
+      TrackDefUses(NMI, Defs, Uses, TRI);
       --Pos;
-      ++MBBI;
     }
+
+    // Finalize IT mask.
     Mask |= (1 << Pos);
     // Tag along (firstcond[0] << 4) with the mask.
     Mask |= (CC & 1) << 4;
     MIB.addImm(Mask);
+
+    // Last instruction in IT block kills ITSTATE.
+    LastITMI->findRegisterUseOperand(ARM::ITSTATE)->setIsKill();
+
     Modified = true;
     ++NumITs;
   }
@@ -100,17 +212,21 @@ bool Thumb2ITBlockPass::runOnMachineFunction(MachineFunction &Fn) {
   const TargetMachine &TM = Fn.getTarget();
   AFI = Fn.getInfo();
   TII = static_cast(TM.getInstrInfo());
+  TRI = TM.getRegisterInfo();
 
   if (!AFI->isThumbFunction())
     return false;
 
   bool Modified = false;
-  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
-       ++MFI) {
+  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ) {
     MachineBasicBlock &MBB = *MFI;
-    Modified |= InsertITBlocks(MBB);
+    ++MFI;
+    Modified |= InsertITInstructions(MBB);
   }
 
+  if (Modified)
+    AFI->setHasITBlocks(true);
+
   return Modified;
 }
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 531d5e9..ee51727 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -17,15 +17,27 @@
 #include "ARMAddressingModes.h"
 #include "ARMGenInstrInfo.inc"
 #include "ARMMachineFunctionInfo.h"
+#include "Thumb2HazardRecognizer.h"
+#include "Thumb2InstrInfo.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/ADT/SmallVector.h"
-#include "Thumb2InstrInfo.h"
+#include "llvm/Support/CommandLine.h"
 using namespace llvm;
 
+static cl::opt
+IfCvtLimit("thumb2-ifcvt-limit", cl::Hidden,
+           cl::desc("Thumb2 if-conversion limit (default 3)"),
+           cl::init(3));
+
+static cl::opt
+IfCvtDiamondLimit("thumb2-ifcvt-diamond-limit", cl::Hidden,
+                  cl::desc("Thumb2 diamond if-conversion limit (default 3)"),
+                  cl::init(3));
+
 Thumb2InstrInfo::Thumb2InstrInfo(const ARMSubtarget &STI)
   : ARMBaseInstrInfo(STI), RI(*this, STI) {
 }
@@ -35,33 +47,99 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
   return 0;
 }
 
-bool
-Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
-                              MachineBasicBlock::iterator I,
-                              unsigned DestReg, unsigned SrcReg,
-                              const TargetRegisterClass *DestRC,
-                              const TargetRegisterClass *SrcRC,
-                              DebugLoc DL) const {
-  if (DestRC == ARM::GPRRegisterClass) {
-    if (SrcRC == ARM::GPRRegisterClass) {
-      BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg);
-      return true;
-    } else if (SrcRC == ARM::tGPRRegisterClass) {
-      BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg);
-      return true;
-    }
-  } else if (DestRC == ARM::tGPRRegisterClass) {
-    if (SrcRC == ARM::GPRRegisterClass) {
-      BuildMI(MBB, I, DL, get(ARM::tMOVgpr2tgpr), DestReg).addReg(SrcReg);
-      return true;
-    } else if (SrcRC == ARM::tGPRRegisterClass) {
-      BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
-      return true;
+void
+Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+                                         MachineBasicBlock *NewDest) const {
+  MachineBasicBlock *MBB = Tail->getParent();
+  ARMFunctionInfo *AFI = MBB->getParent()->getInfo();
+  if (!AFI->hasITBlocks()) {
+    TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+    return;
+  }
+
+  // If the first instruction of Tail is predicated, we may have to update
+  // the IT instruction.
+  unsigned PredReg = 0;
+  ARMCC::CondCodes CC = llvm::getInstrPredicate(Tail, PredReg);
+  MachineBasicBlock::iterator MBBI = Tail;
+  if (CC != ARMCC::AL)
+    // Expecting at least the t2IT instruction before it.
+    --MBBI;
+
+  // Actually replace the tail.
+  TargetInstrInfoImpl::ReplaceTailWithBranchTo(Tail, NewDest);
+
+  // Fix up IT.
+  if (CC != ARMCC::AL) {
+    MachineBasicBlock::iterator E = MBB->begin();
+    unsigned Count = 4; // At most 4 instructions in an IT block.
+    while (Count && MBBI != E) {
+      if (MBBI->isDebugValue()) {
+        --MBBI;
+        continue;
+      }
+      if (MBBI->getOpcode() == ARM::t2IT) {
+        unsigned Mask = MBBI->getOperand(1).getImm();
+        if (Count == 4)
+          MBBI->eraseFromParent();
+        else {
+          unsigned MaskOn = 1 << Count;
+          unsigned MaskOff = ~(MaskOn - 1);
+          MBBI->getOperand(1).setImm((Mask & MaskOff) | MaskOn);
+        }
+        return;
+      }
+      --MBBI;
+      --Count;
     }
+
+    // Control flow can reach here if branch folding is run before the
+    // IT block formation pass.
   }
+}
+
+bool
+Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI) const {
+  unsigned PredReg = 0;
+  return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL;
+}
+bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
+                                          unsigned NumInstrs) const {
+  return NumInstrs && NumInstrs <= IfCvtLimit;
+}
+
+bool Thumb2InstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
+                    MachineBasicBlock &FMBB, unsigned NumF) const {
+  // FIXME: Catch optimization such as:
+  // r0 = movne
+  // r0 = moveq
+  return NumT && NumF &&
+    NumT <= (IfCvtDiamondLimit) && NumF <= (IfCvtDiamondLimit);
+}
+
+void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I, DebugLoc DL,
+                                  unsigned DestReg, unsigned SrcReg,
+                                  bool KillSrc) const {
   // Handle SPR, DPR, and QPR copies.
-  return ARMBaseInstrInfo::copyRegToReg(MBB, I, DestReg, SrcReg, DestRC, SrcRC, DL);
+  if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
+    return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
+
+  bool tDest = ARM::tGPRRegClass.contains(DestReg);
+  bool tSrc  = ARM::tGPRRegClass.contains(SrcReg);
+  unsigned Opc = ARM::tMOVgpr2gpr;
+  if (tDest && tSrc)
+    Opc = ARM::tMOVr;
+  else if (tSrc)
+    Opc = ARM::tMOVtgpr2gpr;
+  else if (tDest)
+    Opc = ARM::tMOVgpr2tgpr;
+
+  BuildMI(MBB, I, DL, get(Opc), DestReg)
+    .addReg(SrcReg, getKillRegState(KillSrc));
 }
 
 void Thumb2InstrInfo::
@@ -69,7 +147,8 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned SrcReg, bool isKill, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
-  if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+  if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+      RC == ARM::tcGPRRegisterClass) {
     DebugLoc DL;
     if (I != MBB.end()) DL = I->getDebugLoc();
@@ -94,7 +173,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                      unsigned DestReg, int FI,
                      const TargetRegisterClass *RC,
                      const TargetRegisterInfo *TRI) const {
-  if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
+  if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass ||
+      RC == ARM::tcGPRRegisterClass) {
     DebugLoc DL;
     if (I != MBB.end()) DL = I->getDebugLoc();
@@ -113,6 +193,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
 }
 
+ScheduleHazardRecognizer *Thumb2InstrInfo::
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+  return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II);
+}
+
 void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI, DebugLoc dl,
                                   unsigned DestReg, unsigned BaseReg, int NumBytes,
@@ -131,14 +216,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
     // Use a movw to materialize the 16-bit constant.
     BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg)
       .addImm(NumBytes)
-      .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+      .addImm((unsigned)Pred).addReg(PredReg);
     Fits = true;
   } else if ((NumBytes & 0xffff) == 0) {
     // Use a movt to materialize the 32-bit constant.
     BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg)
      .addReg(DestReg)
       .addImm(NumBytes >> 16)
-      .addImm((unsigned)Pred).addReg(PredReg).addReg(0);
+      .addImm((unsigned)Pred).addReg(PredReg);
     Fits = true;
   }
@@ -502,3 +587,54 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   Offset = (isSub) ? -Offset : Offset;
   return Offset == 0;
 }
+
+/// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+/// two-address instruction inserted by the two-address pass.
+void
+Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
+                                       MachineInstr *UseMI,
+                                       const TargetRegisterInfo &TRI) const {
+  if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr ||
+      SrcMI->getOperand(1).isKill())
+    return;
+
+  unsigned PredReg = 0;
+  ARMCC::CondCodes CC = llvm::getInstrPredicate(UseMI, PredReg);
+  if (CC == ARMCC::AL || PredReg != ARM::CPSR)
+    return;
+
+  // Schedule the copy so it doesn't come between previous instructions
+  // and UseMI, which can form an IT block.
+  unsigned SrcReg = SrcMI->getOperand(1).getReg();
+  ARMCC::CondCodes OCC = ARMCC::getOppositeCondition(CC);
+  MachineBasicBlock *MBB = UseMI->getParent();
+  MachineBasicBlock::iterator MBBI = SrcMI;
+  unsigned NumInsts = 0;
+  while (--MBBI != MBB->begin()) {
+    if (MBBI->isDebugValue())
+      continue;
+
+    MachineInstr *NMI = &*MBBI;
+    ARMCC::CondCodes NCC = llvm::getInstrPredicate(NMI, PredReg);
+    if (!(NCC == CC || NCC == OCC) ||
+        NMI->modifiesRegister(SrcReg, &TRI) ||
+        NMI->definesRegister(ARM::CPSR))
+      break;
+    if (++NumInsts == 4)
+      // Too many in a row!
+      return;
+  }
+
+  if (NumInsts) {
+    MBB->remove(SrcMI);
+    MBB->insert(++MBBI, SrcMI);
+  }
+}
+
+ARMCC::CondCodes
+llvm::getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
+  unsigned Opc = MI->getOpcode();
+  if (Opc == ARM::tBcc || Opc == ARM::t2Bcc)
+    return ARMCC::AL;
+  return llvm::getInstrPredicate(MI, PredReg);
+}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 2948770..3a9f8b1 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -20,7 +20,8 @@
 #include "Thumb2RegisterInfo.h"
 
 namespace llvm {
-  class ARMSubtarget;
+class ARMSubtarget;
+class ScheduleHazardRecognizer;
 
 class Thumb2InstrInfo : public ARMBaseInstrInfo {
   Thumb2RegisterInfo RI;
@@ -31,12 +32,21 @@ public:
   // if there is not such an opcode.
   unsigned getUnindexedOpcode(unsigned Opc) const;
 
-  bool copyRegToReg(MachineBasicBlock &MBB,
-                    MachineBasicBlock::iterator I,
-                    unsigned DestReg, unsigned SrcReg,
-                    const TargetRegisterClass *DestRC,
-                    const TargetRegisterClass *SrcRC,
-                    DebugLoc DL) const;
+  void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
+                               MachineBasicBlock *NewDest) const;
+
+  bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI) const;
+
+  bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs) const;
+
+  bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
+                           MachineBasicBlock &FMBB, unsigned NumFInstrs) const;
+
+  void copyPhysReg(MachineBasicBlock &MBB,
+                   MachineBasicBlock::iterator I, DebugLoc DL,
+                   unsigned DestReg, unsigned SrcReg,
+                   bool KillSrc) const;
 
   void storeRegToStackSlot(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
@@ -50,12 +60,27 @@ public:
                             const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const;
 
+  /// scheduleTwoAddrSource - Schedule the copy / re-mat of the source of the
+  /// two-address instruction inserted by the two-address pass.
+  void scheduleTwoAddrSource(MachineInstr *SrcMI, MachineInstr *UseMI,
+                             const TargetRegisterInfo &TRI) const;
+
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
   ///
   const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }
+
+  ScheduleHazardRecognizer *
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
 };
+
+/// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
+/// to llvm::getInstrPredicate except it returns AL for conditional branch
+/// instructions which are "predicated", but are not in IT blocks.
+ARMCC::CondCodes getITInstrPredicate(const MachineInstr *MI, unsigned &PredReg);
+
+
 }
 
 #endif // THUMB2INSTRUCTIONINFO_H
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index 8fe2e42..ba392f3 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -451,11 +451,18 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
   if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
     return false;
 
-  const TargetInstrDesc &TID = MI->getDesc();
   unsigned Reg0 = MI->getOperand(0).getReg();
   unsigned Reg1 = MI->getOperand(1).getReg();
-  if (Reg0 != Reg1)
-    return false;
+  if (Reg0 != Reg1) {
+    // Try to commute the operands to make it a 2-address instruction.
+    unsigned CommOpIdx1, CommOpIdx2;
+    if (!TII->findCommutedOpIndices(MI, CommOpIdx1, CommOpIdx2) ||
+        CommOpIdx1 != 1 || MI->getOperand(CommOpIdx2).getReg() != Reg0)
+      return false;
+    MachineInstr *CommutedMI = TII->commuteInstruction(MI);
+    if (!CommutedMI)
+      return false;
+  }
   if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
     return false;
   if (Entry.Imm2Limit) {
@@ -484,6 +491,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
 
   bool HasCC = false;
   bool CCDead = false;
+  const TargetInstrDesc &TID = MI->getDesc();
   if (TID.hasOptionalDef()) {
     unsigned NumOps = TID.getNumOperands();
     HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
@@ -689,7 +697,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
       goto ProcessNext;
     }
 
-    // Try to transform ro a 16-bit non-two-address instruction.
+    // Try to transform to a 16-bit non-two-address instruction.
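// Editor's worked example (not part of the patch): the commute path added to
// ReduceTo2Addr() above makes a 32-bit instruction narrowable when its second
// source already matches the destination:
//
//   %r0 = t2ADDrr %r1, %r0   ; Reg0 != Reg1: not in two-address form
//   %r0 = t2ADDrr %r0, %r1   ; after commuteInstruction(): operands tied,
//                            ; now a candidate for a 16-bit encoding
//
// findCommutedOpIndices() must report operand 1 as commutable with an operand
// equal to Reg0 before the rewrite is attempted.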
if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 1d85f12..ea78bf3 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -224,6 +224,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -251,7 +252,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -425,7 +426,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, } } else { //more args // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true, false); + int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -444,7 +445,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, if (TargetRegisterInfo::isPhysicalRegister(args_int[i])) args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass); SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64); - int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); + int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true); if (i == 0) FuncInfo->setVarArgsBase(FI); SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, @@ -453,7 +454,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); - FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false); + FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true); SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0, false, false, 0)); @@ -470,6 +471,7 @@ SDValue AlphaTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26, @@ -483,7 +485,7 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, break; //return SDValue(); // ret void is legal case 1: { - EVT ArgVT = Outs[0].Val.getValueType(); + EVT ArgVT = Outs[0].VT; unsigned ArgReg; if (ArgVT.isInteger()) ArgReg = Alpha::R0; @@ -492,13 +494,13 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, ArgReg = Alpha::F0; } Copy = DAG.getCopyToReg(Copy, dl, ArgReg, - Outs[0].Val, Copy.getValue(1)); + OutVals[0], Copy.getValue(1)); if (DAG.getMachineFunction().getRegInfo().liveout_empty()) DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg); break; } case 2: { - EVT ArgVT = Outs[0].Val.getValueType(); + EVT ArgVT = Outs[0].VT; unsigned ArgReg1, ArgReg2; if (ArgVT.isInteger()) { ArgReg1 = Alpha::R0; @@ -509,13 +511,13 @@ AlphaTargetLowering::LowerReturn(SDValue Chain, ArgReg2 = Alpha::F1; } Copy = DAG.getCopyToReg(Copy, dl, ArgReg1, - Outs[0].Val, Copy.getValue(1)); + OutVals[0], Copy.getValue(1)); if 
(std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1) == DAG.getMachineFunction().getRegInfo().liveout_end()) DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1); Copy = DAG.getCopyToReg(Copy, dl, ArgReg2, - Outs[1].Val, Copy.getValue(1)); + OutVals[1], Copy.getValue(1)); if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2) == DAG.getMachineFunction().getRegInfo().liveout_end()) @@ -539,7 +541,7 @@ void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, false, false, 0); SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, DAG.getConstant(8, MVT::i64)); - SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), + SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Base.getValue(1), Tmp, NULL, 0, MVT::i32, false, false, 0); DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); if (N->getValueType(0).isFloatingPoint()) @@ -643,10 +645,12 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, case ISD::GlobalAddress: { GlobalAddressSDNode *GSDN = cast(Op); const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i64, GSDN->getOffset()); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64, + GSDN->getOffset()); // FIXME there isn't really any debug info here - // if (!GV->hasWeakLinkage() && !GV->isDeclaration() && !GV->hasLinkOnceLinkage()) { + // if (!GV->hasWeakLinkage() && !GV->isDeclaration() + // && !GV->hasLinkOnceLinkage()) { if (GV->hasLocalLinkage()) { SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA, DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); @@ -702,7 +706,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, SDValue Result; if (Op.getValueType() == MVT::i32) - Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, + Result = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Chain, DataPtr, NULL, 0, MVT::i32, false, false, 0); else Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, NULL, 0, @@ -722,7 +726,7 @@ SDValue AlphaTargetLowering::LowerOperation(SDValue Op, false, false, 0); SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, DAG.getConstant(8, MVT::i64)); - Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, + Val = DAG.getExtLoad(ISD::SEXTLOAD, MVT::i64, dl, Result, NP, NULL,0, MVT::i32, false, false, 0); SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, DAG.getConstant(8, MVT::i64)); @@ -863,7 +867,10 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - sinkMBB->transferSuccessors(thisMBB); + sinkMBB->splice(sinkMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(MI)), + thisMBB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB); F->insert(It, llscMBB); F->insert(It, sinkMBB); @@ -912,7 +919,7 @@ AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, thisMBB->addSuccessor(llscMBB); llscMBB->addSuccessor(llscMBB); llscMBB->addSuccessor(sinkMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. 
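// Editor's note (not part of the patch): the splice() plus
// transferSuccessorsAndUpdatePHIs() pair used above is the new idiom for
// splitting a block around a custom-inserted pseudo; the old bare
// transferSuccessors() call left PHIs in the successor blocks still naming
// thisMBB as their predecessor. Schematically, under the same API:
//
//   sinkMBB->splice(sinkMBB->begin(), thisMBB,
//                   llvm::next(MachineBasicBlock::iterator(MI)),
//                   thisMBB->end());                   // move the tail over
//   sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // retarget the PHIs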
return sinkMBB; } diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index 7ee823a..46e0c7d 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -121,6 +121,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -129,6 +130,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td index d984556..6f4ebf2 100644 --- a/lib/Target/Alpha/AlphaInstrFormats.td +++ b/lib/Target/Alpha/AlphaInstrFormats.td @@ -182,7 +182,7 @@ class OForm4 opcode, bits<7> fun, string asmstr, list pattern, Inst bits<5> Rb; bits<7> Function = fun; -// let isTwoAddress = 1; +// let Constraints = "$RFALSE = $RDEST"; let Inst{25-21} = Ra; let Inst{20-16} = Rb; let Inst{15-13} = 0; @@ -223,7 +223,7 @@ class OForm4L opcode, bits<7> fun, string asmstr, list pattern, Ins bits<8> LIT; bits<7> Function = fun; -// let isTwoAddress = 1; +// let Constraints = "$RFALSE = $RDEST"; let Inst{25-21} = Ra; let Inst{20-13} = LIT; let Inst{12} = 1; diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp index 3aba363..ad625a2 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ b/lib/Target/Alpha/AlphaInstrInfo.cpp @@ -110,9 +110,8 @@ static bool isAlphaIntCondCode(unsigned Opcode) { unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl &Cond, + DebugLoc DL) const { assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && "Alpha branch conditions have two components!"); @@ -120,58 +119,47 @@ unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB, // One-way branch. if (FBB == 0) { if (Cond.empty()) // Unconditional branch - BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(TBB); + BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB); else // Conditional branch if (isAlphaIntCondCode(Cond[0].getImm())) - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); else - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); return 1; } // Two-way Conditional Branch. 
if (isAlphaIntCondCode(Cond[0].getImm())) - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_I)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); else - BuildMI(&MBB, dl, get(Alpha::COND_BRANCH_F)) + BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); - BuildMI(&MBB, dl, get(Alpha::BR)).addMBB(FBB); + BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB); return 2; } -bool AlphaInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - //cerr << "copyRegToReg " << DestReg << " <- " << SrcReg << "\n"; - if (DestRC != SrcRC) { - // Not yet supported! - return false; - } - - if (DestRC == Alpha::GPRCRegisterClass) { +void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg) .addReg(SrcReg) - .addReg(SrcReg); - } else if (DestRC == Alpha::F4RCRegisterClass) { + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg) .addReg(SrcReg) - .addReg(SrcReg); - } else if (DestRC == Alpha::F8RCRegisterClass) { + .addReg(SrcReg, getKillRegState(KillSrc)); + } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg) .addReg(SrcReg) - .addReg(SrcReg); + .addReg(SrcReg, getKillRegState(KillSrc)); } else { - // Attempt to copy register that is not GPR or FPR - return false; + llvm_unreachable("Attempt to copy register that is not GPR or FPR"); } - - return true; } void @@ -227,51 +215,6 @@ AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, llvm_unreachable("Unhandled register class"); } -MachineInstr *AlphaInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl &Ops, - int FrameIndex) const { - if (Ops.size() != 1) return NULL; - - // Make sure this is a reg-reg copy. - unsigned Opc = MI->getOpcode(); - - MachineInstr *NewMI = NULL; - switch(Opc) { - default: - break; - case Alpha::BISr: - case Alpha::CPYSS: - case Alpha::CPYST: - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) { - if (Ops[0] == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - Opc = (Opc == Alpha::BISr) ? Alpha::STQ : - ((Opc == Alpha::CPYSS) ? Alpha::STS : Alpha::STT); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addFrameIndex(FrameIndex) - .addReg(Alpha::F31); - } else { // load -> move - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - Opc = (Opc == Alpha::BISr) ? Alpha::LDQ : - ((Opc == Alpha::CPYSS) ? 
Alpha::LDS : Alpha::LDT); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) - .addReg(OutReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addFrameIndex(FrameIndex) - .addReg(Alpha::F31); - } - } - break; - } - return NewMI; -} - static unsigned AlphaRevCondCode(unsigned Opcode) { switch (Opcode) { case Alpha::BEQ: return Alpha::BNE; @@ -428,11 +371,8 @@ unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); - bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Alpha::R29, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - DebugLoc()); - assert(Ok && "Couldn't assign to global base register!"); - Ok = Ok; // Silence warning when assertions are turned off. + BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + GlobalBaseReg).addReg(Alpha::R29); RegInfo.addLiveIn(Alpha::R29); AlphaFI->setGlobalBaseReg(GlobalBaseReg); @@ -456,11 +396,8 @@ unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); - bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalRetAddr, Alpha::R26, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - DebugLoc()); - assert(Ok && "Couldn't assign to global return address register!"); - Ok = Ok; // Silence warning when assertions are turned off. + BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + GlobalRetAddr).addReg(Alpha::R26); RegInfo.addLiveIn(Alpha::R26); AlphaFI->setGlobalRetAddr(GlobalRetAddr); diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h index 7d7365b..e20e832 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ b/lib/Target/Alpha/AlphaInstrInfo.h @@ -42,14 +42,13 @@ public: int &FrameIndex) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -62,18 +61,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - return 0; - } - bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index a47a29b..92de78a 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -680,18 +680,32 @@ def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", } //conditional moves, floats -let OutOperandList = (outs F4RC:$RDEST), InOperandList = 
(ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), - isTwoAddress = 1 in { -def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero -def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero -def FCMOVGTS : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if > zero -def FCMOVLES : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if <= zero -def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; // FCMOVE if < zero -def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero +let OutOperandList = (outs F4RC:$RDEST), + InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), + Constraints = "$RTRUE = $RDEST" in { +def FCMOVEQS : FPForm<0x17, 0x02A, + "fcmoveq $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if = zero +def FCMOVGES : FPForm<0x17, 0x02D, + "fcmovge $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if >= zero +def FCMOVGTS : FPForm<0x17, 0x02F, + "fcmovgt $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if > zero +def FCMOVLES : FPForm<0x17, 0x02E, + "fcmovle $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if <= zero +def FCMOVLTS : FPForm<0x17, 0x02C, + "fcmovlt $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; // FCMOVE if < zero +def FCMOVNES : FPForm<0x17, 0x02B, + "fcmovne $RCOND,$RTRUE,$RDEST", + [], s_fcmov>; //FCMOVE if != zero } //conditional moves, doubles -let OutOperandList = (outs F8RC:$RDEST), InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), - isTwoAddress = 1 in { +let OutOperandList = (outs F8RC:$RDEST), + InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), + Constraints = "$RTRUE = $RDEST" in { def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>; def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>; def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>; diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index c083d8c..dc9d935 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -74,20 +74,6 @@ const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) return CalleeSavedRegs; } -const TargetRegisterClass* const* -AlphaRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - &Alpha::GPRCRegClass, &Alpha::GPRCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, - &Alpha::F8RCRegClass, &Alpha::F8RCRegClass, 0 - }; - return CalleeSavedRegClasses; -} - BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(Alpha::R15); diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h index 720367a..f9fd87a 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -30,9 +30,6 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp index b4da96c..80ee107 100644 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp @@ -132,8 +132,8 @@ static void UpdateNodeOperand(SelectionDAG &DAG, SDValue Val) { SmallVector ops(N->op_begin(), N->op_end()); ops[Num] = Val; - SDValue New = DAG.UpdateNodeOperands(SDValue(N, 0), ops.data(), ops.size()); - DAG.ReplaceAllUsesWith(N, New.getNode()); + SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size()); + DAG.ReplaceAllUsesWith(N, New); } // After instruction selection, insert COPY_TO_REGCLASS nodes to help in diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index adf2118..6e828e1 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -143,7 +143,7 @@ SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op, DebugLoc DL = Op.getDebugLoc(); const GlobalValue *GV = cast(Op)->getGlobal(); - Op = DAG.getTargetGlobalAddress(GV, MVT::i32); + Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op); } @@ -205,8 +205,7 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, } else { assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); unsigned ObjSize = VA.getLocVT().getStoreSize(); - int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), - true, false); + int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0, false, false, 0)); @@ -220,6 +219,7 @@ SDValue BlackfinTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. @@ -245,7 +245,7 @@ BlackfinTargetLowering::LowerReturn(SDValue Chain, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue Opi = Outs[i].Val; + SDValue Opi = OutVals[i]; // Expand to i32 if necessary switch (VA.getLocInfo()) { @@ -278,6 +278,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -301,7 +302,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -357,7 +358,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. 
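// Editor's sketch (not part of the patch; the dyn_cast template arguments
// below are inferred): the recurring mechanical change in these LowerCall
// implementations is that getTargetGlobalAddress() now threads the call
// node's DebugLoc:
//
//   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
//     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32);
//   else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
//     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32);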
if (GlobalAddressSDNode *G = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index a784248..6bebcc3 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -63,6 +63,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -71,6 +72,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; }; } // end namespace llvm diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp index 73924b7..a74d42d 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp @@ -104,10 +104,8 @@ unsigned BlackfinInstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const { - // FIXME this should probably have a DebugLoc operand - DebugLoc DL; - + const SmallVectorImpl &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -124,69 +122,73 @@ InsertBranch(MachineBasicBlock &MBB, llvm_unreachable("Implement conditional branches!"); } -static bool inClass(const TargetRegisterClass &Test, - unsigned Reg, - const TargetRegisterClass *RC) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return Test.contains(Reg); - else - return &Test==RC || Test.hasSubClass(RC); -} - -bool BlackfinInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, - unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (inClass(BF::ALLRegClass, DestReg, DestRC) && - inClass(BF::ALLRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVE), DestReg).addReg(SrcReg); - return true; +void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (BF::ALLRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(BF::MOVE), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - if (inClass(BF::D16RegClass, DestReg, DestRC) && - inClass(BF::D16RegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg).addReg(SrcReg).addImm(0); - return true; + if (BF::D16RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); + return; } - if (inClass(BF::AnyCCRegClass, SrcReg, SrcRC) && - inClass(BF::DRegClass, DestReg, DestRC)) { - if (inClass(BF::NotCCRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg).addReg(SrcReg); + if (BF::DRegClass.contains(DestReg)) { + if (SrcReg == BF::NCC) { + BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, 
I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0); - } else { - BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg).addReg(SrcReg); + return; + } + if (SrcReg == BF::CC) { + BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return true; } - if (inClass(BF::AnyCCRegClass, DestReg, DestRC) && - inClass(BF::DRegClass, SrcReg, SrcRC)) { - if (inClass(BF::NotCCRegClass, DestReg, DestRC)) - BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg).addReg(SrcReg); - else - BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg).addReg(SrcReg); - return true; + if (BF::DRegClass.contains(SrcReg)) { + if (DestReg == BF::NCC) { + BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0); + return; + } + if (DestReg == BF::CC) { + BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } } - if (inClass(BF::NotCCRegClass, DestReg, DestRC) && - inClass(BF::JustCCRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg).addReg(SrcReg); - return true; + + if (DestReg == BF::NCC && SrcReg == BF::CC) { + BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - if (inClass(BF::JustCCRegClass, DestReg, DestRC) && - inClass(BF::NotCCRegClass, SrcReg, SrcRC)) { - BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg).addReg(SrcReg); - return true; + if (DestReg == BF::CC && SrcReg == BF::NCC) { + BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - llvm_unreachable((std::string("Bad regclasses for reg-to-reg copy: ")+ - SrcRC->getName() + " -> " + DestRC->getName()).c_str()); - return false; + llvm_unreachable("Bad reg-to-reg copy"); +} + +static bool inClass(const TargetRegisterClass &Test, + unsigned Reg, + const TargetRegisterClass *RC) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return Test.contains(Reg); + else + return &Test==RC || Test.hasSubClass(RC); } void diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h index c1dcd58..6c35917 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.h +++ b/lib/Target/Blackfin/BlackfinInstrInfo.h @@ -44,14 +44,13 @@ namespace llvm { InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; - - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl &Cond, + DebugLoc DL) const; + + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index 5cf350a..8034a7f 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -488,7 +488,7 @@ def MOVE: F1<(outs ALL:$dst), (ins ALL:$src), "$dst = $src;", []>; -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc), "if $cc $dst = $src2;", [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>; @@ -645,7 +645,7 @@ def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2), 
// Table C-15. Bit Operations Instructions //===----------------------------------------------------------------------===// -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2), "bitclr($dst, $src2);", [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>; @@ -691,7 +691,7 @@ multiclass SHIFT32 { } let Defs = [AZ, AN, V, VS], - isTwoAddress = 1 in { + Constraints = "$src = $dst" in { defm SRA : SHIFT32>>">; defm SRL : SHIFT32>">; defm SLL : SHIFT32; @@ -748,7 +748,7 @@ def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2), "$dst = $src1 + $src2;", [(set D16:$dst, (add D16:$src1, D16:$src2))]>; -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2), "$dst += $src2;", [(set D:$dst, (add D:$src1, imm7:$src2))]>; @@ -775,7 +775,7 @@ def NEG: F1<(outs D:$dst), (ins D:$src), def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2), "$dst = $src1 + $src2;", []>; -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2), "$dst += $src2;", []>; @@ -802,7 +802,7 @@ def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2), } -let isTwoAddress = 1 in +let Constraints = "$src1 = $dst" in def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2), "$dst *= $src2;", [(set D:$dst, (mul D:$src1, D:$src2))]>; diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 5153ace..06e95de 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -48,17 +48,6 @@ BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -const TargetRegisterClass* const *BlackfinRegisterInfo:: -getCalleeSavedRegClasses(const MachineFunction *MF) const { - using namespace BF; - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &PRegClass, - &DRegClass, &DRegClass, &DRegClass, &DRegClass, - &PRegClass, &PRegClass, &PRegClass, - 0 }; - return CalleeSavedRegClasses; -} - BitVector BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const { using namespace BF; @@ -86,25 +75,6 @@ BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -const TargetRegisterClass* -BlackfinRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { - assert(isPhysicalRegister(reg) && "reg must be a physical register"); - - // Pick the smallest register class of the right type that contains - // this physreg. - const TargetRegisterClass* BestRC = 0; - for (regclass_iterator I = regclass_begin(), E = regclass_end(); - I != E; ++I) { - const TargetRegisterClass* RC = *I; - if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && - (!BestRC || RC->getNumRegs() < BestRC->getNumRegs())) - BestRC = RC; - } - - assert(BestRC && "Couldn't find the register class"); - return BestRC; -} - // hasFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index 03c5450..ead0b4a 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -33,9 +33,6 @@ namespace llvm { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; // getSubReg implemented by tablegen @@ -44,9 +41,6 @@ namespace llvm { return &BF::PRegClass; } - const TargetRegisterClass *getPhysicalRegisterRegClass(unsigned reg, - EVT VT) const; - bool hasFP(const MachineFunction &MF) const; // bool hasReservedCallFrame(MachineFunction &MF) const; diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 55b8aaa..e8d8474 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -264,7 +264,7 @@ namespace { // static const AllocaInst *isDirectAlloca(const Value *V) { const AllocaInst *AI = dyn_cast(V); - if (!AI) return false; + if (!AI) return 0; if (AI->isArrayAllocation()) return 0; // FIXME: we can also inline fixed size array allocas! if (AI->getParent() != &AI->getParent()->getParent()->getEntryBlock()) @@ -2889,7 +2889,7 @@ void CWriter::visitCallInst(CallInst &I) { bool hasByVal = I.hasByValArgument(); bool isStructRet = I.hasStructRetAttr(); if (isStructRet) { - writeOperandDeref(I.getOperand(1)); + writeOperandDeref(I.getArgOperand(0)); Out << " = "; } @@ -2944,8 +2944,8 @@ void CWriter::visitCallInst(CallInst &I) { } unsigned NumDeclaredParams = FTy->getNumParams(); - - CallSite::arg_iterator AI = I.op_begin()+1, AE = I.op_end(); + CallSite CS(&I); + CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); unsigned ArgNo = 0; if (isStructRet) { // Skip struct return argument. ++AI; @@ -2999,7 +2999,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << "0; "; Out << "va_start(*(va_list*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; // Output the last argument to the enclosing function. 
if (I.getParent()->getParent()->arg_empty()) @@ -3009,9 +3009,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << ')'; return true; case Intrinsic::vaend: - if (!isa(I.getOperand(1))) { + if (!isa(I.getArgOperand(0))) { Out << "0; va_end(*(va_list*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; } else { Out << "va_end(*(va_list*)0)"; @@ -3020,47 +3020,47 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, case Intrinsic::vacopy: Out << "0; "; Out << "va_copy(*(va_list*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", *(va_list*)"; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ')'; return true; case Intrinsic::returnaddress: Out << "__builtin_return_address("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; return true; case Intrinsic::frameaddress: Out << "__builtin_frame_address("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; return true; case Intrinsic::powi: Out << "__builtin_powi("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ')'; return true; case Intrinsic::setjmp: Out << "setjmp(*(jmp_buf*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ')'; return true; case Intrinsic::longjmp: Out << "longjmp(*(jmp_buf*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ')'; return true; case Intrinsic::prefetch: Out << "LLVM_PREFETCH((const void *)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ", "; - writeOperand(I.getOperand(3)); + writeOperand(I.getArgOperand(2)); Out << ")"; return true; case Intrinsic::stacksave: @@ -3077,7 +3077,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, printType(Out, I.getType()); Out << ')'; // Multiple GCC builtins multiplex onto this intrinsic. 
- switch (cast(I.getOperand(3))->getZExtValue()) { + switch (cast(I.getArgOperand(2))->getZExtValue()) { default: llvm_unreachable("Invalid llvm.x86.sse.cmp!"); case 0: Out << "__builtin_ia32_cmpeq"; break; case 1: Out << "__builtin_ia32_cmplt"; break; @@ -3098,9 +3098,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, Out << 'd'; Out << "("; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ", "; - writeOperand(I.getOperand(2)); + writeOperand(I.getArgOperand(1)); Out << ")"; return true; case Intrinsic::ppc_altivec_lvsl: @@ -3108,7 +3108,7 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID, printType(Out, I.getType()); Out << ')'; Out << "__builtin_altivec_lvsl(0, (void*)"; - writeOperand(I.getOperand(1)); + writeOperand(I.getArgOperand(0)); Out << ")"; return true; } @@ -3221,7 +3221,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { DestVal = ResultVals[ValueCount].first; DestValNo = ResultVals[ValueCount].second; } else - DestVal = CI.getOperand(ValueCount-ResultVals.size()+1); + DestVal = CI.getArgOperand(ValueCount-ResultVals.size()); if (I->isEarlyClobber) C = "&"+C; @@ -3255,7 +3255,7 @@ void CWriter::visitInlineAsm(CallInst &CI) { } assert(ValueCount >= ResultVals.size() && "Input can't refer to result"); - Value *SrcVal = CI.getOperand(ValueCount-ResultVals.size()+1); + Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size()); Out << "\"" << C << "\"("; if (!I->isIndirect) diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td index 10dc837..ec2f663 100644 --- a/lib/Target/CellSPU/SPUCallingConv.td +++ b/lib/Target/CellSPU/SPUCallingConv.td @@ -34,76 +34,19 @@ def RetCC_SPU : CallingConv<[ //===----------------------------------------------------------------------===// // CellSPU Argument Calling Conventions -// (note: this isn't used, but presumably should be at some point when other -// targets do.) 
//===----------------------------------------------------------------------===// -/* -def CC_SPU : CallingConv<[ - CCIfType<[i8], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[i16], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[f32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[i64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[f64], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - CCIfType<[v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], - CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, - R12, R13, R14, R15, R16, R17, R18, R19, R20, - R21, R22, R23, R24, R25, R26, R27, R28, R29, - R30, R31, R32, R33, R34, R35, R36, R37, R38, - R39, R40, R41, R42, R43, R44, R45, R46, R47, - R48, R49, R50, R51, R52, R53, R54, R55, R56, - R57, R58, R59, R60, R61, R62, R63, R64, R65, - R66, R67, R68, R69, R70, R71, R72, R73, R74, - R75, R76, R77, R78, R79]>>, - +def CCC_SPU : CallingConv<[ + CCIfType<[i8, i16, i32, i64, i128, f32, f64, + v16i8, v8i16, v4i32, v4f32, v2i64, v2f64], + CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10, R11, + R12, R13, R14, R15, R16, R17, R18, R19, R20, + R21, R22, R23, R24, R25, R26, R27, R28, R29, + R30, R31, R32, R33, R34, R35, R36, R37, R38, + R39, R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, 
R53, R54, R55, R56, + R57, R58, R59, R60, R61, R62, R63, R64, R65, + R66, R67, R68, R69, R70, R71, R72, R73, R74, + R75, R76, R77, R78, R79]>>, // Integer/FP values get stored in stack slots that are 8 bytes in size and // 8-byte aligned if there are no more registers to hold them. CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, @@ -112,4 +55,3 @@ def CC_SPU : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>> ]>; -*/ diff --git a/lib/Target/CellSPU/SPUFrameInfo.h b/lib/Target/CellSPU/SPUFrameInfo.h index e8ca333..f511acd 100644 --- a/lib/Target/CellSPU/SPUFrameInfo.h +++ b/lib/Target/CellSPU/SPUFrameInfo.h @@ -53,10 +53,6 @@ namespace llvm { static int minStackSize() { return (2 * stackSlotSize()); } - //! Frame size required to spill all registers plus frame info - static int fullSpillSize() { - return (SPURegisterInfo::getNumArgRegs() * stackSlotSize()); - } //! Convert frame index to stack offset static int FItoStackOffset(int frame_index) { return frame_index * stackSlotSize(); diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 9afdb2b..9b8c2dd 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -275,7 +275,6 @@ namespace { SDNode *emitBuildVector(SDNode *bvNode) { EVT vecVT = bvNode->getValueType(0); - EVT eltVT = vecVT.getVectorElementType(); DebugLoc dl = bvNode->getDebugLoc(); // Check to see if this vector can be represented as a CellSPU immediate @@ -606,18 +605,14 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, Base = CurDAG->getTargetConstant(0, N.getValueType()); Index = N; return true; - } else if (Opc == ISD::Register || Opc == ISD::CopyFromReg) { + } else if (Opc == ISD::Register + ||Opc == ISD::CopyFromReg + ||Opc == ISD::UNDEF) { unsigned OpOpc = Op->getOpcode(); if (OpOpc == ISD::STORE || OpOpc == ISD::LOAD) { // Direct load/store without getelementptr - SDValue Addr, Offs; - - // Get the register from CopyFromReg - if (Opc == ISD::CopyFromReg) - Addr = N.getOperand(1); - else - Addr = N; // Register + SDValue Offs; Offs = ((OpOpc == ISD::STORE) ? 
Op->getOperand(3) : Op->getOperand(2)); @@ -626,7 +621,7 @@ SPUDAGToDAGISel::DFormAddressPredicate(SDNode *Op, SDValue N, SDValue &Base, Offs = CurDAG->getTargetConstant(0, Offs.getValueType()); Base = Offs; - Index = Addr; + Index = N; return true; } } else { diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 081e8d0..ece19b9 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -953,7 +953,8 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { EVT PtrVT = Op.getValueType(); GlobalAddressSDNode *GSDN = cast(Op); const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); + SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), + PtrVT, GSDN->getOffset()); const TargetMachine &TM = DAG.getTarget(); SDValue Zero = DAG.getConstant(0, PtrVT); // FIXME there is no actual debug info here @@ -1013,22 +1014,26 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, MachineRegisterInfo &RegInfo = MF.getRegInfo(); SPUFunctionInfo *FuncInfo = MF.getInfo(); - const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); - const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); - unsigned ArgOffset = SPUFrameInfo::minStackSize(); unsigned ArgRegIdx = 0; unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + // FIXME: allow for other calling conventions + CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU); + // Add DAG nodes to load the arguments or copy them out of registers. for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { EVT ObjectVT = Ins[ArgNo].VT; unsigned ObjSize = ObjectVT.getSizeInBits()/8; SDValue ArgVal; + CCValAssign &VA = ArgLocs[ArgNo]; - if (ArgRegIdx < NumArgRegs) { + if (VA.isRegLoc()) { const TargetRegisterClass *ArgRegClass; switch (ObjectVT.getSimpleVT().SimpleTy) { @@ -1067,14 +1072,14 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, } unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); - RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); + RegInfo.addLiveIn(VA.getLocReg(), VReg); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++ArgRegIdx; } else { // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type // or we're forced to do vararg - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); ArgOffset += StackSlotSize; @@ -1087,16 +1092,31 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // vararg handling: if (isVarArg) { - // unsigned int ptr_size = PtrVT.getSizeInBits() / 8; + // FIXME: we should be able to query the argument registers from + // tablegen generated code. 
+ static const unsigned ArgRegs[] = { + SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, + SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, + SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, + SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, + SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, + SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44, + SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, + SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, + SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, + SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, + SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 + }; + // size of ArgRegs array + unsigned NumArgRegs = 77; + // We will spill (79-3)+1 registers to the stack SmallVector MemOps; // Create the frame slot - for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { FuncInfo->setVarArgsFrameIndex( - MFI->CreateFixedObject(StackSlotSize, ArgOffset, - true, false)); + MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); @@ -1135,6 +1155,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -1144,8 +1165,15 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); unsigned NumOps = Outs.size(); unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); - const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); - const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); + + SmallVector ArgLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, + *DAG.getContext()); + // FIXME: allow for other calling conventions + CCInfo.AnalyzeCallOperands(Outs, CCC_SPU); + + const unsigned NumArgRegs = ArgLocs.size(); + // Handy pointer type EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1165,8 +1193,9 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // And the arguments passed on the stack SmallVector MemOpChains; - for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; + for (; ArgRegIdx != NumOps; ++ArgRegIdx) { + SDValue Arg = OutVals[ArgRegIdx]; + CCValAssign &VA = ArgLocs[ArgRegIdx]; // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. 
@@ -1180,24 +1209,8 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case MVT::i32: case MVT::i64: case MVT::i128: - if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); - } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, - false, false, 0)); - ArgOffset += StackSlotSize; - } - break; case MVT::f32: case MVT::f64: - if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); - } else { - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, - false, false, 0)); - ArgOffset += StackSlotSize; - } - break; case MVT::v2i64: case MVT::v2f64: case MVT::v4f32: @@ -1205,7 +1218,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case MVT::v8i16: case MVT::v16i8: if (ArgRegIdx != NumArgRegs) { - RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); + RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); } else { MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0, false, false, 0)); @@ -1249,7 +1262,7 @@ SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const GlobalValue *GV = G->getGlobal(); EVT CalleeVT = Callee.getValueType(); SDValue Zero = DAG.getConstant(0, PtrVT); - SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT); if (!ST->usingLargeMem()) { // Turn calls to targets that are defined (i.e., have bodies) into BRSL @@ -1355,6 +1368,7 @@ SDValue SPUTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { SmallVector RVLocs; @@ -1376,7 +1390,7 @@ SPUTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); Flag = Chain.getValue(1); } @@ -1746,15 +1760,20 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { unsigned V0Elt = 0; bool monotonic = true; bool rotate = true; + EVT maskVT; // which of the c?d instructions to use if (EltVT == MVT::i8) { V2EltIdx0 = 16; + maskVT = MVT::v16i8; } else if (EltVT == MVT::i16) { V2EltIdx0 = 8; + maskVT = MVT::v8i16; } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { V2EltIdx0 = 4; + maskVT = MVT::v4i32; } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { V2EltIdx0 = 2; + maskVT = MVT::v2i64; } else llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); @@ -1786,7 +1805,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { } else { rotate = false; } - } else if (PrevElt == 0) { + } else if (i == 0) { // First time through, need to keep track of previous element PrevElt = SrcElt; } else { @@ -1798,18 +1817,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (EltsFromV2 == 1 && monotonic) { // Compute mask and shuffle - MachineFunction &MF = DAG.getMachineFunction(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - // Initialize temporary register to 0 - SDValue InitTempReg = - DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT)); - // Copy register's contents as index in SHUFFLE_MASK: - SDValue ShufMaskOp = - DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32, - DAG.getTargetConstant(V2Elt, 
MVT::i32), - DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT)); + + // As SHUFFLE_MASK becomes a c?d instruction, feed it an address + // R1 ($sp) is used here only as it is guaranteed to have last bits zero + SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, + DAG.getRegister(SPU::R1, PtrVT), + DAG.getConstant(V2Elt, MVT::i32)); + SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, + maskVT, Pointer); + // Use shuffle mask in SHUFB synthetic instruction: return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, ShufMaskOp); @@ -2056,14 +2073,19 @@ static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); - ConstantSDNode *CN = cast(IdxOp); - assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); + // use 0 when the lane to insert to is 'undef' + int64_t Idx=0; + if (IdxOp.getOpcode() != ISD::UNDEF) { + ConstantSDNode *CN = cast(IdxOp); + assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); + Idx = (CN->getSExtValue()); + } EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Use $sp ($1) because it's always 16-byte aligned and it's available: SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, DAG.getRegister(SPU::R1, PtrVT), - DAG.getConstant(CN->getSExtValue(), PtrVT)); + DAG.getConstant(Idx, PtrVT)); SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); SDValue result = @@ -2862,7 +2884,7 @@ SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const case SPUISD::IndirectAddr: { if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { ConstantSDNode *CN = dyn_cast(N->getOperand(1)); - if (CN != 0 && CN->getZExtValue() == 0) { + if (CN != 0 && CN->isNullValue()) { // (SPUindirect (SPUaform , 0), 0) -> // (SPUaform , 0) @@ -3056,12 +3078,10 @@ SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, void SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector &Ops, SelectionDAG &DAG) const { // Default, for the time being, to the base class handler - TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory, - Ops, DAG); + TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG); } /// isLegalAddressImmediate - Return true if the integer value can be used diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 9ebd442..6d3c90b 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -134,7 +134,6 @@ namespace llvm { EVT VT) const; void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector &Ops, SelectionDAG &DAG) const; @@ -160,6 +159,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -168,6 +168,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; }; } diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp index 4c53c98..69aa088 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -164,11 +164,9 @@ SPUInstrInfo::isMoveInstr(const MachineInstr& MI, MI.getOperand(0).isReg() && 
MI.getOperand(1).isReg() && "invalid SPU OR_ or LR instruction!"); - if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) { sourceReg = MI.getOperand(1).getReg(); destReg = MI.getOperand(0).getReg(); return true; - } break; } case SPU::ORv16i8: @@ -251,40 +249,18 @@ SPUInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return 0; } -bool SPUInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const +void SPUInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { // We support cross register class moves for our aliases, such as R3 in any // reg class to any other reg class containing R3. This is required because // we instruction select bitconvert i64 -> f64 as a noop for example, so our // types have no specific meaning. - if (DestRC == SPU::R8CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr8), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R16CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr16), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R32CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr32), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R32FPRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRf32), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R64CRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr64), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::R64FPRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRf64), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::GPRCRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRr128), DestReg).addReg(SrcReg); - } else if (DestRC == SPU::VECREGRegisterClass) { - BuildMI(MBB, MI, DL, get(SPU::LRv16i8), DestReg).addReg(SrcReg); - } else { - // Attempt to copy unknown/unsupported register class! - return false; - } - - return true; + BuildMI(MBB, I, DL, get(SPU::LRr128), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } void @@ -356,88 +332,6 @@ SPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DL, get(opc), DestReg), FrameIdx); } -//! Return true if the specified load or store can be folded -bool -SPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const { - if (Ops.size() != 1) return false; - - // Make sure this is a reg-reg copy. - unsigned Opc = MI->getOpcode(); - - switch (Opc) { - case SPU::ORv16i8: - case SPU::ORv8i16: - case SPU::ORv4i32: - case SPU::ORv2i64: - case SPU::ORr8: - case SPU::ORr16: - case SPU::ORr32: - case SPU::ORr64: - case SPU::ORf32: - case SPU::ORf64: - if (MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) - return true; - break; - } - - return false; -} - -/// foldMemoryOperand - SPU, like PPC, can only fold spills into -/// copy instructions, turning them into load/store instructions. 
-MachineInstr * -SPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl &Ops, - int FrameIndex) const -{ - if (Ops.size() != 1) return 0; - - unsigned OpNum = Ops[0]; - unsigned Opc = MI->getOpcode(); - MachineInstr *NewMI = 0; - - switch (Opc) { - case SPU::ORv16i8: - case SPU::ORv8i16: - case SPU::ORv4i32: - case SPU::ORv2i64: - case SPU::ORr8: - case SPU::ORr16: - case SPU::ORr32: - case SPU::ORr64: - case SPU::ORf32: - case SPU::ORf64: - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - if (FrameIndex < SPUFrameInfo::maxFrameOffset()) { - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), - get(SPU::STQDr32)); - - MIB.addReg(InReg, getKillRegState(isKill) | getUndefRegState(isUndef)); - NewMI = addFrameReference(MIB, FrameIndex); - } - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), get(Opc)); - - MIB.addReg(OutReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)); - Opc = (FrameIndex < SPUFrameInfo::maxFrameOffset()) - ? SPU::STQDr32 : SPU::STQXr32; - NewMI = addFrameReference(MIB, FrameIndex); - break; - } - } - - return NewMI; -} - //! Branch analysis /*! \note This code was kiped from PPC. There may be more branch analysis for @@ -554,9 +448,8 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -566,14 +459,14 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (FBB == 0) { if (Cond.empty()) { // Unconditional branch - MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(SPU::BR)); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(SPU::BR)); MIB.addMBB(TBB); DEBUG(errs() << "Inserted one-way uncond branch: "); DEBUG((*MIB).dump()); } else { // Conditional branch - MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm())); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); MIB.addReg(Cond[1].getReg()).addMBB(TBB); DEBUG(errs() << "Inserted one-way cond branch: "); @@ -581,8 +474,8 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, } return 1; } else { - MachineInstrBuilder MIB = BuildMI(&MBB, dl, get(Cond[0].getImm())); - MachineInstrBuilder MIB2 = BuildMI(&MBB, dl, get(SPU::BR)); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR)); // Two-way Conditional Branch. 
MIB.addReg(Cond[1].getReg()).addMBB(TBB); diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h index 6dabd7c..fbb1733 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.h +++ b/lib/Target/CellSPU/SPUInstrInfo.h @@ -23,19 +23,6 @@ namespace llvm { class SPUInstrInfo : public TargetInstrInfoImpl { SPUTargetMachine &TM; const SPURegisterInfo RI; - protected: - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - return 0; - } - public: explicit SPUInstrInfo(SPUTargetMachine &tm); @@ -56,12 +43,10 @@ namespace llvm { unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; //! Store a register to a stack slot, based on its register class. virtual void storeRegToStackSlot(MachineBasicBlock &MBB, @@ -77,11 +62,6 @@ namespace llvm { const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - //! Return true if the specified load or store can be folded - virtual - bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const; - //! Reverses a branch's condition, returning false on success. virtual bool ReverseBranchCondition(SmallVectorImpl &Cond) const; @@ -94,8 +74,9 @@ namespace llvm { virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const; }; } diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 846c7ed..647da30 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -21,7 +21,7 @@ def SPUshufmask : SDNode<"SPUISD::SHUFFLE_MASK", SPU_GenControl, []>; def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPUCallSeq, [SDNPHasChain, SDNPOutFlag]>; def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, - [SDNPHasChain, SDNPOutFlag]>; + [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; //===----------------------------------------------------------------------===// // Operand constraints: //===----------------------------------------------------------------------===// diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index d8937ec..f7cfa42 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -191,33 +191,6 @@ SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget, { } -// SPU's 128-bit registers used for argument passing: -static const unsigned SPU_ArgRegs[] = { - SPU::R3, SPU::R4, SPU::R5, SPU::R6, SPU::R7, SPU::R8, SPU::R9, - SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16, - SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23, - SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30, - SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37, - SPU::R38, SPU::R39, SPU::R40, 
SPU::R41, SPU::R42, SPU::R43, SPU::R44, - SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51, - SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58, - SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65, - SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72, - SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79 -}; - -const unsigned * -SPURegisterInfo::getArgRegs() -{ - return SPU_ArgRegs; -} - -unsigned -SPURegisterInfo::getNumArgRegs() -{ - return sizeof(SPU_ArgRegs) / sizeof(SPU_ArgRegs[0]); -} - /// getPointerRegClass - Return the register class to use to hold pointers. /// This is used for addressing modes. const TargetRegisterClass * @@ -251,36 +224,6 @@ SPURegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const return SPU_CalleeSaveRegs; } -const TargetRegisterClass* const* -SPURegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const -{ - // Cell ABI Calling Convention - static const TargetRegisterClass * const SPU_CalleeSaveRegClasses[] = { - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, &SPU::GPRCRegClass, &SPU::GPRCRegClass, - &SPU::GPRCRegClass, /* environment pointer */ - &SPU::GPRCRegClass, /* stack pointer */ - &SPU::GPRCRegClass, /* link register */ - 0 /* end */ - }; - - return SPU_CalleeSaveRegClasses; -} - /*! R0 (link register), R1 (stack pointer) and R2 (environment pointer -- this is generally unused) are the Cell's reserved registers diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 0a70318..7a6ae6d 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -49,10 +49,6 @@ namespace llvm { //! Return the array of callee-saved registers virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const; - //! Return the register class array of the callee-saved registers - virtual const TargetRegisterClass* const * - getCalleeSavedRegClasses(const MachineFunction *MF) const; - //! Allow for scavenging, so we can get scratch registers when needed. virtual bool requiresRegisterScavenging(const MachineFunction &MF) const { return true; } @@ -90,15 +86,6 @@ namespace llvm { // New methods added: //------------------------------------------------------------------------ - //! Return the array of argument passing registers - /*! - \note The size of this array is returned by getArgRegsSize(). - */ - static const unsigned *getArgRegs(); - - //! Return the size of the argument passing register array - static unsigned getNumArgRegs(); - //! 
Get DWARF debugging register number int getDwarfRegNum(unsigned RegNum, bool isEH) const; diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 45a0c84..145568a 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -99,11 +99,12 @@ namespace { ValueSet DefinedValues; ForwardRefMap ForwardRefs; bool is_inline; + unsigned indent_level; public: static char ID; explicit CppWriter(formatted_raw_ostream &o) : - ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false) {} + ModulePass(&ID), Out(o), uniqueNum(0), is_inline(false), indent_level(0){} virtual const char *getPassName() const { return "C++ backend"; } @@ -120,6 +121,11 @@ namespace { void error(const std::string& msg); + + formatted_raw_ostream& nl(formatted_raw_ostream &Out, int delta = 0); + inline void in() { indent_level++; } + inline void out() { if (indent_level >0) indent_level--; } + private: void printLinkageType(GlobalValue::LinkageTypes LT); void printVisibilityType(GlobalValue::VisibilityTypes VisTypes); @@ -153,1857 +159,1856 @@ namespace { void printModuleBody(); }; +} // end anonymous namespace. + +formatted_raw_ostream &CppWriter::nl(formatted_raw_ostream &Out, int delta) { + Out << '\n'; + if (delta >= 0 || indent_level >= unsigned(-delta)) + indent_level += delta; + Out.indent(indent_level); + return Out; +} + +static inline void sanitize(std::string &str) { + for (size_t i = 0; i < str.length(); ++i) + if (!isalnum(str[i]) && str[i] != '_') + str[i] = '_'; +} - static unsigned indent_level = 0; - inline formatted_raw_ostream& nl(formatted_raw_ostream& Out, int delta = 0) { - Out << "\n"; - if (delta >= 0 || indent_level >= unsigned(-delta)) - indent_level += delta; - for (unsigned i = 0; i < indent_level; ++i) - Out << " "; - return Out; +static std::string getTypePrefix(const Type *Ty) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: return "void_"; + case Type::IntegerTyID: + return "int" + utostr(cast(Ty)->getBitWidth()) + "_"; + case Type::FloatTyID: return "float_"; + case Type::DoubleTyID: return "double_"; + case Type::LabelTyID: return "label_"; + case Type::FunctionTyID: return "func_"; + case Type::StructTyID: return "struct_"; + case Type::ArrayTyID: return "array_"; + case Type::PointerTyID: return "ptr_"; + case Type::VectorTyID: return "packed_"; + case Type::OpaqueTyID: return "opaque_"; + default: return "other_"; } + return "unknown_"; +} - inline void in() { indent_level++; } - inline void out() { if (indent_level >0) indent_level--; } +// Looks up the type in the symbol table and returns a pointer to its name or +// a null pointer if it wasn't found. Note that this isn't the same as the +// Mode::getTypeName function which will return an empty string, not a null +// pointer if the name is not found. 
+static const std::string * +findTypeName(const TypeSymbolTable& ST, const Type* Ty) { + TypeSymbolTable::const_iterator TI = ST.begin(); + TypeSymbolTable::const_iterator TE = ST.end(); + for (;TI != TE; ++TI) + if (TI->second == Ty) + return &(TI->first); + return 0; +} - inline void - sanitize(std::string& str) { - for (size_t i = 0; i < str.length(); ++i) - if (!isalnum(str[i]) && str[i] != '_') - str[i] = '_'; - } +void CppWriter::error(const std::string& msg) { + report_fatal_error(msg); +} - inline std::string - getTypePrefix(const Type* Ty ) { - switch (Ty->getTypeID()) { - case Type::VoidTyID: return "void_"; - case Type::IntegerTyID: - return std::string("int") + utostr(cast(Ty)->getBitWidth()) + - "_"; - case Type::FloatTyID: return "float_"; - case Type::DoubleTyID: return "double_"; - case Type::LabelTyID: return "label_"; - case Type::FunctionTyID: return "func_"; - case Type::StructTyID: return "struct_"; - case Type::ArrayTyID: return "array_"; - case Type::PointerTyID: return "ptr_"; - case Type::VectorTyID: return "packed_"; - case Type::OpaqueTyID: return "opaque_"; - default: return "other_"; - } - return "unknown_"; - } - - // Looks up the type in the symbol table and returns a pointer to its name or - // a null pointer if it wasn't found. Note that this isn't the same as the - // Mode::getTypeName function which will return an empty string, not a null - // pointer if the name is not found. - inline const std::string* - findTypeName(const TypeSymbolTable& ST, const Type* Ty) { - TypeSymbolTable::const_iterator TI = ST.begin(); - TypeSymbolTable::const_iterator TE = ST.end(); - for (;TI != TE; ++TI) - if (TI->second == Ty) - return &(TI->first); - return 0; - } - - void CppWriter::error(const std::string& msg) { - report_fatal_error(msg); - } - - // printCFP - Print a floating point constant .. very carefully :) - // This makes sure that conversion to/from floating yields the same binary - // result so that we don't lose precision. - void CppWriter::printCFP(const ConstantFP *CFP) { - bool ignored; - APFloat APF = APFloat(CFP->getValueAPF()); // copy - if (CFP->getType() == Type::getFloatTy(CFP->getContext())) - APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "ConstantFP::get(mod->getContext(), "; - Out << "APFloat("; +// printCFP - Print a floating point constant .. very carefully :) +// This makes sure that conversion to/from floating yields the same binary +// result so that we don't lose precision. 
+void CppWriter::printCFP(const ConstantFP *CFP) { + bool ignored; + APFloat APF = APFloat(CFP->getValueAPF()); // copy + if (CFP->getType() == Type::getFloatTy(CFP->getContext())) + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); + Out << "ConstantFP::get(mod->getContext(), "; + Out << "APFloat("; #if HAVE_PRINTF_A - char Buffer[100]; - sprintf(Buffer, "%A", APF.convertToDouble()); - if ((!strncmp(Buffer, "0x", 2) || - !strncmp(Buffer, "-0x", 3) || - !strncmp(Buffer, "+0x", 3)) && - APF.bitwiseIsEqual(APFloat(atof(Buffer)))) { - if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) - Out << "BitsToDouble(" << Buffer << ")"; - else - Out << "BitsToFloat((float)" << Buffer << ")"; - Out << ")"; - } else { + char Buffer[100]; + sprintf(Buffer, "%A", APF.convertToDouble()); + if ((!strncmp(Buffer, "0x", 2) || + !strncmp(Buffer, "-0x", 3) || + !strncmp(Buffer, "+0x", 3)) && + APF.bitwiseIsEqual(APFloat(atof(Buffer)))) { + if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) + Out << "BitsToDouble(" << Buffer << ")"; + else + Out << "BitsToFloat((float)" << Buffer << ")"; + Out << ")"; + } else { #endif - std::string StrVal = ftostr(CFP->getValueAPF()); - - while (StrVal[0] == ' ') - StrVal.erase(StrVal.begin()); - - // Check to make sure that the stringized number is not some string like - // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex. - if (((StrVal[0] >= '0' && StrVal[0] <= '9') || - ((StrVal[0] == '-' || StrVal[0] == '+') && - (StrVal[1] >= '0' && StrVal[1] <= '9'))) && - (CFP->isExactlyValue(atof(StrVal.c_str())))) { - if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) - Out << StrVal; - else - Out << StrVal << "f"; - } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) - Out << "BitsToDouble(0x" - << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue()) - << "ULL) /* " << StrVal << " */"; + std::string StrVal = ftostr(CFP->getValueAPF()); + + while (StrVal[0] == ' ') + StrVal.erase(StrVal.begin()); + + // Check to make sure that the stringized number is not some string like + // "Inf" or NaN. Check that the string matches the "[-+]?[0-9]" regex. + if (((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) && + (CFP->isExactlyValue(atof(StrVal.c_str())))) { + if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) + Out << StrVal; else - Out << "BitsToFloat(0x" - << utohexstr((uint32_t)CFP->getValueAPF(). - bitcastToAPInt().getZExtValue()) - << "U) /* " << StrVal << " */"; - Out << ")"; + Out << StrVal << "f"; + } else if (CFP->getType() == Type::getDoubleTy(CFP->getContext())) + Out << "BitsToDouble(0x" + << utohexstr(CFP->getValueAPF().bitcastToAPInt().getZExtValue()) + << "ULL) /* " << StrVal << " */"; + else + Out << "BitsToFloat(0x" + << utohexstr((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue()) + << "U) /* " << StrVal << " */"; + Out << ")"; #if HAVE_PRINTF_A - } + } #endif - Out << ")"; + Out << ")"; +} + +void CppWriter::printCallingConv(CallingConv::ID cc){ + // Print the calling convention. + switch (cc) { + case CallingConv::C: Out << "CallingConv::C"; break; + case CallingConv::Fast: Out << "CallingConv::Fast"; break; + case CallingConv::Cold: Out << "CallingConv::Cold"; break; + case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break; + default: Out << cc; break; } +} - void CppWriter::printCallingConv(CallingConv::ID cc){ - // Print the calling convention. 
- switch (cc) { - case CallingConv::C: Out << "CallingConv::C"; break; - case CallingConv::Fast: Out << "CallingConv::Fast"; break; - case CallingConv::Cold: Out << "CallingConv::Cold"; break; - case CallingConv::FirstTargetCC: Out << "CallingConv::FirstTargetCC"; break; - default: Out << cc; break; - } +void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { + switch (LT) { + case GlobalValue::InternalLinkage: + Out << "GlobalValue::InternalLinkage"; break; + case GlobalValue::PrivateLinkage: + Out << "GlobalValue::PrivateLinkage"; break; + case GlobalValue::LinkerPrivateLinkage: + Out << "GlobalValue::LinkerPrivateLinkage"; break; + case GlobalValue::LinkerPrivateWeakLinkage: + Out << "GlobalValue::LinkerPrivateWeakLinkage"; break; + case GlobalValue::AvailableExternallyLinkage: + Out << "GlobalValue::AvailableExternallyLinkage "; break; + case GlobalValue::LinkOnceAnyLinkage: + Out << "GlobalValue::LinkOnceAnyLinkage "; break; + case GlobalValue::LinkOnceODRLinkage: + Out << "GlobalValue::LinkOnceODRLinkage "; break; + case GlobalValue::WeakAnyLinkage: + Out << "GlobalValue::WeakAnyLinkage"; break; + case GlobalValue::WeakODRLinkage: + Out << "GlobalValue::WeakODRLinkage"; break; + case GlobalValue::AppendingLinkage: + Out << "GlobalValue::AppendingLinkage"; break; + case GlobalValue::ExternalLinkage: + Out << "GlobalValue::ExternalLinkage"; break; + case GlobalValue::DLLImportLinkage: + Out << "GlobalValue::DLLImportLinkage"; break; + case GlobalValue::DLLExportLinkage: + Out << "GlobalValue::DLLExportLinkage"; break; + case GlobalValue::ExternalWeakLinkage: + Out << "GlobalValue::ExternalWeakLinkage"; break; + case GlobalValue::CommonLinkage: + Out << "GlobalValue::CommonLinkage"; break; } +} - void CppWriter::printLinkageType(GlobalValue::LinkageTypes LT) { - switch (LT) { - case GlobalValue::InternalLinkage: - Out << "GlobalValue::InternalLinkage"; break; - case GlobalValue::PrivateLinkage: - Out << "GlobalValue::PrivateLinkage"; break; - case GlobalValue::LinkerPrivateLinkage: - Out << "GlobalValue::LinkerPrivateLinkage"; break; - case GlobalValue::AvailableExternallyLinkage: - Out << "GlobalValue::AvailableExternallyLinkage "; break; - case GlobalValue::LinkOnceAnyLinkage: - Out << "GlobalValue::LinkOnceAnyLinkage "; break; - case GlobalValue::LinkOnceODRLinkage: - Out << "GlobalValue::LinkOnceODRLinkage "; break; - case GlobalValue::WeakAnyLinkage: - Out << "GlobalValue::WeakAnyLinkage"; break; - case GlobalValue::WeakODRLinkage: - Out << "GlobalValue::WeakODRLinkage"; break; - case GlobalValue::AppendingLinkage: - Out << "GlobalValue::AppendingLinkage"; break; - case GlobalValue::ExternalLinkage: - Out << "GlobalValue::ExternalLinkage"; break; - case GlobalValue::DLLImportLinkage: - Out << "GlobalValue::DLLImportLinkage"; break; - case GlobalValue::DLLExportLinkage: - Out << "GlobalValue::DLLExportLinkage"; break; - case GlobalValue::ExternalWeakLinkage: - Out << "GlobalValue::ExternalWeakLinkage"; break; - case GlobalValue::CommonLinkage: - Out << "GlobalValue::CommonLinkage"; break; - } +void CppWriter::printVisibilityType(GlobalValue::VisibilityTypes VisType) { + switch (VisType) { + default: llvm_unreachable("Unknown GVar visibility"); + case GlobalValue::DefaultVisibility: + Out << "GlobalValue::DefaultVisibility"; + break; + case GlobalValue::HiddenVisibility: + Out << "GlobalValue::HiddenVisibility"; + break; + case GlobalValue::ProtectedVisibility: + Out << "GlobalValue::ProtectedVisibility"; + break; } +} - void 
+// printEscapedString - Print each character of the specified string, escaping
+// it if it is not printable or if it is an escape char.
+void CppWriter::printEscapedString(const std::string &Str) {
+  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+    unsigned char C = Str[i];
+    if (isprint(C) && C != '"' && C != '\\') {
+      Out << C;
+    } else {
+      Out << "\\x"
+          << (char) ((C/16  < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
+          << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
+    }
+  }
+}
-
-  // printEscapedString - Print each character of the specified string, escaping
-  // it if it is not printable or if it is an escape char.
-  void CppWriter::printEscapedString(const std::string &Str) {
-    for (unsigned i = 0, e = Str.size(); i != e; ++i) {
-      unsigned char C = Str[i];
-      if (isprint(C) && C != '"' && C != '\\') {
-        Out << C;
-      } else {
-        Out << "\\x"
-            << (char) ((C/16  < 10) ? ( C/16 +'0') : ( C/16 -10+'A'))
-            << (char)(((C&15) < 10) ? ((C&15)+'0') : ((C&15)-10+'A'));
-      }
-    }
-  }
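A worked example of the escaping rule, hand-computed: printable bytes other than quote and backslash pass through, everything else becomes \x plus two uppercase hex digits:

  printEscapedString("a\nb\"c");  // writes: a\x0Ab\x22c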
+std::string CppWriter::getCppName(const Type* Ty) {
+  // First, handle the primitive types .. easy
+  if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
+    switch (Ty->getTypeID()) {
+    case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
+    case Type::IntegerTyID: {
+      unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
+      return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
+    }
+    case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
+    case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
+    case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
+    case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
+    default:
+      error("Invalid primitive type");
+      break;
+    }
+    // shouldn't be returned, but make it sensible
+    return "Type::getVoidTy(mod->getContext())";
+  }
+
+  // Now, see if we've seen the type before and return that
+  TypeMap::iterator I = TypeNames.find(Ty);
+  if (I != TypeNames.end())
+    return I->second;
+
+  // Okay, let's build a new name for this type.  Start with a prefix
+  const char* prefix = 0;
+  switch (Ty->getTypeID()) {
+  case Type::FunctionTyID: prefix = "FuncTy_"; break;
+  case Type::StructTyID: prefix = "StructTy_"; break;
+  case Type::ArrayTyID: prefix = "ArrayTy_"; break;
+  case Type::PointerTyID: prefix = "PointerTy_"; break;
+  case Type::OpaqueTyID: prefix = "OpaqueTy_"; break;
+  case Type::VectorTyID: prefix = "VectorTy_"; break;
+  default: prefix = "OtherTy_"; break; // prevent breakage
+  }
+
+  // See if the type has a name in the symbol table and build accordingly
+  const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty);
+  std::string name;
+  if (tName)
+    name = std::string(prefix) + *tName;
+  else
+    name = std::string(prefix) + utostr(uniqueNum++);
+  sanitize(name);
+
+  // Save the name
+  return TypeNames[Ty] = name;
+}
-
-  std::string CppWriter::getCppName(const Type* Ty) {
-    // First, handle the primitive types .. easy
-    if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
-      switch (Ty->getTypeID()) {
-      case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
-      case Type::IntegerTyID: {
-        unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
-        return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
-      }
-      case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
-      case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
-      case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
-      case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
-      default:
-        error("Invalid primitive type");
-        break;
-      }
-      // shouldn't be returned, but make it sensible
-      return "Type::getVoidTy(mod->getContext())";
-    }
-
-    // Now, see if we've seen the type before and return that
-    TypeMap::iterator I = TypeNames.find(Ty);
-    if (I != TypeNames.end())
-      return I->second;
-
-    // Okay, let's build a new name for this type.  Start with a prefix
-    const char* prefix = 0;
-    switch (Ty->getTypeID()) {
-    case Type::FunctionTyID: prefix = "FuncTy_"; break;
-    case Type::StructTyID: prefix = "StructTy_"; break;
-    case Type::ArrayTyID: prefix = "ArrayTy_"; break;
-    case Type::PointerTyID: prefix = "PointerTy_"; break;
-    case Type::OpaqueTyID: prefix = "OpaqueTy_"; break;
-    case Type::VectorTyID: prefix = "VectorTy_"; break;
-    default: prefix = "OtherTy_"; break; // prevent breakage
-    }
-
-    // See if the type has a name in the symboltable and build accordingly
-    const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty);
-    std::string name;
-    if (tName)
-      name = std::string(prefix) + *tName;
-    else
-      name = std::string(prefix) + utostr(uniqueNum++);
-    sanitize(name);
-
-    // Save the name
-    return TypeNames[Ty] = name;
-  }
+
+void CppWriter::printCppName(const Type* Ty) {
+  printEscapedString(getCppName(Ty));
+}
-
-  void CppWriter::printCppName(const Type* Ty) {
-    printEscapedString(getCppName(Ty));
-  }
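Hand-derived examples of the naming scheme above, assuming an empty UsedNames set (all names hypothetical):

  // struct registered as "pair" in the type symbol table -> "StructTy_pair"
  // unnamed struct                                        -> "StructTy_0"
  // i32 (primitive, no map entry needed) -> "IntegerType::get(mod->getContext(), 32)"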
+std::string CppWriter::getCppName(const Value* val) {
+  std::string name;
+  ValueMap::iterator I = ValueNames.find(val);
+  if (I != ValueNames.end() && I->first == val)
+    return I->second;
+
+  if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
+    name = std::string("gvar_") +
+      getTypePrefix(GV->getType()->getElementType());
+  } else if (isa<Function>(val)) {
+    name = std::string("func_");
+  } else if (const Constant* C = dyn_cast<Constant>(val)) {
+    name = std::string("const_") + getTypePrefix(C->getType());
+  } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
+    if (is_inline) {
+      unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
+                                      Function::const_arg_iterator(Arg)) + 1;
+      name = std::string("arg_") + utostr(argNum);
+      NameSet::iterator NI = UsedNames.find(name);
+      if (NI != UsedNames.end())
+        name += std::string("_") + utostr(uniqueNum++);
+      UsedNames.insert(name);
+      return ValueNames[val] = name;
+    } else {
+      name = getTypePrefix(val->getType());
+    }
+  } else {
+    name = getTypePrefix(val->getType());
+  }
+  if (val->hasName())
+    name += val->getName();
+  else
+    name += utostr(uniqueNum++);
+  sanitize(name);
+  NameSet::iterator NI = UsedNames.find(name);
+  if (NI != UsedNames.end())
+    name += std::string("_") + utostr(uniqueNum++);
+  UsedNames.insert(name);
+  return ValueNames[val] = name;
+}
-
-  std::string CppWriter::getCppName(const Value* val) {
-    std::string name;
-    ValueMap::iterator I = ValueNames.find(val);
-    if (I != ValueNames.end() && I->first == val)
-      return I->second;
-
-    if (const GlobalVariable* GV = dyn_cast<GlobalVariable>(val)) {
-      name = std::string("gvar_") +
-        getTypePrefix(GV->getType()->getElementType());
-    } else if (isa<Function>(val)) {
-      name = std::string("func_");
-    } else if (const Constant* C = dyn_cast<Constant>(val)) {
-      name = std::string("const_") + getTypePrefix(C->getType());
-    } else if (const Argument* Arg = dyn_cast<Argument>(val)) {
-      if (is_inline) {
-        unsigned argNum = std::distance(Arg->getParent()->arg_begin(),
-                                        Function::const_arg_iterator(Arg)) + 1;
-        name = std::string("arg_") + utostr(argNum);
-        NameSet::iterator NI = UsedNames.find(name);
-        if (NI != UsedNames.end())
-          name += std::string("_") + utostr(uniqueNum++);
-        UsedNames.insert(name);
-        return ValueNames[val] = name;
-      } else {
-        name = getTypePrefix(val->getType());
-      }
-    } else {
-      name = getTypePrefix(val->getType());
-    }
-    if (val->hasName())
-      name += val->getName();
-    else
-      name += utostr(uniqueNum++);
-    sanitize(name);
-    NameSet::iterator NI = UsedNames.find(name);
-    if (NI != UsedNames.end())
-      name += std::string("_") + utostr(uniqueNum++);
-    UsedNames.insert(name);
-    return ValueNames[val] = name;
-  }
+
+void CppWriter::printCppName(const Value* val) {
+  printEscapedString(getCppName(val));
+}
-
-  void CppWriter::printCppName(const Value* val) {
-    printEscapedString(getCppName(val));
-  }
+
+void CppWriter::printAttributes(const AttrListPtr &PAL,
+                                const std::string &name) {
+  Out << "AttrListPtr " << name << "_PAL;";
+  nl(Out);
+  if (!PAL.isEmpty()) {
+    Out << '{'; in(); nl(Out);
+    Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
+    Out << "AttributeWithIndex PAWI;"; nl(Out);
+    for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
+      unsigned index = PAL.getSlot(i).Index;
+      Attributes attrs = PAL.getSlot(i).Attrs;
+      Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
+#define HANDLE_ATTR(X) \
+      if (attrs & Attribute::X) \
+        Out << " | Attribute::" #X; \
+      attrs &= ~Attribute::X;
+
+      HANDLE_ATTR(SExt);
+      HANDLE_ATTR(ZExt);
+      HANDLE_ATTR(NoReturn);
+      HANDLE_ATTR(InReg);
+      HANDLE_ATTR(StructRet);
+      HANDLE_ATTR(NoUnwind);
+      HANDLE_ATTR(NoAlias);
+      HANDLE_ATTR(ByVal);
+      HANDLE_ATTR(Nest);
+      HANDLE_ATTR(ReadNone);
+      HANDLE_ATTR(ReadOnly);
+      HANDLE_ATTR(InlineHint);
+      HANDLE_ATTR(NoInline);
+      HANDLE_ATTR(AlwaysInline);
+      HANDLE_ATTR(OptimizeForSize);
+      HANDLE_ATTR(StackProtect);
+      HANDLE_ATTR(StackProtectReq);
+      HANDLE_ATTR(NoCapture);
+#undef HANDLE_ATTR
+      assert(attrs == 0 && "Unhandled attribute!");
+      Out << ";";
+      nl(Out);
+      Out << "Attrs.push_back(PAWI);";
+      nl(Out);
+    }
+    Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
+    nl(Out);
+    out(); nl(Out);
+    Out << '}'; nl(Out);
+  }
+}
-
-  void CppWriter::printAttributes(const AttrListPtr &PAL,
-                                  const std::string &name) {
-    Out << "AttrListPtr " << name << "_PAL;";
-    nl(Out);
-    if (!PAL.isEmpty()) {
-      Out << '{'; in(); nl(Out);
-      Out << "SmallVector<AttributeWithIndex, 4> Attrs;"; nl(Out);
-      Out << "AttributeWithIndex PAWI;"; nl(Out);
-      for (unsigned i = 0; i < PAL.getNumSlots(); ++i) {
-        unsigned index = PAL.getSlot(i).Index;
-        Attributes attrs = PAL.getSlot(i).Attrs;
-        Out << "PAWI.Index = " << index << "U; PAWI.Attrs = 0 ";
-#define HANDLE_ATTR(X) \
-        if (attrs & Attribute::X) \
-          Out << " | Attribute::" #X; \
-        attrs &= ~Attribute::X;
-
-        HANDLE_ATTR(SExt);
-        HANDLE_ATTR(ZExt);
-        HANDLE_ATTR(NoReturn);
-        HANDLE_ATTR(InReg);
-        HANDLE_ATTR(StructRet);
-        HANDLE_ATTR(NoUnwind);
-        HANDLE_ATTR(NoAlias);
-        HANDLE_ATTR(ByVal);
-        HANDLE_ATTR(Nest);
-        HANDLE_ATTR(ReadNone);
-        HANDLE_ATTR(ReadOnly);
-        HANDLE_ATTR(InlineHint);
-        HANDLE_ATTR(NoInline);
-        HANDLE_ATTR(AlwaysInline);
-        HANDLE_ATTR(OptimizeForSize);
-        HANDLE_ATTR(StackProtect);
-        HANDLE_ATTR(StackProtectReq);
-        HANDLE_ATTR(NoCapture);
-#undef HANDLE_ATTR
-        assert(attrs == 0 && "Unhandled attribute!");
-        Out << ";";
-        nl(Out);
-        Out << "Attrs.push_back(PAWI);";
-        nl(Out);
-      }
-      Out << name << "_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());";
-      nl(Out);
-      out(); nl(Out);
-      Out << '}'; nl(Out);
-    }
-  }
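Sketch of what printAttributes emits for a hypothetical function carrying nounwind and readonly attributes (hand-written; the ~0U function-attribute slot index and the _PAL suffix follow the scheme above):

  AttrListPtr func_foo_PAL;
  {
    SmallVector<AttributeWithIndex, 4> Attrs;
    AttributeWithIndex PAWI;
    PAWI.Index = 4294967295U; PAWI.Attrs = 0 | Attribute::NoUnwind | Attribute::ReadOnly;
    Attrs.push_back(PAWI);
    func_foo_PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
  }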
+bool CppWriter::printTypeInternal(const Type* Ty) {
+  // We don't print definitions for primitive types
+  if (Ty->isPrimitiveType() || Ty->isIntegerTy())
+    return false;
+
+  // If we already defined this type, we don't need to define it again.
+  if (DefinedTypes.find(Ty) != DefinedTypes.end())
+    return false;
+
+  // Everything below needs the name for the type so get it now.
+  std::string typeName(getCppName(Ty));
+
+  // Search the type stack for recursion. If we find it, then generate this
+  // as an OpaqueType, but make sure not to do this multiple times because
+  // the type could appear in multiple places on the stack. Once the opaque
+  // definition is issued, it must not be re-issued. Consequently we have to
+  // check the UnresolvedTypes list as well.
+  TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(),
+                                          Ty);
+  if (TI != TypeStack.end()) {
+    TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
+    if (I == UnresolvedTypes.end()) {
+      Out << "PATypeHolder " << typeName;
+      Out << "_fwd = OpaqueType::get(mod->getContext());";
+      nl(Out);
+      UnresolvedTypes[Ty] = typeName;
+    }
+    return true;
+  }
+
+  // We're going to print a derived type which, by definition, contains other
+  // types. So, push this one we're printing onto the type stack to assist with
+  // recursive definitions.
+  TypeStack.push_back(Ty);
+
+  // Print the type definition
+  switch (Ty->getTypeID()) {
+  case Type::FunctionTyID: {
+    const FunctionType* FT = cast<FunctionType>(Ty);
+    Out << "std::vector<const Type*>" << typeName << "_args;";
+    nl(Out);
+    FunctionType::param_iterator PI = FT->param_begin();
+    FunctionType::param_iterator PE = FT->param_end();
+    for (; PI != PE; ++PI) {
+      const Type* argTy = static_cast<const Type*>(*PI);
+      bool isForward = printTypeInternal(argTy);
+      std::string argName(getCppName(argTy));
+      Out << typeName << "_args.push_back(" << argName;
+      if (isForward)
+        Out << "_fwd";
+      Out << ");";
+      nl(Out);
+    }
+    bool isForward = printTypeInternal(FT->getReturnType());
+    std::string retTypeName(getCppName(FT->getReturnType()));
+    Out << "FunctionType* " << typeName << " = FunctionType::get(";
+    in(); nl(Out) << "/*Result=*/" << retTypeName;
+    if (isForward)
+      Out << "_fwd";
+    Out << ",";
+    nl(Out) << "/*Params=*/" << typeName << "_args,";
+    nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
+    out();
+    nl(Out);
+    break;
+  }
+  case Type::StructTyID: {
+    const StructType* ST = cast<StructType>(Ty);
+    Out << "std::vector<const Type*>" << typeName << "_fields;";
+    nl(Out);
+    StructType::element_iterator EI = ST->element_begin();
+    StructType::element_iterator EE = ST->element_end();
+    for (; EI != EE; ++EI) {
+      const Type* fieldTy = static_cast<const Type*>(*EI);
+      bool isForward = printTypeInternal(fieldTy);
+      std::string fieldName(getCppName(fieldTy));
+      Out << typeName << "_fields.push_back(" << fieldName;
+      if (isForward)
+        Out << "_fwd";
+      Out << ");";
+      nl(Out);
+    }
+    Out << "StructType* " << typeName << " = StructType::get("
+        << "mod->getContext(), "
+        << typeName << "_fields, /*isPacked=*/"
+        << (ST->isPacked() ? "true" : "false") << ");";
+    nl(Out);
+    break;
+  }
+  case Type::ArrayTyID: {
+    const ArrayType* AT = cast<ArrayType>(Ty);
+    const Type* ET = AT->getElementType();
+    bool isForward = printTypeInternal(ET);
+    std::string elemName(getCppName(ET));
+    Out << "ArrayType* " << typeName << " = ArrayType::get("
+        << elemName << (isForward ? "_fwd" : "")
+        << ", " << utostr(AT->getNumElements()) << ");";
+    nl(Out);
+    break;
+  }
+  case Type::PointerTyID: {
+    const PointerType* PT = cast<PointerType>(Ty);
+    const Type* ET = PT->getElementType();
+    bool isForward = printTypeInternal(ET);
+    std::string elemName(getCppName(ET));
+    Out << "PointerType* " << typeName << " = PointerType::get("
+        << elemName << (isForward ? "_fwd" : "")
+        << ", " << utostr(PT->getAddressSpace()) << ");";
+    nl(Out);
+    break;
+  }
+  case Type::VectorTyID: {
+    const VectorType* PT = cast<VectorType>(Ty);
+    const Type* ET = PT->getElementType();
+    bool isForward = printTypeInternal(ET);
+    std::string elemName(getCppName(ET));
+    Out << "VectorType* " << typeName << " = VectorType::get("
+        << elemName << (isForward ? "_fwd" : "")
+        << ", " << utostr(PT->getNumElements()) << ");";
+    nl(Out);
+    break;
+  }
+  case Type::OpaqueTyID: {
+    Out << "OpaqueType* " << typeName;
+    Out << " = OpaqueType::get(mod->getContext());";
+    nl(Out);
+    break;
+  }
+  default:
+    error("Invalid TypeID");
+  }
+
+  // If the type had a name, make sure we recreate it.
+  const std::string* progTypeName =
+    findTypeName(TheModule->getTypeSymbolTable(),Ty);
+  if (progTypeName) {
+    Out << "mod->addTypeName(\"" << *progTypeName << "\", "
+        << typeName << ");";
+    nl(Out);
+  }
+
+  // Pop us off the type stack
+  TypeStack.pop_back();
+
+  // Indicate that this type is now defined.
+  DefinedTypes.insert(Ty);
+
+  // Early resolve as many unresolved types as possible. Search the unresolved
+  // types map for the type we just printed. Now that its definition is complete
+  // we can resolve any previous references to it. This prevents a cascade of
+  // unresolved types.
+  TypeMap::iterator I = UnresolvedTypes.find(Ty);
+  if (I != UnresolvedTypes.end()) {
+    Out << "cast<OpaqueType>(" << I->second
+        << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
+    nl(Out);
+    Out << I->second << " = cast<";
+    switch (Ty->getTypeID()) {
+    case Type::FunctionTyID: Out << "FunctionType"; break;
+    case Type::ArrayTyID: Out << "ArrayType"; break;
+    case Type::StructTyID: Out << "StructType"; break;
+    case Type::VectorTyID: Out << "VectorType"; break;
+    case Type::PointerTyID: Out << "PointerType"; break;
+    case Type::OpaqueTyID: Out << "OpaqueType"; break;
+    default: Out << "NoSuchDerivedType"; break;
+    }
+    Out << ">(" << I->second << "_fwd.get());";
+    nl(Out); nl(Out);
+    UnresolvedTypes.erase(I);
+  }
+
+  // Finally, separate the type definition from others with a newline.
+  nl(Out);
+
+  // We weren't a recursive type
+  return false;
+}
-
-  bool CppWriter::printTypeInternal(const Type* Ty) {
-    // We don't print definitions for primitive types
-    if (Ty->isPrimitiveType() || Ty->isIntegerTy())
-      return false;
-
-    // If we already defined this type, we don't need to define it again.
-    if (DefinedTypes.find(Ty) != DefinedTypes.end())
-      return false;
-
-    // Everything below needs the name for the type so get it now.
-    std::string typeName(getCppName(Ty));
-
-    // Search the type stack for recursion. If we find it, then generate this
-    // as an OpaqueType, but make sure not to do this multiple times because
-    // the type could appear in multiple places on the stack. Once the opaque
-    // definition is issued, it must not be re-issued. Consequently we have to
-    // check the UnresolvedTypes list as well.
-    TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(),
-                                            Ty);
-    if (TI != TypeStack.end()) {
-      TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
-      if (I == UnresolvedTypes.end()) {
-        Out << "PATypeHolder " << typeName;
-        Out << "_fwd = OpaqueType::get(mod->getContext());";
-        nl(Out);
-        UnresolvedTypes[Ty] = typeName;
-      }
-      return true;
-    }
-
-    // We're going to print a derived type which, by definition, contains other
-    // types. So, push this one we're printing onto the type stack to assist with
-    // recursive definitions.
-    TypeStack.push_back(Ty);
-
-    // Print the type definition
-    switch (Ty->getTypeID()) {
-    case Type::FunctionTyID: {
-      const FunctionType* FT = cast<FunctionType>(Ty);
-      Out << "std::vector<const Type*>" << typeName << "_args;";
-      nl(Out);
-      FunctionType::param_iterator PI = FT->param_begin();
-      FunctionType::param_iterator PE = FT->param_end();
-      for (; PI != PE; ++PI) {
-        const Type* argTy = static_cast<const Type*>(*PI);
-        bool isForward = printTypeInternal(argTy);
-        std::string argName(getCppName(argTy));
-        Out << typeName << "_args.push_back(" << argName;
-        if (isForward)
-          Out << "_fwd";
-        Out << ");";
-        nl(Out);
-      }
-      bool isForward = printTypeInternal(FT->getReturnType());
-      std::string retTypeName(getCppName(FT->getReturnType()));
-      Out << "FunctionType* " << typeName << " = FunctionType::get(";
-      in(); nl(Out) << "/*Result=*/" << retTypeName;
-      if (isForward)
-        Out << "_fwd";
-      Out << ",";
-      nl(Out) << "/*Params=*/" << typeName << "_args,";
-      nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
-      out();
-      nl(Out);
-      break;
-    }
-    case Type::StructTyID: {
-      const StructType* ST = cast<StructType>(Ty);
-      Out << "std::vector<const Type*>" << typeName << "_fields;";
-      nl(Out);
-      StructType::element_iterator EI = ST->element_begin();
-      StructType::element_iterator EE = ST->element_end();
-      for (; EI != EE; ++EI) {
-        const Type* fieldTy = static_cast<const Type*>(*EI);
-        bool isForward = printTypeInternal(fieldTy);
-        std::string fieldName(getCppName(fieldTy));
-        Out << typeName << "_fields.push_back(" << fieldName;
-        if (isForward)
-          Out << "_fwd";
-        Out << ");";
-        nl(Out);
-      }
-      Out << "StructType* " << typeName << " = StructType::get("
-          << "mod->getContext(), "
-          << typeName << "_fields, /*isPacked=*/"
-          << (ST->isPacked() ? "true" : "false") << ");";
-      nl(Out);
-      break;
-    }
-    case Type::ArrayTyID: {
-      const ArrayType* AT = cast<ArrayType>(Ty);
-      const Type* ET = AT->getElementType();
-      bool isForward = printTypeInternal(ET);
-      std::string elemName(getCppName(ET));
-      Out << "ArrayType* " << typeName << " = ArrayType::get("
-          << elemName << (isForward ? "_fwd" : "")
-          << ", " << utostr(AT->getNumElements()) << ");";
-      nl(Out);
-      break;
-    }
-    case Type::PointerTyID: {
-      const PointerType* PT = cast<PointerType>(Ty);
-      const Type* ET = PT->getElementType();
-      bool isForward = printTypeInternal(ET);
-      std::string elemName(getCppName(ET));
-      Out << "PointerType* " << typeName << " = PointerType::get("
-          << elemName << (isForward ? "_fwd" : "")
-          << ", " << utostr(PT->getAddressSpace()) << ");";
-      nl(Out);
-      break;
-    }
-    case Type::VectorTyID: {
-      const VectorType* PT = cast<VectorType>(Ty);
-      const Type* ET = PT->getElementType();
-      bool isForward = printTypeInternal(ET);
-      std::string elemName(getCppName(ET));
-      Out << "VectorType* " << typeName << " = VectorType::get("
-          << elemName << (isForward ? "_fwd" : "")
-          << ", " << utostr(PT->getNumElements()) << ");";
-      nl(Out);
-      break;
-    }
-    case Type::OpaqueTyID: {
-      Out << "OpaqueType* " << typeName;
-      Out << " = OpaqueType::get(mod->getContext());";
-      nl(Out);
-      break;
-    }
-    default:
-      error("Invalid TypeID");
-    }
-
-    // If the type had a name, make sure we recreate it.
-    const std::string* progTypeName =
-      findTypeName(TheModule->getTypeSymbolTable(),Ty);
-    if (progTypeName) {
-      Out << "mod->addTypeName(\"" << *progTypeName << "\", "
-          << typeName << ");";
-      nl(Out);
-    }
-
-    // Pop us off the type stack
-    TypeStack.pop_back();
-
-    // Indicate that this type is now defined.
-    DefinedTypes.insert(Ty);
-
-    // Early resolve as many unresolved types as possible. Search the unresolved
-    // types map for the type we just printed. Now that its definition is complete
-    // we can resolve any previous references to it. This prevents a cascade of
-    // unresolved types.
-    TypeMap::iterator I = UnresolvedTypes.find(Ty);
-    if (I != UnresolvedTypes.end()) {
-      Out << "cast<OpaqueType>(" << I->second
-          << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
-      nl(Out);
-      Out << I->second << " = cast<";
-      switch (Ty->getTypeID()) {
-      case Type::FunctionTyID: Out << "FunctionType"; break;
-      case Type::ArrayTyID: Out << "ArrayType"; break;
-      case Type::StructTyID: Out << "StructType"; break;
-      case Type::VectorTyID: Out << "VectorType"; break;
-      case Type::PointerTyID: Out << "PointerType"; break;
-      case Type::OpaqueTyID: Out << "OpaqueType"; break;
-      default: Out << "NoSuchDerivedType"; break;
-      }
-      Out << ">(" << I->second << "_fwd.get());";
-      nl(Out); nl(Out);
-      UnresolvedTypes.erase(I);
-    }
-
-    // Finally, separate the type definition from other with a newline.
-    nl(Out);
-
-    // We weren't a recursive type
-    return false;
-  }
+
+// Prints a type definition. Returns true if it could not resolve all the
+// types in the definition but had to use a forward reference.
+void CppWriter::printType(const Type* Ty) {
+  assert(TypeStack.empty());
+  TypeStack.clear();
+  printTypeInternal(Ty);
+  assert(TypeStack.empty());
+}
-
-  // Prints a type definition. Returns true if it could not resolve all the
-  // types in the definition but had to use a forward reference.
-  void CppWriter::printType(const Type* Ty) {
-    assert(TypeStack.empty());
-    TypeStack.clear();
-    printTypeInternal(Ty);
-    assert(TypeStack.empty());
-  }
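Hand-written sketch of the forward-reference dance for a self-referential type such as %T = type { %T* } (names hypothetical): printTypeInternal first emits an opaque placeholder, then refines it once the real definition is complete:

  PATypeHolder StructTy_T_fwd = OpaqueType::get(mod->getContext());
  // ... pointer and struct definitions referring to StructTy_T_fwd ...
  cast<OpaqueType>(StructTy_T_fwd.get())->refineAbstractTypeTo(StructTy_T);
  StructTy_T = cast<StructType>(StructTy_T_fwd.get());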
+
+void CppWriter::printTypes(const Module* M) {
+  // Walk the symbol table and print out all its types
+  const TypeSymbolTable& symtab = M->getTypeSymbolTable();
+  for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
+       TI != TE; ++TI) {
+
+    // For primitive types and types already defined, just add a name
+    TypeMap::const_iterator TNI = TypeNames.find(TI->second);
+    if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() ||
+        TNI != TypeNames.end()) {
+      Out << "mod->addTypeName(\"";
+      printEscapedString(TI->first);
+      Out << "\", " << getCppName(TI->second) << ");";
+      nl(Out);
+      // For everything else, define the type
+    } else {
+      printType(TI->second);
+    }
+  }
+
+  // Add all of the global variables to the value table...
+  for (Module::const_global_iterator I = TheModule->global_begin(),
+         E = TheModule->global_end(); I != E; ++I) {
+    if (I->hasInitializer())
+      printType(I->getInitializer()->getType());
+    printType(I->getType());
+  }
+
+  // Add all the functions to the table
+  for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
+       FI != FE; ++FI) {
+    printType(FI->getReturnType());
+    printType(FI->getFunctionType());
+    // Add all the function arguments
+    for (Function::const_arg_iterator AI = FI->arg_begin(),
+           AE = FI->arg_end(); AI != AE; ++AI) {
+      printType(AI->getType());
+    }
+
+    // Add all of the basic blocks and instructions
+    for (Function::const_iterator BB = FI->begin(),
+         E = FI->end(); BB != E; ++BB) {
+      printType(BB->getType());
+      for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
+           ++I) {
+        printType(I->getType());
+        for (unsigned i = 0; i < I->getNumOperands(); ++i)
+          printType(I->getOperand(i)->getType());
+      }
+    }
+  }
+}
-
-  void CppWriter::printTypes(const Module* M) {
-    // Walk the symbol table and print out all its types
-    const TypeSymbolTable& symtab = M->getTypeSymbolTable();
-    for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
-         TI != TE; ++TI) {
-
-      // For primitive types and types already defined, just add a name
-      TypeMap::const_iterator TNI = TypeNames.find(TI->second);
-      if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() ||
-          TNI != TypeNames.end()) {
-        Out << "mod->addTypeName(\"";
-        printEscapedString(TI->first);
-        Out << "\", " << getCppName(TI->second) << ");";
-        nl(Out);
-        // For everything else, define the type
-      } else {
-        printType(TI->second);
-      }
-    }
-
-    // Add all of the global variables to the value table...
-    for (Module::const_global_iterator I = TheModule->global_begin(),
-           E = TheModule->global_end(); I != E; ++I) {
-      if (I->hasInitializer())
-        printType(I->getInitializer()->getType());
-      printType(I->getType());
-    }
-
-    // Add all the functions to the table
-    for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end();
-         FI != FE; ++FI) {
-      printType(FI->getReturnType());
-      printType(FI->getFunctionType());
-      // Add all the function arguments
-      for (Function::const_arg_iterator AI = FI->arg_begin(),
-             AE = FI->arg_end(); AI != AE; ++AI) {
-        printType(AI->getType());
-      }
-
-      // Add all of the basic blocks and instructions
-      for (Function::const_iterator BB = FI->begin(),
-           E = FI->end(); BB != E; ++BB) {
-        printType(BB->getType());
-        for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;
-             ++I) {
-          printType(I->getType());
-          for (unsigned i = 0; i < I->getNumOperands(); ++i)
-            printType(I->getOperand(i)->getType());
-        }
-      }
-    }
-  }
+
+// printConstant - Print out a constant pool entry...
+void CppWriter::printConstant(const Constant *CV) {
+  // First, if the constant is actually a GlobalValue (variable or function)
+  // or it's already in the constant list then we've printed it already and we
+  // can just return.
+  if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
+    return;
+
+  std::string constName(getCppName(CV));
+  std::string typeName(getCppName(CV->getType()));
+
+  if (isa<GlobalValue>(CV)) {
+    // Skip variables and functions, we emit them elsewhere
+    return;
+  }
+
+  if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
+    std::string constValue = CI->getValue().toString(10, true);
+    Out << "ConstantInt* " << constName
+        << " = ConstantInt::get(mod->getContext(), APInt("
+        << cast<IntegerType>(CI->getType())->getBitWidth()
+        << ", StringRef(\"" << constValue << "\"), 10));";
+  } else if (isa<ConstantAggregateZero>(CV)) {
+    Out << "ConstantAggregateZero* " << constName
+        << " = ConstantAggregateZero::get(" << typeName << ");";
+  } else if (isa<ConstantPointerNull>(CV)) {
+    Out << "ConstantPointerNull* " << constName
+        << " = ConstantPointerNull::get(" << typeName << ");";
+  } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
+    Out << "ConstantFP* " << constName << " = ";
+    printCFP(CFP);
+    Out << ";";
+  } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+    if (CA->isString() &&
+        CA->getType()->getElementType() ==
+        Type::getInt8Ty(CA->getContext())) {
+      Out << "Constant* " << constName <<
+        " = ConstantArray::get(mod->getContext(), \"";
+      std::string tmp = CA->getAsString();
+      bool nullTerminate = false;
+      if (tmp[tmp.length()-1] == 0) {
+        tmp.erase(tmp.length()-1);
+        nullTerminate = true;
+      }
+      printEscapedString(tmp);
+      // Determine if we want null termination or not.
+      if (nullTerminate)
+        Out << "\", true"; // Indicate that the null terminator should be
+                           // added.
+      else
+        Out << "\", false";// No null terminator
+      Out << ");";
+    } else {
+      Out << "std::vector<Constant*> " << constName << "_elems;";
+      nl(Out);
+      unsigned N = CA->getNumOperands();
+      for (unsigned i = 0; i < N; ++i) {
+        printConstant(CA->getOperand(i)); // recurse to print operands
+        Out << constName << "_elems.push_back("
+            << getCppName(CA->getOperand(i)) << ");";
+        nl(Out);
+      }
+      Out << "Constant* " << constName << " = ConstantArray::get("
+          << typeName << ", " << constName << "_elems);";
+    }
+  } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+    Out << "std::vector<Constant*> " << constName << "_fields;";
+    nl(Out);
+    unsigned N = CS->getNumOperands();
+    for (unsigned i = 0; i < N; i++) {
+      printConstant(CS->getOperand(i));
+      Out << constName << "_fields.push_back("
+          << getCppName(CS->getOperand(i)) << ");";
+      nl(Out);
+    }
+    Out << "Constant* " << constName << " = ConstantStruct::get("
+        << typeName << ", " << constName << "_fields);";
+  } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
+    Out << "std::vector<Constant*> " << constName << "_elems;";
+    nl(Out);
+    unsigned N = CP->getNumOperands();
+    for (unsigned i = 0; i < N; ++i) {
+      printConstant(CP->getOperand(i));
+      Out << constName << "_elems.push_back("
+          << getCppName(CP->getOperand(i)) << ");";
+      nl(Out);
+    }
+    Out << "Constant* " << constName << " = ConstantVector::get("
+        << typeName << ", " << constName << "_elems);";
+  } else if (isa<UndefValue>(CV)) {
+    Out << "UndefValue* " << constName << " = UndefValue::get("
+        << typeName << ");";
+  } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr) {
+      Out << "std::vector<Constant*> " << constName << "_indices;";
+      nl(Out);
+      printConstant(CE->getOperand(0));
+      for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
+        printConstant(CE->getOperand(i));
+        Out << constName << "_indices.push_back("
+            << getCppName(CE->getOperand(i)) << ");";
+        nl(Out);
+      }
+      Out << "Constant* " << constName
+          << " = ConstantExpr::getGetElementPtr("
+          << getCppName(CE->getOperand(0)) << ", "
+          << "&" << constName << "_indices[0], "
+          << constName << "_indices.size()"
+          << ");";
+    } else if (CE->isCast()) {
+      printConstant(CE->getOperand(0));
+      Out << "Constant* " << constName << " = ConstantExpr::getCast(";
+      switch (CE->getOpcode()) {
+      default: llvm_unreachable("Invalid cast opcode");
+      case Instruction::Trunc: Out << "Instruction::Trunc"; break;
+      case Instruction::ZExt: Out << "Instruction::ZExt"; break;
+      case Instruction::SExt: Out << "Instruction::SExt"; break;
+      case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
+      case Instruction::FPExt: Out << "Instruction::FPExt"; break;
+      case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
+      case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
+      case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
+      case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
+      case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
+      case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
+      case Instruction::BitCast: Out << "Instruction::BitCast"; break;
+      }
+      Out << ", " << getCppName(CE->getOperand(0)) << ", "
+          << getCppName(CE->getType()) << ");";
+    } else {
+      unsigned N = CE->getNumOperands();
+      for (unsigned i = 0; i < N; ++i ) {
+        printConstant(CE->getOperand(i));
+      }
+      Out << "Constant* " << constName << " = ConstantExpr::";
+      switch (CE->getOpcode()) {
+      case Instruction::Add:    Out << "getAdd(";  break;
+      case Instruction::FAdd:   Out << "getFAdd(";  break;
+      case Instruction::Sub:    Out << "getSub("; break;
+      case Instruction::FSub:   Out << "getFSub("; break;
+      case Instruction::Mul:    Out << "getMul("; break;
+      case Instruction::FMul:   Out << "getFMul("; break;
+      case Instruction::UDiv:   Out << "getUDiv("; break;
+      case Instruction::SDiv:   Out << "getSDiv("; break;
+      case Instruction::FDiv:   Out << "getFDiv("; break;
+      case Instruction::URem:   Out << "getURem("; break;
+      case Instruction::SRem:   Out << "getSRem("; break;
+      case Instruction::FRem:   Out << "getFRem("; break;
+      case Instruction::And:    Out << "getAnd("; break;
+      case Instruction::Or:     Out << "getOr("; break;
+      case Instruction::Xor:    Out << "getXor("; break;
+      case Instruction::ICmp:
+        Out << "getICmp(ICmpInst::ICMP_";
+        switch (CE->getPredicate()) {
+        case ICmpInst::ICMP_EQ:  Out << "EQ"; break;
+        case ICmpInst::ICMP_NE:  Out << "NE"; break;
+        case ICmpInst::ICMP_SLT: Out << "SLT"; break;
+        case ICmpInst::ICMP_ULT: Out << "ULT"; break;
+        case ICmpInst::ICMP_SGT: Out << "SGT"; break;
+        case ICmpInst::ICMP_UGT: Out << "UGT"; break;
+        case ICmpInst::ICMP_SLE: Out << "SLE"; break;
+        case ICmpInst::ICMP_ULE: Out << "ULE"; break;
+        case ICmpInst::ICMP_SGE: Out << "SGE"; break;
+        case ICmpInst::ICMP_UGE: Out << "UGE"; break;
+        default: error("Invalid ICmp Predicate");
+        }
+        break;
+      case Instruction::FCmp:
+        Out << "getFCmp(FCmpInst::FCMP_";
+        switch (CE->getPredicate()) {
+        case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
+        case FCmpInst::FCMP_ORD:   Out << "ORD"; break;
+        case FCmpInst::FCMP_UNO:   Out << "UNO"; break;
+        case FCmpInst::FCMP_OEQ:   Out << "OEQ"; break;
+        case FCmpInst::FCMP_UEQ:   Out << "UEQ"; break;
+        case FCmpInst::FCMP_ONE:   Out << "ONE"; break;
+        case FCmpInst::FCMP_UNE:   Out << "UNE"; break;
+        case FCmpInst::FCMP_OLT:   Out << "OLT"; break;
+        case FCmpInst::FCMP_ULT:   Out << "ULT"; break;
+        case FCmpInst::FCMP_OGT:   Out << "OGT"; break;
+        case FCmpInst::FCMP_UGT:   Out << "UGT"; break;
+        case FCmpInst::FCMP_OLE:   Out << "OLE"; break;
+        case FCmpInst::FCMP_ULE:   Out << "ULE"; break;
+        case FCmpInst::FCMP_OGE:   Out << "OGE"; break;
+        case FCmpInst::FCMP_UGE:   Out << "UGE"; break;
+        case FCmpInst::FCMP_TRUE:  Out << "TRUE"; break;
+        default: error("Invalid FCmp Predicate");
+        }
+        break;
+      case Instruction::Shl:     Out << "getShl("; break;
+      case Instruction::LShr:    Out << "getLShr("; break;
+      case Instruction::AShr:    Out << "getAShr("; break;
+      case Instruction::Select:  Out << "getSelect("; break;
+      case Instruction::ExtractElement: Out << "getExtractElement("; break;
+      case Instruction::InsertElement:  Out << "getInsertElement("; break;
+      case Instruction::ShuffleVector:  Out << "getShuffleVector("; break;
+      default:
+        error("Invalid constant expression");
+        break;
+      }
+      Out << getCppName(CE->getOperand(0));
+      for (unsigned i = 1; i < CE->getNumOperands(); ++i)
+        Out << ", " << getCppName(CE->getOperand(i));
+      Out << ");";
+    }
+  } else if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+    Out << "Constant* " << constName << " = ";
+    Out << "BlockAddress::get(" << getOpName(BA->getBasicBlock()) << ");";
+  } else {
+    error("Bad Constant");
+    Out << "Constant* " << constName << " = 0; ";
+  }
+  nl(Out);
+}
-
-  // printConstant - Print out a constant pool entry...
-  void CppWriter::printConstant(const Constant *CV) {
-    // First, if the constant is actually a GlobalValue (variable or function)
-    // or its already in the constant list then we've printed it already and we
-    // can just return.
-    if (isa<GlobalValue>(CV) || ValueNames.find(CV) != ValueNames.end())
-      return;
-
-    std::string constName(getCppName(CV));
-    std::string typeName(getCppName(CV->getType()));
-
-    if (isa<GlobalValue>(CV)) {
-      // Skip variables and functions, we emit them elsewhere
-      return;
-    }
-
-    if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
-      std::string constValue = CI->getValue().toString(10, true);
-      Out << "ConstantInt* " << constName
-          << " = ConstantInt::get(mod->getContext(), APInt("
-          << cast<IntegerType>(CI->getType())->getBitWidth()
-          << ", StringRef(\"" << constValue << "\"), 10));";
-    } else if (isa<ConstantAggregateZero>(CV)) {
-      Out << "ConstantAggregateZero* " << constName
-          << " = ConstantAggregateZero::get(" << typeName << ");";
-    } else if (isa<ConstantPointerNull>(CV)) {
-      Out << "ConstantPointerNull* " << constName
-          << " = ConstantPointerNull::get(" << typeName << ");";
-    } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
-      Out << "ConstantFP* " << constName << " = ";
-      printCFP(CFP);
-      Out << ";";
-    } else if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
-      if (CA->isString() &&
-          CA->getType()->getElementType() ==
-          Type::getInt8Ty(CA->getContext())) {
-        Out << "Constant* " << constName <<
-          " = ConstantArray::get(mod->getContext(), \"";
-        std::string tmp = CA->getAsString();
-        bool nullTerminate = false;
-        if (tmp[tmp.length()-1] == 0) {
-          tmp.erase(tmp.length()-1);
-          nullTerminate = true;
-        }
-        printEscapedString(tmp);
-        // Determine if we want null termination or not.
-        if (nullTerminate)
-          Out << "\", true"; // Indicate that the null terminator should be
-                             // added.
-        else
-          Out << "\", false";// No null terminator
-        Out << ");";
-      } else {
-        Out << "std::vector<Constant*> " << constName << "_elems;";
-        nl(Out);
-        unsigned N = CA->getNumOperands();
-        for (unsigned i = 0; i < N; ++i) {
-          printConstant(CA->getOperand(i)); // recurse to print operands
-          Out << constName << "_elems.push_back("
-              << getCppName(CA->getOperand(i)) << ");";
-          nl(Out);
-        }
-        Out << "Constant* " << constName << " = ConstantArray::get("
-            << typeName << ", " << constName << "_elems);";
-      }
-    } else if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
-      Out << "std::vector<Constant*> " << constName << "_fields;";
-      nl(Out);
-      unsigned N = CS->getNumOperands();
-      for (unsigned i = 0; i < N; i++) {
-        printConstant(CS->getOperand(i));
-        Out << constName << "_fields.push_back("
-            << getCppName(CS->getOperand(i)) << ");";
-        nl(Out);
-      }
-      Out << "Constant* " << constName << " = ConstantStruct::get("
-          << typeName << ", " << constName << "_fields);";
-    } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
-      Out << "std::vector<Constant*> " << constName << "_elems;";
-      nl(Out);
-      unsigned N = CP->getNumOperands();
-      for (unsigned i = 0; i < N; ++i) {
-        printConstant(CP->getOperand(i));
-        Out << constName << "_elems.push_back("
-            << getCppName(CP->getOperand(i)) << ");";
-        nl(Out);
-      }
-      Out << "Constant* " << constName << " = ConstantVector::get("
-          << typeName << ", " << constName << "_elems);";
-    } else if (isa<UndefValue>(CV)) {
-      Out << "UndefValue* " << constName << " = UndefValue::get("
-          << typeName << ");";
-    } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
-      if (CE->getOpcode() == Instruction::GetElementPtr) {
-        Out << "std::vector<Constant*> " << constName << "_indices;";
-        nl(Out);
-        printConstant(CE->getOperand(0));
-        for (unsigned i = 1; i < CE->getNumOperands(); ++i ) {
-          printConstant(CE->getOperand(i));
-          Out << constName << "_indices.push_back("
-              << getCppName(CE->getOperand(i)) << ");";
-          nl(Out);
-        }
-        Out << "Constant* " << constName
-            << " = ConstantExpr::getGetElementPtr("
-            << getCppName(CE->getOperand(0)) << ", "
-            << "&" << constName << "_indices[0], "
-            << constName << "_indices.size()"
-            << ");";
-      } else if (CE->isCast()) {
-        printConstant(CE->getOperand(0));
-        Out << "Constant* " << constName << " = ConstantExpr::getCast(";
-        switch (CE->getOpcode()) {
-        default: llvm_unreachable("Invalid cast opcode");
-        case Instruction::Trunc: Out << "Instruction::Trunc"; break;
-        case Instruction::ZExt: Out << "Instruction::ZExt"; break;
-        case Instruction::SExt: Out << "Instruction::SExt"; break;
-        case Instruction::FPTrunc: Out << "Instruction::FPTrunc"; break;
-        case Instruction::FPExt: Out << "Instruction::FPExt"; break;
-        case Instruction::FPToUI: Out << "Instruction::FPToUI"; break;
-        case Instruction::FPToSI: Out << "Instruction::FPToSI"; break;
-        case Instruction::UIToFP: Out << "Instruction::UIToFP"; break;
-        case Instruction::SIToFP: Out << "Instruction::SIToFP"; break;
-        case Instruction::PtrToInt: Out << "Instruction::PtrToInt"; break;
-        case Instruction::IntToPtr: Out << "Instruction::IntToPtr"; break;
-        case Instruction::BitCast: Out << "Instruction::BitCast"; break;
-        }
-        Out << ", " << getCppName(CE->getOperand(0)) << ", "
-            << getCppName(CE->getType()) << ");";
-      } else {
-        unsigned N = CE->getNumOperands();
-        for (unsigned i = 0; i < N; ++i ) {
-          printConstant(CE->getOperand(i));
-        }
-        Out << "Constant* " << constName << " = ConstantExpr::";
-        switch (CE->getOpcode()) {
-        case Instruction::Add:    Out << "getAdd(";  break;
-        case Instruction::FAdd:   Out << "getFAdd(";  break;
-        case Instruction::Sub:    Out << "getSub("; break;
-        case Instruction::FSub:   Out << "getFSub("; break;
-        case Instruction::Mul:    Out << "getMul("; break;
-        case Instruction::FMul:   Out << "getFMul("; break;
-        case Instruction::UDiv:   Out << "getUDiv("; break;
-        case Instruction::SDiv:   Out << "getSDiv("; break;
-        case Instruction::FDiv:   Out << "getFDiv("; break;
-        case Instruction::URem:   Out << "getURem("; break;
-        case Instruction::SRem:   Out << "getSRem("; break;
-        case Instruction::FRem:   Out << "getFRem("; break;
-        case Instruction::And:    Out << "getAnd("; break;
-        case Instruction::Or:     Out << "getOr("; break;
-        case Instruction::Xor:    Out << "getXor("; break;
-        case Instruction::ICmp:
-          Out << "getICmp(ICmpInst::ICMP_";
-          switch (CE->getPredicate()) {
-          case ICmpInst::ICMP_EQ:  Out << "EQ"; break;
-          case ICmpInst::ICMP_NE:  Out << "NE"; break;
-          case ICmpInst::ICMP_SLT: Out << "SLT"; break;
-          case ICmpInst::ICMP_ULT: Out << "ULT"; break;
-          case ICmpInst::ICMP_SGT: Out << "SGT"; break;
-          case ICmpInst::ICMP_UGT: Out << "UGT"; break;
-          case ICmpInst::ICMP_SLE: Out << "SLE"; break;
-          case ICmpInst::ICMP_ULE: Out << "ULE"; break;
-          case ICmpInst::ICMP_SGE: Out << "SGE"; break;
-          case ICmpInst::ICMP_UGE: Out << "UGE"; break;
-          default: error("Invalid ICmp Predicate");
-          }
-          break;
-        case Instruction::FCmp:
-          Out << "getFCmp(FCmpInst::FCMP_";
-          switch (CE->getPredicate()) {
-          case FCmpInst::FCMP_FALSE: Out << "FALSE"; break;
-          case FCmpInst::FCMP_ORD:   Out << "ORD"; break;
-          case FCmpInst::FCMP_UNO:   Out << "UNO"; break;
-          case FCmpInst::FCMP_OEQ:   Out << "OEQ"; break;
-          case FCmpInst::FCMP_UEQ:   Out << "UEQ"; break;
-          case FCmpInst::FCMP_ONE:   Out << "ONE"; break;
-          case FCmpInst::FCMP_UNE:   Out << "UNE"; break;
-          case FCmpInst::FCMP_OLT:   Out << "OLT"; break;
-          case FCmpInst::FCMP_ULT:   Out << "ULT"; break;
-          case FCmpInst::FCMP_OGT:   Out << "OGT"; break;
-          case FCmpInst::FCMP_UGT:   Out << "UGT"; break;
-          case FCmpInst::FCMP_OLE:   Out << "OLE"; break;
-          case FCmpInst::FCMP_ULE:   Out << "ULE"; break;
-          case FCmpInst::FCMP_OGE:   Out << "OGE"; break;
-          case FCmpInst::FCMP_UGE:   Out << "UGE"; break;
-          case FCmpInst::FCMP_TRUE:  Out << "TRUE"; break;
-          default: error("Invalid FCmp Predicate");
-          }
-          break;
-        case Instruction::Shl:     Out << "getShl("; break;
-        case Instruction::LShr:    Out << "getLShr("; break;
-        case Instruction::AShr:    Out << "getAShr("; break;
-        case Instruction::Select:  Out << "getSelect("; break;
-        case Instruction::ExtractElement: Out << "getExtractElement("; break;
-        case Instruction::InsertElement:  Out << "getInsertElement("; break;
-        case Instruction::ShuffleVector:  Out << "getShuffleVector("; break;
-        default:
-          error("Invalid constant expression");
-          break;
-        }
-        Out << getCppName(CE->getOperand(0));
-        for (unsigned i = 1; i < CE->getNumOperands(); ++i)
-          Out << ", " << getCppName(CE->getOperand(i));
-        Out << ");";
-      }
-    } else {
-      error("Bad Constant");
-      Out << "Constant* " << constName << " = 0; ";
-    }
-    nl(Out);
-  }
"OLT"; break; + case FCmpInst::FCMP_ULT: Out << "ULT"; break; + case FCmpInst::FCMP_OGT: Out << "OGT"; break; + case FCmpInst::FCMP_UGT: Out << "UGT"; break; + case FCmpInst::FCMP_OLE: Out << "OLE"; break; + case FCmpInst::FCMP_ULE: Out << "ULE"; break; + case FCmpInst::FCMP_OGE: Out << "OGE"; break; + case FCmpInst::FCMP_UGE: Out << "UGE"; break; + case FCmpInst::FCMP_TRUE: Out << "TRUE"; break; + default: error("Invalid FCmp Predicate"); } - Out << getCppName(CE->getOperand(0)); - for (unsigned i = 1; i < CE->getNumOperands(); ++i) - Out << ", " << getCppName(CE->getOperand(i)); - Out << ");"; + break; + case Instruction::Shl: Out << "getShl("; break; + case Instruction::LShr: Out << "getLShr("; break; + case Instruction::AShr: Out << "getAShr("; break; + case Instruction::Select: Out << "getSelect("; break; + case Instruction::ExtractElement: Out << "getExtractElement("; break; + case Instruction::InsertElement: Out << "getInsertElement("; break; + case Instruction::ShuffleVector: Out << "getShuffleVector("; break; + default: + error("Invalid constant expression"); + break; } - } else { - error("Bad Constant"); - Out << "Constant* " << constName << " = 0; "; + Out << getCppName(CE->getOperand(0)); + for (unsigned i = 1; i < CE->getNumOperands(); ++i) + Out << ", " << getCppName(CE->getOperand(i)); + Out << ");"; } - nl(Out); + } else if (const BlockAddress *BA = dyn_cast(CV)) { + Out << "Constant* " << constName << " = "; + Out << "BlockAddress::get(" << getOpName(BA->getBasicBlock()) << ");"; + } else { + error("Bad Constant"); + Out << "Constant* " << constName << " = 0; "; } + nl(Out); +} - void CppWriter::printConstants(const Module* M) { - // Traverse all the global variables looking for constant initializers - for (Module::const_global_iterator I = TheModule->global_begin(), - E = TheModule->global_end(); I != E; ++I) - if (I->hasInitializer()) - printConstant(I->getInitializer()); - - // Traverse the LLVM functions looking for constants - for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); - FI != FE; ++FI) { - // Add all of the basic blocks and instructions - for (Function::const_iterator BB = FI->begin(), - E = FI->end(); BB != E; ++BB) { - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; - ++I) { - for (unsigned i = 0; i < I->getNumOperands(); ++i) { - if (Constant* C = dyn_cast(I->getOperand(i))) { - printConstant(C); - } +void CppWriter::printConstants(const Module* M) { + // Traverse all the global variables looking for constant initializers + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) + if (I->hasInitializer()) + printConstant(I->getInitializer()); + + // Traverse the LLVM functions looking for constants + for (Module::const_iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + // Add all of the basic blocks and instructions + for (Function::const_iterator BB = FI->begin(), + E = FI->end(); BB != E; ++BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; + ++I) { + for (unsigned i = 0; i < I->getNumOperands(); ++i) { + if (Constant* C = dyn_cast(I->getOperand(i))) { + printConstant(C); } } } } } +} - void CppWriter::printVariableUses(const GlobalVariable *GV) { - nl(Out) << "// Type Definitions"; - nl(Out); - printType(GV->getType()); - if (GV->hasInitializer()) { - Constant *Init = GV->getInitializer(); - printType(Init->getType()); - if (Function *F = dyn_cast(Init)) { - nl(Out)<< "/ Function Declarations"; 
+
+void CppWriter::printVariableUses(const GlobalVariable *GV) {
+  nl(Out) << "// Type Definitions";
+  nl(Out);
+  printType(GV->getType());
+  if (GV->hasInitializer()) {
+    Constant *Init = GV->getInitializer();
+    printType(Init->getType());
+    if (Function *F = dyn_cast<Function>(Init)) {
+      nl(Out)<< "// Function Declarations"; nl(Out);
+      printFunctionHead(F);
+    } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+      nl(Out) << "// Global Variable Declarations"; nl(Out);
+      printVariableHead(gv);
+
+      nl(Out) << "// Global Variable Definitions"; nl(Out);
+      printVariableBody(gv);
+    } else {
+      nl(Out) << "// Constant Definitions"; nl(Out);
+      printConstant(Init);
+    }
+  }
+}
-
-  void CppWriter::printVariableUses(const GlobalVariable *GV) {
-    nl(Out) << "// Type Definitions";
-    nl(Out);
-    printType(GV->getType());
-    if (GV->hasInitializer()) {
-      Constant *Init = GV->getInitializer();
-      printType(Init->getType());
-      if (Function *F = dyn_cast<Function>(Init)) {
-        nl(Out)<< "/ Function Declarations"; nl(Out);
-        printFunctionHead(F);
-      } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
-        nl(Out) << "// Global Variable Declarations"; nl(Out);
-        printVariableHead(gv);
-
-        nl(Out) << "// Global Variable Definitions"; nl(Out);
-        printVariableBody(gv);
-      } else {
-        nl(Out) << "// Constant Definitions"; nl(Out);
-        printConstant(Init);
-      }
-    }
-  }
+
+void CppWriter::printVariableHead(const GlobalVariable *GV) {
+  nl(Out) << "GlobalVariable* " << getCppName(GV);
+  if (is_inline) {
+    Out << " = mod->getGlobalVariable(mod->getContext(), ";
+    printEscapedString(GV->getName());
+    Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
+    nl(Out) << "if (!" << getCppName(GV) << ") {";
+    in(); nl(Out) << getCppName(GV);
+  }
+  Out << " = new GlobalVariable(/*Module=*/*mod, ";
+  nl(Out) << "/*Type=*/";
+  printCppName(GV->getType()->getElementType());
+  Out << ",";
+  nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
+  Out << ",";
+  nl(Out) << "/*Linkage=*/";
+  printLinkageType(GV->getLinkage());
+  Out << ",";
+  nl(Out) << "/*Initializer=*/0, ";
+  if (GV->hasInitializer()) {
+    Out << "// has initializer, specified below";
+  }
+  nl(Out) << "/*Name=*/\"";
+  printEscapedString(GV->getName());
+  Out << "\");";
+  nl(Out);
+
+  if (GV->hasSection()) {
+    printCppName(GV);
+    Out << "->setSection(\"";
+    printEscapedString(GV->getSection());
+    Out << "\");";
+    nl(Out);
+  }
+  if (GV->getAlignment()) {
+    printCppName(GV);
+    Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
+    nl(Out);
+  }
+  if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
+    printCppName(GV);
+    Out << "->setVisibility(";
+    printVisibilityType(GV->getVisibility());
+    Out << ");";
+    nl(Out);
+  }
+  if (GV->isThreadLocal()) {
+    printCppName(GV);
+    Out << "->setThreadLocal(true);";
+    nl(Out);
+  }
+  if (is_inline) {
+    out(); Out << "}"; nl(Out);
+  }
+}
-
-  void CppWriter::printVariableHead(const GlobalVariable *GV) {
-    nl(Out) << "GlobalVariable* " << getCppName(GV);
-    if (is_inline) {
-      Out << " = mod->getGlobalVariable(mod->getContext(), ";
-      printEscapedString(GV->getName());
-      Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
-      nl(Out) << "if (!" << getCppName(GV) << ") {";
-      in(); nl(Out) << getCppName(GV);
-    }
-    Out << " = new GlobalVariable(/*Module=*/*mod, ";
-    nl(Out) << "/*Type=*/";
-    printCppName(GV->getType()->getElementType());
-    Out << ",";
-    nl(Out) << "/*isConstant=*/" << (GV->isConstant()?"true":"false");
-    Out << ",";
-    nl(Out) << "/*Linkage=*/";
-    printLinkageType(GV->getLinkage());
-    Out << ",";
-    nl(Out) << "/*Initializer=*/0, ";
-    if (GV->hasInitializer()) {
-      Out << "// has initializer, specified below";
-    }
-    nl(Out) << "/*Name=*/\"";
-    printEscapedString(GV->getName());
-    Out << "\");";
-    nl(Out);
-
-    if (GV->hasSection()) {
-      printCppName(GV);
-      Out << "->setSection(\"";
-      printEscapedString(GV->getSection());
-      Out << "\");";
-      nl(Out);
-    }
-    if (GV->getAlignment()) {
-      printCppName(GV);
-      Out << "->setAlignment(" << utostr(GV->getAlignment()) << ");";
-      nl(Out);
-    }
-    if (GV->getVisibility() != GlobalValue::DefaultVisibility) {
-      printCppName(GV);
-      Out << "->setVisibility(";
-      printVisibilityType(GV->getVisibility());
-      Out << ");";
-      nl(Out);
-    }
-    if (GV->isThreadLocal()) {
-      printCppName(GV);
-      Out << "->setThreadLocal(true);";
-      nl(Out);
-    }
-    if (is_inline) {
-      out(); Out << "}"; nl(Out);
-    }
-  }
+
+void CppWriter::printVariableBody(const GlobalVariable *GV) {
+  if (GV->hasInitializer()) {
+    printCppName(GV);
+    Out << "->setInitializer(";
+    Out << getCppName(GV->getInitializer()) << ");";
+    nl(Out);
+  }
+}
-
-  void CppWriter::printVariableBody(const GlobalVariable *GV) {
-    if (GV->hasInitializer()) {
-      printCppName(GV);
-      Out << "->setInitializer(";
-      Out << getCppName(GV->getInitializer()) << ");";
-      nl(Out);
-    }
-  }
+
+std::string CppWriter::getOpName(Value* V) {
+  if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
+    return getCppName(V);
+
+  // See if it's already in the map of forward references; if so just return
+  // the name we already set up for it
+  ForwardRefMap::const_iterator I = ForwardRefs.find(V);
+  if (I != ForwardRefs.end())
+    return I->second;
+
+  // This is a new forward reference. Generate a unique name for it
+  std::string result(std::string("fwdref_") + utostr(uniqueNum++));
+
+  // Yes, this is a hack. An Argument is the smallest instantiable value that
+  // we can make as a placeholder for the real value. We'll replace these
+  // Argument instances later.
+  Out << "Argument* " << result << " = new Argument("
+      << getCppName(V->getType()) << ");";
+  nl(Out);
+  ForwardRefs[V] = result;
+  return result;
+}
-
-  std::string CppWriter::getOpName(Value* V) {
-    if (!isa<Instruction>(V) || DefinedValues.find(V) != DefinedValues.end())
-      return getCppName(V);
-
-    // See if its alread in the map of forward references, if so just return the
-    // name we already set up for it
-    ForwardRefMap::const_iterator I = ForwardRefs.find(V);
-    if (I != ForwardRefs.end())
-      return I->second;
-
-    // This is a new forward reference. Generate a unique name for it
-    std::string result(std::string("fwdref_") + utostr(uniqueNum++));
-
-    // Yes, this is a hack. An Argument is the smallest instantiable value that
-    // we can make as a placeholder for the real value. We'll replace these
-    // Argument instances later.
-    Out << "Argument* " << result << " = new Argument("
-        << getCppName(V->getType()) << ");";
-    nl(Out);
-    ForwardRefs[V] = result;
-    return result;
-  }
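Sketch of the placeholder trick (hypothetical name): a use that precedes its definition gets a dummy Argument of the right type, which later logic outside this hunk replaces with the real instruction:

  Argument* fwdref_7 = new Argument(IntegerType::get(mod->getContext(), 32));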
opNames[0] + ", " : "") << bbname << ");"; + break; + } + case Instruction::Br: { + const BranchInst* br = cast(I); + Out << "BranchInst::Create(" ; + if (br->getNumOperands() == 3) { + Out << opNames[2] << ", " + << opNames[1] << ", " + << opNames[0] << ", "; + + } else if (br->getNumOperands() == 1) { + Out << opNames[0] << ", "; + } else { + error("Branch with 2 operands?"); } - case Instruction::Br: { - const BranchInst* br = cast(I); - Out << "BranchInst::Create(" ; - if (br->getNumOperands() == 3 ) { - Out << opNames[2] << ", " - << opNames[1] << ", " - << opNames[0] << ", "; - - } else if (br->getNumOperands() == 1) { - Out << opNames[0] << ", "; - } else { - error("Branch with 2 operands?"); - } - Out << bbname << ");"; - break; + Out << bbname << ");"; + break; + } + case Instruction::Switch: { + const SwitchInst *SI = cast(I); + Out << "SwitchInst* " << iName << " = SwitchInst::Create(" + << opNames[0] << ", " + << opNames[1] << ", " + << SI->getNumCases() << ", " << bbname << ");"; + nl(Out); + for (unsigned i = 2; i != SI->getNumOperands(); i += 2) { + Out << iName << "->addCase(" + << opNames[i] << ", " + << opNames[i+1] << ");"; + nl(Out); } - case Instruction::Switch: { - const SwitchInst *SI = cast(I); - Out << "SwitchInst* " << iName << " = SwitchInst::Create(" - << opNames[0] << ", " - << opNames[1] << ", " - << SI->getNumCases() << ", " << bbname << ");"; + break; + } + case Instruction::IndirectBr: { + const IndirectBrInst *IBI = cast(I); + Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create(" + << opNames[0] << ", " << IBI->getNumDestinations() << ");"; + nl(Out); + for (unsigned i = 1; i != IBI->getNumOperands(); ++i) { + Out << iName << "->addDestination(" << opNames[i] << ");"; nl(Out); - for (unsigned i = 2; i != SI->getNumOperands(); i += 2) { - Out << iName << "->addCase(" - << opNames[i] << ", " - << opNames[i+1] << ");"; - nl(Out); - } - break; } - case Instruction::IndirectBr: { - const IndirectBrInst *IBI = cast(I); - Out << "IndirectBrInst *" << iName << " = IndirectBrInst::Create(" - << opNames[0] << ", " << IBI->getNumDestinations() << ");"; + break; + } + case Instruction::Invoke: { + const InvokeInst* inv = cast(I); + Out << "std::vector " << iName << "_params;"; + nl(Out); + for (unsigned i = 0; i < inv->getNumArgOperands(); ++i) { + Out << iName << "_params.push_back(" + << getOpName(inv->getArgOperand(i)) << ");"; nl(Out); - for (unsigned i = 1; i != IBI->getNumOperands(); ++i) { - Out << iName << "->addDestination(" << opNames[i] << ");"; - nl(Out); - } - break; } - case Instruction::Invoke: { - const InvokeInst* inv = cast(I); - Out << "std::vector " << iName << "_params;"; + // FIXME: This shouldn't use magic numbers -3, -2, and -1. 
+ Out << "InvokeInst *" << iName << " = InvokeInst::Create(" + << getOpName(inv->getCalledFunction()) << ", " + << getOpName(inv->getNormalDest()) << ", " + << getOpName(inv->getUnwindDest()) << ", " + << iName << "_params.begin(), " + << iName << "_params.end(), \""; + printEscapedString(inv->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->setCallingConv("; + printCallingConv(inv->getCallingConv()); + Out << ");"; + printAttributes(inv->getAttributes(), iName); + Out << iName << "->setAttributes(" << iName << "_PAL);"; + nl(Out); + break; + } + case Instruction::Unwind: { + Out << "new UnwindInst(" + << bbname << ");"; + break; + } + case Instruction::Unreachable: { + Out << "new UnreachableInst(" + << "mod->getContext(), " + << bbname << ");"; + break; + } + case Instruction::Add: + case Instruction::FAdd: + case Instruction::Sub: + case Instruction::FSub: + case Instruction::Mul: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr:{ + Out << "BinaryOperator* " << iName << " = BinaryOperator::Create("; + switch (I->getOpcode()) { + case Instruction::Add: Out << "Instruction::Add"; break; + case Instruction::FAdd: Out << "Instruction::FAdd"; break; + case Instruction::Sub: Out << "Instruction::Sub"; break; + case Instruction::FSub: Out << "Instruction::FSub"; break; + case Instruction::Mul: Out << "Instruction::Mul"; break; + case Instruction::FMul: Out << "Instruction::FMul"; break; + case Instruction::UDiv:Out << "Instruction::UDiv"; break; + case Instruction::SDiv:Out << "Instruction::SDiv"; break; + case Instruction::FDiv:Out << "Instruction::FDiv"; break; + case Instruction::URem:Out << "Instruction::URem"; break; + case Instruction::SRem:Out << "Instruction::SRem"; break; + case Instruction::FRem:Out << "Instruction::FRem"; break; + case Instruction::And: Out << "Instruction::And"; break; + case Instruction::Or: Out << "Instruction::Or"; break; + case Instruction::Xor: Out << "Instruction::Xor"; break; + case Instruction::Shl: Out << "Instruction::Shl"; break; + case Instruction::LShr:Out << "Instruction::LShr"; break; + case Instruction::AShr:Out << "Instruction::AShr"; break; + default: Out << "Instruction::BadOpCode"; break; + } + Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; + printEscapedString(I->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::FCmp: { + Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", "; + switch (cast(I)->getPredicate()) { + case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break; + case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break; + case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break; + case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break; + case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break; + case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break; + case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break; + case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break; + case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break; + case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break; + case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break; + case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break; + case 
+    case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break;
+    case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break;
+    case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break;
+    default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break;
+    }
+    Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+    printEscapedString(I->getName());
+    Out << "\");";
+    break;
+  }
+  case Instruction::ICmp: {
+    Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", ";
+    switch (cast<ICmpInst>(I)->getPredicate()) {
+    case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break;
+    case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break;
+    case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break;
+    case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break;
+    case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break;
+    case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break;
+    case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break;
+    case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break;
+    case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break;
+    case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break;
+    default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break;
+    }
+    Out << ", " << opNames[0] << ", " << opNames[1] << ", \"";
+    printEscapedString(I->getName());
+    Out << "\");";
+    break;
+  }
+  case Instruction::Alloca: {
+    const AllocaInst* allocaI = cast<AllocaInst>(I);
+    Out << "AllocaInst* " << iName << " = new AllocaInst("
+        << getCppName(allocaI->getAllocatedType()) << ", ";
+    if (allocaI->isArrayAllocation())
+      Out << opNames[0] << ", ";
+    Out << "\"";
+    printEscapedString(allocaI->getName());
+    Out << "\", " << bbname << ");";
+    if (allocaI->getAlignment())
+      nl(Out) << iName << "->setAlignment("
+              << allocaI->getAlignment() << ");";
+    break;
+  }
+  case Instruction::Load: {
+    const LoadInst* load = cast<LoadInst>(I);
+    Out << "LoadInst* " << iName << " = new LoadInst("
+        << opNames[0] << ", \"";
+    printEscapedString(load->getName());
+    Out << "\", " << (load->isVolatile() ? "true" : "false" )
+        << ", " << bbname << ");";
+    break;
+  }
+  case Instruction::Store: {
+    const StoreInst* store = cast<StoreInst>(I);
+    Out << " new StoreInst("
+        << opNames[0] << ", "
+        << opNames[1] << ", "
+        << (store->isVolatile() ? "true" : "false")
"true" : "false") + << ", " << bbname << ");"; + break; + } + case Instruction::GetElementPtr: { + const GetElementPtrInst* gep = cast(I); + if (gep->getNumOperands() <= 2) { + Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create(" + << opNames[0]; + if (gep->getNumOperands() == 2) + Out << ", " << opNames[1]; + } else { + Out << "std::vector " << iName << "_indices;"; nl(Out); - for (unsigned i = 0; i < inv->getNumOperands() - 3; ++i) { - Out << iName << "_params.push_back(" + for (unsigned i = 1; i < gep->getNumOperands(); ++i ) { + Out << iName << "_indices.push_back(" << opNames[i] << ");"; nl(Out); } - Out << "InvokeInst *" << iName << " = InvokeInst::Create(" - << opNames[Ops - 3] << ", " - << opNames[Ops - 2] << ", " - << opNames[Ops - 1] << ", " - << iName << "_params.begin(), " << iName << "_params.end(), \""; - printEscapedString(inv->getName()); - Out << "\", " << bbname << ");"; - nl(Out) << iName << "->setCallingConv("; - printCallingConv(inv->getCallingConv()); - Out << ");"; - printAttributes(inv->getAttributes(), iName); - Out << iName << "->setAttributes(" << iName << "_PAL);"; + Out << "Instruction* " << iName << " = GetElementPtrInst::Create(" + << opNames[0] << ", " << iName << "_indices.begin(), " + << iName << "_indices.end()"; + } + Out << ", \""; + printEscapedString(gep->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::PHI: { + const PHINode* phi = cast(I); + + Out << "PHINode* " << iName << " = PHINode::Create(" + << getCppName(phi->getType()) << ", \""; + printEscapedString(phi->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->reserveOperandSpace(" + << phi->getNumIncomingValues() + << ");"; + nl(Out); + for (unsigned i = 0; i < phi->getNumOperands(); i+=2) { + Out << iName << "->addIncoming(" + << opNames[i] << ", " << opNames[i+1] << ");"; nl(Out); - break; - } - case Instruction::Unwind: { - Out << "new UnwindInst(" - << bbname << ");"; - break; - } - case Instruction::Unreachable: { - Out << "new UnreachableInst(" - << "mod->getContext(), " - << bbname << ");"; - break; - } - case Instruction::Add: - case Instruction::FAdd: - case Instruction::Sub: - case Instruction::FSub: - case Instruction::Mul: - case Instruction::FMul: - case Instruction::UDiv: - case Instruction::SDiv: - case Instruction::FDiv: - case Instruction::URem: - case Instruction::SRem: - case Instruction::FRem: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - case Instruction::Shl: - case Instruction::LShr: - case Instruction::AShr:{ - Out << "BinaryOperator* " << iName << " = BinaryOperator::Create("; - switch (I->getOpcode()) { - case Instruction::Add: Out << "Instruction::Add"; break; - case Instruction::FAdd: Out << "Instruction::FAdd"; break; - case Instruction::Sub: Out << "Instruction::Sub"; break; - case Instruction::FSub: Out << "Instruction::FSub"; break; - case Instruction::Mul: Out << "Instruction::Mul"; break; - case Instruction::FMul: Out << "Instruction::FMul"; break; - case Instruction::UDiv:Out << "Instruction::UDiv"; break; - case Instruction::SDiv:Out << "Instruction::SDiv"; break; - case Instruction::FDiv:Out << "Instruction::FDiv"; break; - case Instruction::URem:Out << "Instruction::URem"; break; - case Instruction::SRem:Out << "Instruction::SRem"; break; - case Instruction::FRem:Out << "Instruction::FRem"; break; - case Instruction::And: Out << "Instruction::And"; break; - case Instruction::Or: Out << "Instruction::Or"; break; - case Instruction::Xor: Out << 
"Instruction::Xor"; break; - case Instruction::Shl: Out << "Instruction::Shl"; break; - case Instruction::LShr:Out << "Instruction::LShr"; break; - case Instruction::AShr:Out << "Instruction::AShr"; break; - default: Out << "Instruction::BadOpCode"; break; - } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\", " << bbname << ");"; - break; } - case Instruction::FCmp: { - Out << "FCmpInst* " << iName << " = new FCmpInst(*" << bbname << ", "; - switch (cast(I)->getPredicate()) { - case FCmpInst::FCMP_FALSE: Out << "FCmpInst::FCMP_FALSE"; break; - case FCmpInst::FCMP_OEQ : Out << "FCmpInst::FCMP_OEQ"; break; - case FCmpInst::FCMP_OGT : Out << "FCmpInst::FCMP_OGT"; break; - case FCmpInst::FCMP_OGE : Out << "FCmpInst::FCMP_OGE"; break; - case FCmpInst::FCMP_OLT : Out << "FCmpInst::FCMP_OLT"; break; - case FCmpInst::FCMP_OLE : Out << "FCmpInst::FCMP_OLE"; break; - case FCmpInst::FCMP_ONE : Out << "FCmpInst::FCMP_ONE"; break; - case FCmpInst::FCMP_ORD : Out << "FCmpInst::FCMP_ORD"; break; - case FCmpInst::FCMP_UNO : Out << "FCmpInst::FCMP_UNO"; break; - case FCmpInst::FCMP_UEQ : Out << "FCmpInst::FCMP_UEQ"; break; - case FCmpInst::FCMP_UGT : Out << "FCmpInst::FCMP_UGT"; break; - case FCmpInst::FCMP_UGE : Out << "FCmpInst::FCMP_UGE"; break; - case FCmpInst::FCMP_ULT : Out << "FCmpInst::FCMP_ULT"; break; - case FCmpInst::FCMP_ULE : Out << "FCmpInst::FCMP_ULE"; break; - case FCmpInst::FCMP_UNE : Out << "FCmpInst::FCMP_UNE"; break; - case FCmpInst::FCMP_TRUE : Out << "FCmpInst::FCMP_TRUE"; break; - default: Out << "FCmpInst::BAD_ICMP_PREDICATE"; break; - } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\");"; - break; - } - case Instruction::ICmp: { - Out << "ICmpInst* " << iName << " = new ICmpInst(*" << bbname << ", "; - switch (cast(I)->getPredicate()) { - case ICmpInst::ICMP_EQ: Out << "ICmpInst::ICMP_EQ"; break; - case ICmpInst::ICMP_NE: Out << "ICmpInst::ICMP_NE"; break; - case ICmpInst::ICMP_ULE: Out << "ICmpInst::ICMP_ULE"; break; - case ICmpInst::ICMP_SLE: Out << "ICmpInst::ICMP_SLE"; break; - case ICmpInst::ICMP_UGE: Out << "ICmpInst::ICMP_UGE"; break; - case ICmpInst::ICMP_SGE: Out << "ICmpInst::ICMP_SGE"; break; - case ICmpInst::ICMP_ULT: Out << "ICmpInst::ICMP_ULT"; break; - case ICmpInst::ICMP_SLT: Out << "ICmpInst::ICMP_SLT"; break; - case ICmpInst::ICMP_UGT: Out << "ICmpInst::ICMP_UGT"; break; - case ICmpInst::ICMP_SGT: Out << "ICmpInst::ICMP_SGT"; break; - default: Out << "ICmpInst::BAD_ICMP_PREDICATE"; break; - } - Out << ", " << opNames[0] << ", " << opNames[1] << ", \""; - printEscapedString(I->getName()); - Out << "\");"; - break; - } - case Instruction::Alloca: { - const AllocaInst* allocaI = cast(I); - Out << "AllocaInst* " << iName << " = new AllocaInst(" - << getCppName(allocaI->getAllocatedType()) << ", "; - if (allocaI->isArrayAllocation()) - Out << opNames[0] << ", "; - Out << "\""; - printEscapedString(allocaI->getName()); - Out << "\", " << bbname << ");"; - if (allocaI->getAlignment()) - nl(Out) << iName << "->setAlignment(" - << allocaI->getAlignment() << ");"; - break; - } - case Instruction::Load:{ - const LoadInst* load = cast(I); - Out << "LoadInst* " << iName << " = new LoadInst(" - << opNames[0] << ", \""; - printEscapedString(load->getName()); - Out << "\", " << (load->isVolatile() ? 
"true" : "false" ) - << ", " << bbname << ");"; - break; - } - case Instruction::Store: { - const StoreInst* store = cast(I); - Out << " new StoreInst(" - << opNames[0] << ", " - << opNames[1] << ", " - << (store->isVolatile() ? "true" : "false") - << ", " << bbname << ");"; - break; - } - case Instruction::GetElementPtr: { - const GetElementPtrInst* gep = cast(I); - if (gep->getNumOperands() <= 2) { - Out << "GetElementPtrInst* " << iName << " = GetElementPtrInst::Create(" - << opNames[0]; - if (gep->getNumOperands() == 2) - Out << ", " << opNames[1]; - } else { - Out << "std::vector " << iName << "_indices;"; - nl(Out); - for (unsigned i = 1; i < gep->getNumOperands(); ++i ) { - Out << iName << "_indices.push_back(" - << opNames[i] << ");"; - nl(Out); - } - Out << "Instruction* " << iName << " = GetElementPtrInst::Create(" - << opNames[0] << ", " << iName << "_indices.begin(), " - << iName << "_indices.end()"; - } - Out << ", \""; - printEscapedString(gep->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::PHI: { - const PHINode* phi = cast(I); - - Out << "PHINode* " << iName << " = PHINode::Create(" - << getCppName(phi->getType()) << ", \""; - printEscapedString(phi->getName()); - Out << "\", " << bbname << ");"; - nl(Out) << iName << "->reserveOperandSpace(" - << phi->getNumIncomingValues() - << ");"; + break; + } + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + case Instruction::PtrToInt: + case Instruction::IntToPtr: + case Instruction::BitCast: { + const CastInst* cst = cast(I); + Out << "CastInst* " << iName << " = new "; + switch (I->getOpcode()) { + case Instruction::Trunc: Out << "TruncInst"; break; + case Instruction::ZExt: Out << "ZExtInst"; break; + case Instruction::SExt: Out << "SExtInst"; break; + case Instruction::FPTrunc: Out << "FPTruncInst"; break; + case Instruction::FPExt: Out << "FPExtInst"; break; + case Instruction::FPToUI: Out << "FPToUIInst"; break; + case Instruction::FPToSI: Out << "FPToSIInst"; break; + case Instruction::UIToFP: Out << "UIToFPInst"; break; + case Instruction::SIToFP: Out << "SIToFPInst"; break; + case Instruction::PtrToInt: Out << "PtrToIntInst"; break; + case Instruction::IntToPtr: Out << "IntToPtrInst"; break; + case Instruction::BitCast: Out << "BitCastInst"; break; + default: assert(!"Unreachable"); break; + } + Out << "(" << opNames[0] << ", " + << getCppName(cst->getType()) << ", \""; + printEscapedString(cst->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::Call: { + const CallInst* call = cast(I); + if (const InlineAsm* ila = dyn_cast(call->getCalledValue())) { + Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get(" + << getCppName(ila->getFunctionType()) << ", \"" + << ila->getAsmString() << "\", \"" + << ila->getConstraintString() << "\"," + << (ila->hasSideEffects() ? 
"true" : "false") << ");"; nl(Out); - for (unsigned i = 0; i < phi->getNumOperands(); i+=2) { - Out << iName << "->addIncoming(" - << opNames[i] << ", " << opNames[i+1] << ");"; - nl(Out); - } - break; - } - case Instruction::Trunc: - case Instruction::ZExt: - case Instruction::SExt: - case Instruction::FPTrunc: - case Instruction::FPExt: - case Instruction::FPToUI: - case Instruction::FPToSI: - case Instruction::UIToFP: - case Instruction::SIToFP: - case Instruction::PtrToInt: - case Instruction::IntToPtr: - case Instruction::BitCast: { - const CastInst* cst = cast(I); - Out << "CastInst* " << iName << " = new "; - switch (I->getOpcode()) { - case Instruction::Trunc: Out << "TruncInst"; break; - case Instruction::ZExt: Out << "ZExtInst"; break; - case Instruction::SExt: Out << "SExtInst"; break; - case Instruction::FPTrunc: Out << "FPTruncInst"; break; - case Instruction::FPExt: Out << "FPExtInst"; break; - case Instruction::FPToUI: Out << "FPToUIInst"; break; - case Instruction::FPToSI: Out << "FPToSIInst"; break; - case Instruction::UIToFP: Out << "UIToFPInst"; break; - case Instruction::SIToFP: Out << "SIToFPInst"; break; - case Instruction::PtrToInt: Out << "PtrToIntInst"; break; - case Instruction::IntToPtr: Out << "IntToPtrInst"; break; - case Instruction::BitCast: Out << "BitCastInst"; break; - default: assert(!"Unreachable"); break; - } - Out << "(" << opNames[0] << ", " - << getCppName(cst->getType()) << ", \""; - printEscapedString(cst->getName()); - Out << "\", " << bbname << ");"; - break; } - case Instruction::Call:{ - const CallInst* call = cast(I); - if (const InlineAsm* ila = dyn_cast(call->getCalledValue())) { - Out << "InlineAsm* " << getCppName(ila) << " = InlineAsm::get(" - << getCppName(ila->getFunctionType()) << ", \"" - << ila->getAsmString() << "\", \"" - << ila->getConstraintString() << "\"," - << (ila->hasSideEffects() ? "true" : "false") << ");"; - nl(Out); - } - if (call->getNumOperands() > 2) { - Out << "std::vector " << iName << "_params;"; + if (call->getNumArgOperands() > 1) { + Out << "std::vector " << iName << "_params;"; + nl(Out); + for (unsigned i = 0; i < call->getNumArgOperands(); ++i) { + Out << iName << "_params.push_back(" << opNames[i] << ");"; nl(Out); - for (unsigned i = 1; i < call->getNumOperands(); ++i) { - Out << iName << "_params.push_back(" << opNames[i] << ");"; - nl(Out); - } - Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[0] << ", " << iName << "_params.begin(), " - << iName << "_params.end(), \""; - } else if (call->getNumOperands() == 2) { - Out << "CallInst* " << iName << " = CallInst::Create(" - << opNames[0] << ", " << opNames[1] << ", \""; - } else { - Out << "CallInst* " << iName << " = CallInst::Create(" << opNames[0] - << ", \""; } - printEscapedString(call->getName()); - Out << "\", " << bbname << ");"; - nl(Out) << iName << "->setCallingConv("; - printCallingConv(call->getCallingConv()); - Out << ");"; - nl(Out) << iName << "->setTailCall(" - << (call->isTailCall() ? 
"true":"false"); - Out << ");"; - printAttributes(call->getAttributes(), iName); - Out << iName << "->setAttributes(" << iName << "_PAL);"; - nl(Out); - break; - } - case Instruction::Select: { - const SelectInst* sel = cast(I); - Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create("; - Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \""; - printEscapedString(sel->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::UserOp1: - /// FALL THROUGH - case Instruction::UserOp2: { - /// FIXME: What should be done here? - break; - } - case Instruction::VAArg: { - const VAArgInst* va = cast(I); - Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst(" - << opNames[0] << ", " << getCppName(va->getType()) << ", \""; - printEscapedString(va->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::ExtractElement: { - const ExtractElementInst* eei = cast(I); - Out << "ExtractElementInst* " << getCppName(eei) - << " = new ExtractElementInst(" << opNames[0] - << ", " << opNames[1] << ", \""; - printEscapedString(eei->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::InsertElement: { - const InsertElementInst* iei = cast(I); - Out << "InsertElementInst* " << getCppName(iei) - << " = InsertElementInst::Create(" << opNames[0] - << ", " << opNames[1] << ", " << opNames[2] << ", \""; - printEscapedString(iei->getName()); - Out << "\", " << bbname << ");"; - break; - } - case Instruction::ShuffleVector: { - const ShuffleVectorInst* svi = cast(I); - Out << "ShuffleVectorInst* " << getCppName(svi) - << " = new ShuffleVectorInst(" << opNames[0] - << ", " << opNames[1] << ", " << opNames[2] << ", \""; - printEscapedString(svi->getName()); - Out << "\", " << bbname << ");"; - break; + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[call->getNumArgOperands()] << ", " << iName << "_params.begin(), " + << iName << "_params.end(), \""; + } else if (call->getNumArgOperands() == 1) { + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[call->getNumArgOperands()] << ", " << opNames[0] << ", \""; + } else { + Out << "CallInst* " << iName << " = CallInst::Create(" + << opNames[call->getNumArgOperands()] << ", \""; } - case Instruction::ExtractValue: { - const ExtractValueInst *evi = cast(I); - Out << "std::vector " << iName << "_indices;"; + printEscapedString(call->getName()); + Out << "\", " << bbname << ");"; + nl(Out) << iName << "->setCallingConv("; + printCallingConv(call->getCallingConv()); + Out << ");"; + nl(Out) << iName << "->setTailCall(" + << (call->isTailCall() ? "true" : "false"); + Out << ");"; + nl(Out); + printAttributes(call->getAttributes(), iName); + Out << iName << "->setAttributes(" << iName << "_PAL);"; + nl(Out); + break; + } + case Instruction::Select: { + const SelectInst* sel = cast(I); + Out << "SelectInst* " << getCppName(sel) << " = SelectInst::Create("; + Out << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", \""; + printEscapedString(sel->getName()); + Out << "\", " << bbname << ");"; + break; + } + case Instruction::UserOp1: + /// FALL THROUGH + case Instruction::UserOp2: { + /// FIXME: What should be done here? 
+    break;
+  }
+  case Instruction::VAArg: {
+    const VAArgInst* va = cast<VAArgInst>(I);
+    Out << "VAArgInst* " << getCppName(va) << " = new VAArgInst("
+        << opNames[0] << ", " << getCppName(va->getType()) << ", \"";
+    printEscapedString(va->getName());
+    Out << "\", " << bbname << ");";
+    break;
+  }
+  case Instruction::ExtractElement: {
+    const ExtractElementInst* eei = cast<ExtractElementInst>(I);
+    Out << "ExtractElementInst* " << getCppName(eei)
+        << " = new ExtractElementInst(" << opNames[0]
+        << ", " << opNames[1] << ", \"";
+    printEscapedString(eei->getName());
+    Out << "\", " << bbname << ");";
+    break;
+  }
+  case Instruction::InsertElement: {
+    const InsertElementInst* iei = cast<InsertElementInst>(I);
+    Out << "InsertElementInst* " << getCppName(iei)
+        << " = InsertElementInst::Create(" << opNames[0]
+        << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+    printEscapedString(iei->getName());
+    Out << "\", " << bbname << ");";
+    break;
+  }
+  case Instruction::ShuffleVector: {
+    const ShuffleVectorInst* svi = cast<ShuffleVectorInst>(I);
+    Out << "ShuffleVectorInst* " << getCppName(svi)
+        << " = new ShuffleVectorInst(" << opNames[0]
+        << ", " << opNames[1] << ", " << opNames[2] << ", \"";
+    printEscapedString(svi->getName());
+    Out << "\", " << bbname << ");";
+    break;
+  }
+  case Instruction::ExtractValue: {
+    const ExtractValueInst *evi = cast<ExtractValueInst>(I);
+    Out << "std::vector<unsigned> " << iName << "_indices;";
+    nl(Out);
+    for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
+      Out << iName << "_indices.push_back("
+          << evi->idx_begin()[i] << ");";
      nl(Out);
-      for (unsigned i = 0; i < evi->getNumIndices(); ++i) {
-        Out << iName << "_indices.push_back("
-            << evi->idx_begin()[i] << ");";
-        nl(Out);
-      }
-      Out << "ExtractValueInst* " << getCppName(evi)
-          << " = ExtractValueInst::Create(" << opNames[0]
-          << ", "
-          << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
-      printEscapedString(evi->getName());
-      Out << "\", " << bbname << ");";
-      break;
    }
+    Out << "ExtractValueInst* " << getCppName(evi)
+        << " = ExtractValueInst::Create(" << opNames[0]
+        << ", "
+        << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+    printEscapedString(evi->getName());
+    Out << "\", " << bbname << ");";
+    break;
+  }
+  case Instruction::InsertValue: {
+    const InsertValueInst *ivi = cast<InsertValueInst>(I);
+    Out << "std::vector<unsigned> " << iName << "_indices;";
+    nl(Out);
+    for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
+      Out << iName << "_indices.push_back("
+          << ivi->idx_begin()[i] << ");";
      nl(Out);
-      for (unsigned i = 0; i < ivi->getNumIndices(); ++i) {
-        Out << iName << "_indices.push_back("
-            << ivi->idx_begin()[i] << ");";
-        nl(Out);
-      }
-      Out << "InsertValueInst* " << getCppName(ivi)
-          << " = InsertValueInst::Create(" << opNames[0]
-          << ", " << opNames[1] << ", "
-          << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
-      printEscapedString(ivi->getName());
-      Out << "\", " << bbname << ");";
-      break;
    }
+    Out << "InsertValueInst* " << getCppName(ivi)
+        << " = InsertValueInst::Create(" << opNames[0]
+        << ", " << opNames[1] << ", "
+        << iName << "_indices.begin(), " << iName << "_indices.end(), \"";
+    printEscapedString(ivi->getName());
+    Out << "\", " << bbname << ");";
+    break;
+  }
  }
  DefinedValues.insert(I);
  nl(Out);
  delete [] opNames;
}
-  // Print out the types, constants and declarations needed by one function
-  void CppWriter::printFunctionUses(const Function* F) {
-    nl(Out) << "// Type Definitions"; nl(Out);
-    if (!is_inline) {
-      // Print the function's return type
-      printType(F->getReturnType());
+// Print out the types, constants and declarations needed by one function
+void CppWriter::printFunctionUses(const Function* F) {
+  nl(Out) << "// Type Definitions"; nl(Out);
+  if (!is_inline) {
+    // Print the function's return type
+    printType(F->getReturnType());

-      // Print the function's function type
-      printType(F->getFunctionType());
+    // Print the function's function type
+    printType(F->getFunctionType());

-      // Print the types of each of the function's arguments
-      for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-           AI != AE; ++AI) {
-        printType(AI->getType());
-      }
+    // Print the types of each of the function's arguments
+    for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+         AI != AE; ++AI) {
+      printType(AI->getType());
    }
+  }

-    // Print type definitions for every type referenced by an instruction and
-    // make a note of any global values or constants that are referenced
-    SmallPtrSet<GlobalValue*,64> gvs;
-    SmallPtrSet<Constant*,64> consts;
-    for (Function::const_iterator BB = F->begin(), BE = F->end();
-         BB != BE; ++BB){
-      for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
-           I != E; ++I) {
-        // Print the type of the instruction itself
-        printType(I->getType());
+  // Print type definitions for every type referenced by an instruction and
+  // make a note of any global values or constants that are referenced
+  SmallPtrSet<GlobalValue*,64> gvs;
+  SmallPtrSet<Constant*,64> consts;
+  for (Function::const_iterator BB = F->begin(), BE = F->end();
+       BB != BE; ++BB){
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
+         I != E; ++I) {
+      // Print the type of the instruction itself
+      printType(I->getType());

-        // Print the type of each of the instruction's operands
-        for (unsigned i = 0; i < I->getNumOperands(); ++i) {
-          Value* operand = I->getOperand(i);
-          printType(operand->getType());
-
-          // If the operand references a GVal or Constant, make a note of it
-          if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
-            gvs.insert(GV);
-            if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
-              if (GVar->hasInitializer())
-                consts.insert(GVar->getInitializer());
-          } else if (Constant* C = dyn_cast<Constant>(operand))
-            consts.insert(C);
-        }
+      // Print the type of each of the instruction's operands
+      for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+        Value* operand = I->getOperand(i);
+        printType(operand->getType());
+
+        // If the operand references a GVal or Constant, make a note of it
+        if (GlobalValue* GV = dyn_cast<GlobalValue>(operand)) {
+          gvs.insert(GV);
+          if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
+            if (GVar->hasInitializer())
+              consts.insert(GVar->getInitializer());
+        } else if (Constant* C = dyn_cast<Constant>(operand))
+          consts.insert(C);
      }
    }
+  }

-    // Print the function declarations for any functions encountered
-    nl(Out) << "// Function Declarations"; nl(Out);
-    for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
-         I != E; ++I) {
-      if (Function* Fun = dyn_cast<Function>(*I)) {
-        if (!is_inline || Fun != F)
-          printFunctionHead(Fun);
-      }
+  // Print the function declarations for any functions encountered
+  nl(Out) << "// Function Declarations"; nl(Out);
+  for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+       I != E; ++I) {
+    if (Function* Fun = dyn_cast<Function>(*I)) {
+      if (!is_inline || Fun != F)
+        printFunctionHead(Fun);
    }
+  }

-    // Print the global variable declarations for any variables encountered
-    nl(Out) << "// Global Variable Declarations"; nl(Out);
-    for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
-         I != E; ++I) {
-      if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
-        printVariableHead(F);
-    }
+  // Print the global variable declarations for any variables encountered
+  nl(Out) << "// Global Variable Declarations"; nl(Out);
+  for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+       I != E; ++I) {
+    if (GlobalVariable* F = dyn_cast<GlobalVariable>(*I))
+      printVariableHead(F);
+  }

-    // Print the constants found
-    nl(Out) << "// Constant Definitions"; nl(Out);
-    for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
-         E = consts.end(); I != E; ++I) {
-      printConstant(*I);
-    }
+// Print the constants found
+  nl(Out) << "// Constant Definitions"; nl(Out);
+  for (SmallPtrSet<Constant*,64>::iterator I = consts.begin(),
+       E = consts.end(); I != E; ++I) {
+    printConstant(*I);
+  }

-    // Process the global variables definitions now that all the constants have
-    // been emitted. These definitions just couple the gvars with their constant
-    // initializers.
-    nl(Out) << "// Global Variable Definitions"; nl(Out);
-    for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
-         I != E; ++I) {
-      if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
-        printVariableBody(GV);
-    }
+  // Process the global variables definitions now that all the constants have
+  // been emitted. These definitions just couple the gvars with their constant
+  // initializers.
+  nl(Out) << "// Global Variable Definitions"; nl(Out);
+  for (SmallPtrSet<GlobalValue*,64>::iterator I = gvs.begin(), E = gvs.end();
+       I != E; ++I) {
+    if (GlobalVariable* GV = dyn_cast<GlobalVariable>(*I))
+      printVariableBody(GV);
  }
+}

-  void CppWriter::printFunctionHead(const Function* F) {
-    nl(Out) << "Function* " << getCppName(F);
-    if (is_inline) {
-      Out << " = mod->getFunction(\"";
-      printEscapedString(F->getName());
-      Out << "\", " << getCppName(F->getFunctionType()) << ");";
-      nl(Out) << "if (!" << getCppName(F) << ") {";
-      nl(Out) << getCppName(F);
-    }
-    Out<< " = Function::Create(";
-    nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
-    nl(Out) << "/*Linkage=*/";
-    printLinkageType(F->getLinkage());
-    Out << ",";
-    nl(Out) << "/*Name=*/\"";
+void CppWriter::printFunctionHead(const Function* F) {
+  nl(Out) << "Function* " << getCppName(F);
+  if (is_inline) {
+    Out << " = mod->getFunction(\"";
    printEscapedString(F->getName());
-    Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
-    nl(Out,-1);
+    Out << "\", " << getCppName(F->getFunctionType()) << ");";
+    nl(Out) << "if (!" << getCppName(F) << ") {";
+    nl(Out) << getCppName(F);
+  }
+  Out<< " = Function::Create(";
+  nl(Out,1) << "/*Type=*/" << getCppName(F->getFunctionType()) << ",";
+  nl(Out) << "/*Linkage=*/";
+  printLinkageType(F->getLinkage());
+  Out << ",";
+  nl(Out) << "/*Name=*/\"";
+  printEscapedString(F->getName());
+  Out << "\", mod); " << (F->isDeclaration()? "// (external, no body)" : "");
"// (external, no body)" : ""); + nl(Out,-1); + printCppName(F); + Out << "->setCallingConv("; + printCallingConv(F->getCallingConv()); + Out << ");"; + nl(Out); + if (F->hasSection()) { + printCppName(F); + Out << "->setSection(\"" << F->getSection() << "\");"; + nl(Out); + } + if (F->getAlignment()) { + printCppName(F); + Out << "->setAlignment(" << F->getAlignment() << ");"; + nl(Out); + } + if (F->getVisibility() != GlobalValue::DefaultVisibility) { printCppName(F); - Out << "->setCallingConv("; - printCallingConv(F->getCallingConv()); + Out << "->setVisibility("; + printVisibilityType(F->getVisibility()); Out << ");"; nl(Out); - if (F->hasSection()) { - printCppName(F); - Out << "->setSection(\"" << F->getSection() << "\");"; - nl(Out); - } - if (F->getAlignment()) { - printCppName(F); - Out << "->setAlignment(" << F->getAlignment() << ");"; - nl(Out); - } - if (F->getVisibility() != GlobalValue::DefaultVisibility) { - printCppName(F); - Out << "->setVisibility("; - printVisibilityType(F->getVisibility()); - Out << ");"; - nl(Out); - } - if (F->hasGC()) { - printCppName(F); - Out << "->setGC(\"" << F->getGC() << "\");"; - nl(Out); - } - if (is_inline) { - Out << "}"; - nl(Out); - } - printAttributes(F->getAttributes(), getCppName(F)); + } + if (F->hasGC()) { printCppName(F); - Out << "->setAttributes(" << getCppName(F) << "_PAL);"; + Out << "->setGC(\"" << F->getGC() << "\");"; nl(Out); } + if (is_inline) { + Out << "}"; + nl(Out); + } + printAttributes(F->getAttributes(), getCppName(F)); + printCppName(F); + Out << "->setAttributes(" << getCppName(F) << "_PAL);"; + nl(Out); +} - void CppWriter::printFunctionBody(const Function *F) { - if (F->isDeclaration()) - return; // external functions have no bodies. - - // Clear the DefinedValues and ForwardRefs maps because we can't have - // cross-function forward refs - ForwardRefs.clear(); - DefinedValues.clear(); +void CppWriter::printFunctionBody(const Function *F) { + if (F->isDeclaration()) + return; // external functions have no bodies. - // Create all the argument values - if (!is_inline) { - if (!F->arg_empty()) { - Out << "Function::arg_iterator args = " << getCppName(F) - << "->arg_begin();"; - nl(Out); - } - for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - Out << "Value* " << getCppName(AI) << " = args++;"; - nl(Out); - if (AI->hasName()) { - Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");"; - nl(Out); - } - } - } + // Clear the DefinedValues and ForwardRefs maps because we can't have + // cross-function forward refs + ForwardRefs.clear(); + DefinedValues.clear(); - // Create all the basic blocks - nl(Out); - for (Function::const_iterator BI = F->begin(), BE = F->end(); - BI != BE; ++BI) { - std::string bbname(getCppName(BI)); - Out << "BasicBlock* " << bbname << - " = BasicBlock::Create(mod->getContext(), \""; - if (BI->hasName()) - printEscapedString(BI->getName()); - Out << "\"," << getCppName(BI->getParent()) << ",0);"; + // Create all the argument values + if (!is_inline) { + if (!F->arg_empty()) { + Out << "Function::arg_iterator args = " << getCppName(F) + << "->arg_begin();"; nl(Out); } - - // Output all of its basic blocks... 
-    for (Function::const_iterator BI = F->begin(), BE = F->end();
-         BI != BE; ++BI) {
-      std::string bbname(getCppName(BI));
-      nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+    for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+         AI != AE; ++AI) {
+      Out << "Value* " << getCppName(AI) << " = args++;";
      nl(Out);
-
-      // Output all of the instructions in the basic block...
-      for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
-           I != E; ++I) {
-        printInstruction(I,bbname);
+      if (AI->hasName()) {
+        Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");";
+        nl(Out);
      }
    }
+  }

-    // Loop over the ForwardRefs and resolve them now that all instructions
-    // are generated.
-    if (!ForwardRefs.empty()) {
-      nl(Out) << "// Resolve Forward References";
-      nl(Out);
-    }
+  // Create all the basic blocks
+  nl(Out);
+  for (Function::const_iterator BI = F->begin(), BE = F->end();
+       BI != BE; ++BI) {
+    std::string bbname(getCppName(BI));
+    Out << "BasicBlock* " << bbname <<
+           " = BasicBlock::Create(mod->getContext(), \"";
+    if (BI->hasName())
+      printEscapedString(BI->getName());
+    Out << "\"," << getCppName(BI->getParent()) << ",0);";
+    nl(Out);
+  }

-    while (!ForwardRefs.empty()) {
-      ForwardRefMap::iterator I = ForwardRefs.begin();
-      Out << I->second << "->replaceAllUsesWith("
-          << getCppName(I->first) << "); delete " << I->second << ";";
-      nl(Out);
-      ForwardRefs.erase(I);
+  // Output all of its basic blocks... for the function
+  for (Function::const_iterator BI = F->begin(), BE = F->end();
+       BI != BE; ++BI) {
+    std::string bbname(getCppName(BI));
+    nl(Out) << "// Block " << BI->getName() << " (" << bbname << ")";
+    nl(Out);
+
+    // Output all of the instructions in the basic block...
+    for (BasicBlock::const_iterator I = BI->begin(), E = BI->end();
+         I != E; ++I) {
+      printInstruction(I,bbname);
    }
  }

-  void CppWriter::printInline(const std::string& fname,
-                              const std::string& func) {
-    const Function* F = TheModule->getFunction(func);
-    if (!F) {
-      error(std::string("Function '") + func + "' not found in input module");
-      return;
-    }
-    if (F->isDeclaration()) {
-      error(std::string("Function '") + func + "' is external!");
-      return;
-    }
-    nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
-            << getCppName(F);
-    unsigned arg_count = 1;
-    for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
-         AI != AE; ++AI) {
-      Out << ", Value* arg_" << arg_count;
-    }
-    Out << ") {";
+  // Loop over the ForwardRefs and resolve them now that all instructions
+  // are generated.
+  if (!ForwardRefs.empty()) {
+    nl(Out) << "// Resolve Forward References";
    nl(Out);
-    is_inline = true;
-    printFunctionUses(F);
-    printFunctionBody(F);
-    is_inline = false;
-    Out << "return " << getCppName(F->begin()) << ";";
-    nl(Out) << "}";
+  }
+
+  while (!ForwardRefs.empty()) {
+    ForwardRefMap::iterator I = ForwardRefs.begin();
+    Out << I->second << "->replaceAllUsesWith("
+        << getCppName(I->first) << "); delete " << I->second << ";";
    nl(Out);
+    ForwardRefs.erase(I);
  }
+}

-  void CppWriter::printModuleBody() {
-    // Print out all the type definitions
-    nl(Out) << "// Type Definitions"; nl(Out);
-    printTypes(TheModule);
-
-    // Functions can call each other and global variables can reference them so
-    // define all the functions first before emitting their function bodies.
-    nl(Out) << "// Function Declarations"; nl(Out);
-    for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
-         I != E; ++I)
-      printFunctionHead(I);
-
-    // Process the global variables declarations. We can't initialze them until
-    // after the constants are printed so just print a header for each global
-    nl(Out) << "// Global Variable Declarations\n"; nl(Out);
-    for (Module::const_global_iterator I = TheModule->global_begin(),
-         E = TheModule->global_end(); I != E; ++I) {
-      printVariableHead(I);
-    }
+void CppWriter::printInline(const std::string& fname,
+                            const std::string& func) {
+  const Function* F = TheModule->getFunction(func);
+  if (!F) {
+    error(std::string("Function '") + func + "' not found in input module");
+    return;
+  }
+  if (F->isDeclaration()) {
+    error(std::string("Function '") + func + "' is external!");
+    return;
+  }
+  nl(Out) << "BasicBlock* " << fname << "(Module* mod, Function *"
+          << getCppName(F);
+  unsigned arg_count = 1;
+  for (Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+       AI != AE; ++AI) {
+    Out << ", Value* arg_" << arg_count;
+  }
+  Out << ") {";
+  nl(Out);
+  is_inline = true;
+  printFunctionUses(F);
+  printFunctionBody(F);
+  is_inline = false;
+  Out << "return " << getCppName(F->begin()) << ";";
+  nl(Out) << "}";
+  nl(Out);
+}

-    // Print out all the constants definitions. Constants don't recurse except
-    // through GlobalValues. All GlobalValues have been declared at this point
-    // so we can proceed to generate the constants.
-    nl(Out) << "// Constant Definitions"; nl(Out);
-    printConstants(TheModule);
-
-    // Process the global variables definitions now that all the constants have
-    // been emitted. These definitions just couple the gvars with their constant
-    // initializers.
-    nl(Out) << "// Global Variable Definitions"; nl(Out);
-    for (Module::const_global_iterator I = TheModule->global_begin(),
-         E = TheModule->global_end(); I != E; ++I) {
-      printVariableBody(I);
-    }
+void CppWriter::printModuleBody() {
+  // Print out all the type definitions
+  nl(Out) << "// Type Definitions"; nl(Out);
+  printTypes(TheModule);
+
+  // Functions can call each other and global variables can reference them so
+  // define all the functions first before emitting their function bodies.
+  nl(Out) << "// Function Declarations"; nl(Out);
+  for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+       I != E; ++I)
+    printFunctionHead(I);
+
+  // Process the global variables declarations. We can't initialize them until
+  // after the constants are printed so just print a header for each global
+  nl(Out) << "// Global Variable Declarations\n"; nl(Out);
+  for (Module::const_global_iterator I = TheModule->global_begin(),
+       E = TheModule->global_end(); I != E; ++I) {
+    printVariableHead(I);
+  }

-    // Finally, we can safely put out all of the function bodies.
-    nl(Out) << "// Function Definitions"; nl(Out);
-    for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
-         I != E; ++I) {
-      if (!I->isDeclaration()) {
-        nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
-                << ")";
-        nl(Out) << "{";
-        nl(Out,1);
-        printFunctionBody(I);
-        nl(Out,-1) << "}";
-        nl(Out);
-      }
-    }
+  // Print out all the constants definitions. Constants don't recurse except
+  // through GlobalValues. All GlobalValues have been declared at this point
+  // so we can proceed to generate the constants.
+  nl(Out) << "// Constant Definitions"; nl(Out);
+  printConstants(TheModule);
+
+  // Process the global variables definitions now that all the constants have
+  // been emitted. These definitions just couple the gvars with their constant
+  // initializers.
+  nl(Out) << "// Global Variable Definitions"; nl(Out);
+  for (Module::const_global_iterator I = TheModule->global_begin(),
+       E = TheModule->global_end(); I != E; ++I) {
+    printVariableBody(I);
  }

-  void CppWriter::printProgram(const std::string& fname,
-                               const std::string& mName) {
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "#include \n";
-    Out << "using namespace llvm;\n\n";
-    Out << "Module* " << fname << "();\n\n";
-    Out << "int main(int argc, char**argv) {\n";
-    Out << "  Module* Mod = " << fname << "();\n";
-    Out << "  verifyModule(*Mod, PrintMessageAction);\n";
-    Out << "  PassManager PM;\n";
-    Out << "  PM.add(createPrintModulePass(&outs()));\n";
-    Out << "  PM.run(*Mod);\n";
-    Out << "  return 0;\n";
-    Out << "}\n\n";
-    printModule(fname,mName);
-  }
-
-  void CppWriter::printModule(const std::string& fname,
-                              const std::string& mName) {
-    nl(Out) << "Module* " << fname << "() {";
-    nl(Out,1) << "// Module Construction";
-    nl(Out) << "Module* mod = new Module(\"";
-    printEscapedString(mName);
-    Out << "\", getGlobalContext());";
-    if (!TheModule->getTargetTriple().empty()) {
-      nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
-    }
-    if (!TheModule->getTargetTriple().empty()) {
-      nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
-              << "\");";
+  // Finally, we can safely put out all of the function bodies.
+  nl(Out) << "// Function Definitions"; nl(Out);
+  for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+       I != E; ++I) {
+    if (!I->isDeclaration()) {
+      nl(Out) << "// Function: " << I->getName() << " (" << getCppName(I)
+              << ")";
+      nl(Out) << "{";
+      nl(Out,1);
+      printFunctionBody(I);
+      nl(Out,-1) << "}";
+      nl(Out);
    }
+  }
+}

-    if (!TheModule->getModuleInlineAsm().empty()) {
-      nl(Out) << "mod->setModuleInlineAsm(\"";
-      printEscapedString(TheModule->getModuleInlineAsm());
-      Out << "\");";
-    }
-    nl(Out);
+void CppWriter::printProgram(const std::string& fname,
+                             const std::string& mName) {
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "#include \n";
+  Out << "using namespace llvm;\n\n";
+  Out << "Module* " << fname << "();\n\n";
+  Out << "int main(int argc, char**argv) {\n";
+  Out << "  Module* Mod = " << fname << "();\n";
+  Out << "  verifyModule(*Mod, PrintMessageAction);\n";
+  Out << "  PassManager PM;\n";
+  Out << "  PM.add(createPrintModulePass(&outs()));\n";
+  Out << "  PM.run(*Mod);\n";
+  Out << "  return 0;\n";
+  Out << "}\n\n";
+  printModule(fname,mName);
+}

-    // Loop over the dependent libraries and emit them.
-    Module::lib_iterator LI = TheModule->lib_begin();
-    Module::lib_iterator LE = TheModule->lib_end();
-    while (LI != LE) {
-      Out << "mod->addLibrary(\"" << *LI << "\");";
-      nl(Out);
-      ++LI;
-    }
-    printModuleBody();
-    nl(Out) << "return mod;";
-    nl(Out,-1) << "}";
+void CppWriter::printModule(const std::string& fname,
+                            const std::string& mName) {
+  nl(Out) << "Module* " << fname << "() {";
+  nl(Out,1) << "// Module Construction";
+  nl(Out) << "Module* mod = new Module(\"";
+  printEscapedString(mName);
+  Out << "\", getGlobalContext());";
+  if (!TheModule->getTargetTriple().empty()) {
+    nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
+  }
+  if (!TheModule->getTargetTriple().empty()) {
+    nl(Out) << "mod->setTargetTriple(\"" << TheModule->getTargetTriple()
+            << "\");";
+  }
+
+  if (!TheModule->getModuleInlineAsm().empty()) {
+    nl(Out) << "mod->setModuleInlineAsm(\"";
+    printEscapedString(TheModule->getModuleInlineAsm());
+    Out << "\");";
+  }
+  nl(Out);
+
+  // Loop over the dependent libraries and emit them.
+  Module::lib_iterator LI = TheModule->lib_begin();
+  Module::lib_iterator LE = TheModule->lib_end();
+  while (LI != LE) {
+    Out << "mod->addLibrary(\"" << *LI << "\");";
    nl(Out);
+    ++LI;
  }
+  printModuleBody();
+  nl(Out) << "return mod;";
+  nl(Out,-1) << "}";
+  nl(Out);
+}
+
+void CppWriter::printContents(const std::string& fname,
+                              const std::string& mName) {
+  Out << "\nModule* " << fname << "(Module *mod) {\n";
+  Out << "\nmod->setModuleIdentifier(\"";
+  printEscapedString(mName);
+  Out << "\");\n";
+  printModuleBody();
+  Out << "\nreturn mod;\n";
+  Out << "\n}\n";
+}

-  void CppWriter::printContents(const std::string& fname,
-                                const std::string& mName) {
-    Out << "\nModule* " << fname << "(Module *mod) {\n";
-    Out << "\nmod->setModuleIdentifier(\"";
-    printEscapedString(mName);
-    Out << "\");\n";
-    printModuleBody();
-    Out << "\nreturn mod;\n";
-    Out << "\n}\n";
  }
+void CppWriter::printFunction(const std::string& fname,
+                              const std::string& funcName) {
+  const Function* F = TheModule->getFunction(funcName);
+  if (!F) {
+    error(std::string("Function '") + funcName + "' not found in input module");
+    return;
  }
+  Out << "\nFunction* " << fname << "(Module *mod) {\n";
+  printFunctionUses(F);
+  printFunctionHead(F);
+  printFunctionBody(F);
+  Out << "return " << getCppName(F) << ";\n";
+  Out << "}\n";
+}

-  void CppWriter::printFunction(const std::string& fname,
-                                const std::string& funcName) {
-    const Function* F = TheModule->getFunction(funcName);
-    if (!F) {
-      error(std::string("Function '") + funcName + "' not found in input module");
-      return;
-    }
-    Out << "\nFunction* " << fname << "(Module *mod) {\n";
-    printFunctionUses(F);
-    printFunctionHead(F);
-    printFunctionBody(F);
-    Out << "return " << getCppName(F) << ";\n";
-    Out << "}\n";
-  }
-
-  void CppWriter::printFunctions() {
-    const Module::FunctionListType &funcs = TheModule->getFunctionList();
-    Module::const_iterator I = funcs.begin();
-    Module::const_iterator IE = funcs.end();
-
-    for (; I != IE; ++I) {
-      const Function &func = *I;
-      if (!func.isDeclaration()) {
-        std::string name("define_");
-        name += func.getName();
-        printFunction(name, func.getName());
-      }
+void CppWriter::printFunctions() {
+  const Module::FunctionListType &funcs = TheModule->getFunctionList();
+  Module::const_iterator I = funcs.begin();
+  Module::const_iterator IE = funcs.end();
+
+  for (; I != IE; ++I) {
+    const Function &func = *I;
+    if (!func.isDeclaration()) {
+      std::string name("define_");
+      name += func.getName();
+      printFunction(name, func.getName());
    }
  }
+}

-  void CppWriter::printVariable(const std::string& fname,
-                                const std::string& varName) {
-    const GlobalVariable* GV = TheModule->getNamedGlobal(varName);
+void CppWriter::printVariable(const std::string& fname,
+                              const std::string& varName) {
+  const GlobalVariable* GV = TheModule->getNamedGlobal(varName);

-    if (!GV) {
-      error(std::string("Variable '") + varName + "' not found in input module");
-      return;
-    }
-    Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
-    printVariableUses(GV);
-    printVariableHead(GV);
-    printVariableBody(GV);
-    Out << "return " << getCppName(GV) << ";\n";
-    Out << "}\n";
-  }
-
-  void CppWriter::printType(const std::string& fname,
-                            const std::string& typeName) {
-    const Type* Ty = TheModule->getTypeByName(typeName);
-    if (!Ty) {
-      error(std::string("Type '") + typeName + "' not found in input module");
-      return;
-    }
-    Out << "\nType* " << fname << "(Module *mod) {\n";
-    printType(Ty);
-    Out << "return " << getCppName(Ty) << ";\n";
-    Out << "}\n";
-  }
-
-  bool CppWriter::runOnModule(Module &M) {
-    TheModule = &M;
-
-    // Emit a header
-    Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
-
-    // Get the name of the function we're supposed to generate
-    std::string fname = FuncName.getValue();
-
-    // Get the name of the thing we are to generate
-    std::string tgtname = NameToGenerate.getValue();
-    if (GenerationType == GenModule ||
-        GenerationType == GenContents ||
-        GenerationType == GenProgram ||
-        GenerationType == GenFunctions) {
-      if (tgtname == "!bad!") {
-        if (M.getModuleIdentifier() == "-")
-          tgtname = "";
-        else
-          tgtname = M.getModuleIdentifier();
-      }
-    } else if (tgtname == "!bad!")
-      error("You must use the -for option with -gen-{function,variable,type}");
-
-    switch (WhatToGenerate(GenerationType)) {
-    case GenProgram:
-      if (fname.empty())
-        fname = "makeLLVMModule";
-      printProgram(fname,tgtname);
-      break;
-    case GenModule:
-      if (fname.empty())
-        fname = "makeLLVMModule";
-      printModule(fname,tgtname);
-      break;
-    case GenContents:
-      if (fname.empty())
-        fname = "makeLLVMModuleContents";
-      printContents(fname,tgtname);
-      break;
-    case GenFunction:
-      if (fname.empty())
-        fname = "makeLLVMFunction";
-      printFunction(fname,tgtname);
-      break;
-    case GenFunctions:
-      printFunctions();
-      break;
-    case GenInline:
-      if (fname.empty())
-        fname = "makeLLVMInline";
-      printInline(fname,tgtname);
-      break;
-    case GenVariable:
-      if (fname.empty())
-        fname = "makeLLVMVariable";
-      printVariable(fname,tgtname);
-      break;
-    case GenType:
-      if (fname.empty())
-        fname = "makeLLVMType";
-      printType(fname,tgtname);
-      break;
-    default:
-      error("Invalid generation option");
-    }
+  if (!GV) {
+    error(std::string("Variable '") + varName + "' not found in input module");
+    return;
+  }
+  Out << "\nGlobalVariable* " << fname << "(Module *mod) {\n";
+  printVariableUses(GV);
+  printVariableHead(GV);
+  printVariableBody(GV);
+  Out << "return " << getCppName(GV) << ";\n";
+  Out << "}\n";
+}

-    return false;
+void CppWriter::printType(const std::string& fname,
+                          const std::string& typeName) {
+  const Type* Ty = TheModule->getTypeByName(typeName);
+  if (!Ty) {
+    error(std::string("Type '") + typeName + "' not found in input module");
+    return;
  }
+  Out << "\nType* " << fname << "(Module *mod) {\n";
+  printType(Ty);
+  Out << "return " << getCppName(Ty) << ";\n";
+  Out << "}\n";
+}
+
+bool CppWriter::runOnModule(Module &M) {
+  TheModule = &M;
+
+  // Emit a header
+  Out << "// Generated by llvm2cpp - DO NOT MODIFY!\n\n";
+
+  // Get the name of the function we're supposed to generate
+  std::string fname = FuncName.getValue();
+
+  // Get the name of the thing we are to generate
+  std::string tgtname = NameToGenerate.getValue();
+  if (GenerationType == GenModule ||
+      GenerationType == GenContents ||
+      GenerationType == GenProgram ||
+      GenerationType == GenFunctions) {
+    if (tgtname == "!bad!") {
+      if (M.getModuleIdentifier() == "-")
+        tgtname = "";
+      else
+        tgtname = M.getModuleIdentifier();
+    }
+  } else if (tgtname == "!bad!")
+    error("You must use the -for option with -gen-{function,variable,type}");
+
+  switch (WhatToGenerate(GenerationType)) {
+  case GenProgram:
+    if (fname.empty())
+      fname = "makeLLVMModule";
+    printProgram(fname,tgtname);
+    break;
+  case GenModule:
+    if (fname.empty())
+      fname = "makeLLVMModule";
+    printModule(fname,tgtname);
+    break;
+  case GenContents:
+    if (fname.empty())
+      fname = "makeLLVMModuleContents";
+    printContents(fname,tgtname);
+    break;
+  case GenFunction:
+    if (fname.empty())
+      fname = "makeLLVMFunction";
+    printFunction(fname,tgtname);
+    break;
+  case GenFunctions:
+    printFunctions();
+    break;
+  case GenInline:
+    if (fname.empty())
+      fname = "makeLLVMInline";
+    printInline(fname,tgtname);
+    break;
+  case GenVariable:
+    if (fname.empty())
+      fname = "makeLLVMVariable";
+    printVariable(fname,tgtname);
+    break;
+  case GenType:
+    if (fname.empty())
+      fname = "makeLLVMType";
+    printType(fname,tgtname);
+    break;
+  default:
+    error("Invalid generation option");
+  }
+
+  return false;
}

char CppWriter::ID = 0;
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
index e42e9b3..b6e4d65 100644
--- a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -145,8 +145,9 @@ void MBlazeAsmPrinter::printSavedRegsBitmask(raw_ostream &O) {
   const MachineFrameInfo *MFI = MF->getFrameInfo();
   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
-    unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(CSI[i].getReg());
-    if (CSI[i].getRegClass() == MBlaze::CPURegsRegisterClass)
+    unsigned Reg = CSI[i].getReg();
+    unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(Reg);
+    if (MBlaze::CPURegsRegisterClass->contains(Reg))
       CPUBitmask |= (1 << RegNum);
   }
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index 23889b1..1730b68 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -234,6 +234,24 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   MachineRegisterInfo &R = F->getRegInfo();
   MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, loop);
+  F->insert(It, finish);
+
+  // Update machine-CFG edges by transferring all successors and
+  // remaining instructions from the current block to the new block which
+  // will contain the Phi node for the select.
+  finish->splice(finish->begin(), BB,
+                 llvm::next(MachineBasicBlock::iterator(MI)),
+                 BB->end());
+  finish->transferSuccessorsAndUpdatePHIs(BB);
+
+  // Add the true and fallthrough blocks as its successors.
+  BB->addSuccessor(loop);
+  BB->addSuccessor(finish);
+
+  // Next, add the finish block as a successor of the loop block
+  loop->addSuccessor(finish);
+  loop->addSuccessor(loop);

   unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
   BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT)
@@ -249,26 +267,6 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     .addReg(IAMT)
     .addMBB(finish);

-  F->insert(It, loop);
-  F->insert(It, finish);
-
-  // Update machine-CFG edges by first adding all successors of the current
-  // block to the new block which will contain the Phi node for the select.
-  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
-      e = BB->succ_end(); i != e; ++i)
-    finish->addSuccessor(*i);
-
-  // Next, remove all successors of the current block, and add the true
-  // and fallthrough blocks as its successors.
-  while(!BB->succ_empty())
-    BB->removeSuccessor(BB->succ_begin());
-  BB->addSuccessor(loop);
-  BB->addSuccessor(finish);
-
-  // Next, add the finish block as a successor of the loop block
-  loop->addSuccessor(finish);
-  loop->addSuccessor(loop);
-
   unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
   unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
   BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
@@ -298,12 +296,13 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     .addReg(NAMT)
     .addMBB(loop);

-  BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+  BuildMI(*finish, finish->begin(), dl,
+          TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
     .addReg(IVAL).addMBB(BB)
     .addReg(NDST).addMBB(loop);

   // The pseudo instruction is no longer needed so remove it
-  F->DeleteMachineInstr(MI);
+  MI->eraseFromParent();
   return finish;
   }

@@ -338,27 +337,23 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     case MBlazeCC::LE: Opc = MBlaze::BGTID; break;
   }

-  BuildMI(BB, dl, TII->get(Opc))
-    .addReg(MI->getOperand(3).getReg())
-    .addMBB(dneBB);
-
   F->insert(It, flsBB);
   F->insert(It, dneBB);

-  // Update machine-CFG edges by first adding all successors of the current
-  // block to the new block which will contain the Phi node for the select.
-  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
-      e = BB->succ_end(); i != e; ++i)
-    dneBB->addSuccessor(*i);
+  // Transfer the remainder of BB and its successor edges to dneBB.
+  dneBB->splice(dneBB->begin(), BB,
+                llvm::next(MachineBasicBlock::iterator(MI)),
+                BB->end());
+  dneBB->transferSuccessorsAndUpdatePHIs(BB);

-  // Next, remove all successors of the current block, and add the true
-  // and fallthrough blocks as its successors.
-  while(!BB->succ_empty())
-    BB->removeSuccessor(BB->succ_begin());
   BB->addSuccessor(flsBB);
   BB->addSuccessor(dneBB);
   flsBB->addSuccessor(dneBB);

+  BuildMI(BB, dl, TII->get(Opc))
+    .addReg(MI->getOperand(3).getReg())
+    .addMBB(dneBB);
+
   //  sinkMBB:
   //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
   //  ...
@@ -366,11 +361,12 @@ MBlazeTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   //  .addReg(MI->getOperand(1).getReg()).addMBB(flsBB)
   //  .addReg(MI->getOperand(2).getReg()).addMBB(BB);

-  BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+  BuildMI(*dneBB, dneBB->begin(), dl,
+          TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
     .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
     .addReg(MI->getOperand(1).getReg()).addMBB(BB);

-  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
   return dneBB;
   }
}

@@ -408,7 +404,7 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
   // FIXME there isn't actually debug info here
   DebugLoc dl = Op.getDebugLoc();
   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
   return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
 }

@@ -439,10 +435,8 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
 SDValue MBlazeTargetLowering::
 LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
   SDValue ResNode;
-  EVT PtrVT = Op.getValueType();
   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
   const Constant *C = N->getConstVal();
-  SDValue Zero = DAG.getConstant(0, PtrVT);
   DebugLoc dl = Op.getDebugLoc();

   SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
@@ -531,6 +525,7 @@ SDValue MBlazeTargetLowering::
 LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
           bool isVarArg, bool &isTailCall,
           const SmallVectorImpl<ISD::OutputArg> &Outs,
+          const SmallVectorImpl<SDValue> &OutVals,
           const SmallVectorImpl<ISD::InputArg> &Ins,
           DebugLoc dl, SelectionDAG &DAG,
           SmallVectorImpl<SDValue> &InVals) const {
@@ -562,7 +557,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     EVT RegVT = VA.getLocVT();
-    SDValue Arg = Outs[i].Val;
+    SDValue Arg = OutVals[i];

     // Promote the value if needed.
     switch (VA.getLocInfo()) {
@@ -590,7 +585,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
       // Create the frame index object for this incoming parameter
       LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
       int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
-                                      LastArgStackLoc, true, false);
+                                      LastArgStackLoc, true);

       SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

@@ -623,7 +618,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
   // node so that legalize doesn't hack it.
   unsigned char OpFlag = MBlazeII::MO_NO_FLAG;
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                         getPointerTy(), 0, OpFlag);
   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
     Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
@@ -779,7 +774,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
       // offset on PEI::calculateFrameObjectOffsets.
       // Arguments are always 32-bit.
       unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
-      int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
+      int FI = MFI->CreateFixedObject(ArgSize, 0, true);
       MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+
                                  (FirstStackArgLoc + VA.getLocMemOffset())));
 
@@ -810,7 +805,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
       unsigned LiveReg = MF.addLiveIn(Reg, RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32);
 
-      int FI = MFI->CreateFixedObject(4, 0, true, false);
+      int FI = MFI->CreateFixedObject(4, 0, true);
       MBlazeFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
       OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
@@ -841,6 +836,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
 SDValue MBlazeTargetLowering::
 LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
             const SmallVectorImpl<ISD::OutputArg> &Outs,
+            const SmallVectorImpl<SDValue> &OutVals,
             DebugLoc dl, SelectionDAG &DAG) const {
   // CCValAssign - represent the assignment of
   // the return value to a location
@@ -869,7 +865,7 @@ LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
     assert(VA.isRegLoc() && "Can only return in registers!");
 
     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                             Outs[i].Val, Flag);
+                             OutVals[i], Flag);
 
     // guarantee that all emitted copies are
     // stuck together, avoiding something bad
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 9f9ac89..5ec2563 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -109,6 +109,7 @@ namespace llvm {
               CallingConv::ID CallConv, bool isVarArg,
               bool &isTailCall,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
+              const SmallVectorImpl<SDValue> &OutVals,
               const SmallVectorImpl<ISD::InputArg> &Ins,
               DebugLoc dl, SelectionDAG &DAG,
               SmallVectorImpl<SDValue> &InVals) const;
@@ -117,6 +118,7 @@ namespace llvm {
     LowerReturn(SDValue Chain,
                 CallingConv::ID CallConv, bool isVarArg,
                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<SDValue> &OutVals,
                 DebugLoc dl, SelectionDAG &DAG) const;
 
     virtual MachineBasicBlock *
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 4c4d86b..6ff5825 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -110,15 +110,13 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
   BuildMI(MBB, MI, DL, get(MBlaze::NOP));
 }
 
-bool MBlazeInstrInfo::
-copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-             unsigned DestReg, unsigned SrcReg,
-             const TargetRegisterClass *DestRC,
-             const TargetRegisterClass *SrcRC,
-             DebugLoc DL) const {
+void MBlazeInstrInfo::
+copyPhysReg(MachineBasicBlock &MBB,
+            MachineBasicBlock::iterator I, DebugLoc DL,
+            unsigned DestReg, unsigned SrcReg,
+            bool KillSrc) const {
   llvm::BuildMI(MBB, I, DL, get(MBlaze::ADD), DestReg)
-    .addReg(SrcReg).addReg(MBlaze::R0);
-  return true;
+    .addReg(SrcReg, getKillRegState(KillSrc)).addReg(MBlaze::R0);
 }
 
 void MBlazeInstrInfo::
@@ -141,54 +139,17 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     .addImm(0).addFrameIndex(FI);
 }
 
-MachineInstr *MBlazeInstrInfo::
-foldMemoryOperandImpl(MachineFunction &MF,
-                      MachineInstr* MI,
-                      const SmallVectorImpl<unsigned> &Ops, int FI) const {
-  if (Ops.size() != 1) return NULL;
-
-  MachineInstr *NewMI = NULL;
-
-  switch (MI->getOpcode()) {
-  case MBlaze::OR:
-  case MBlaze::ADD:
-    if ((MI->getOperand(0).isReg()) &&
-        (MI->getOperand(2).isReg()) &&
-        (MI->getOperand(2).getReg() == MBlaze::R0) &&
-        (MI->getOperand(1).isReg())) {
-      if (Ops[0] == 0) {    // COPY -> STORE
-        unsigned SrcReg = MI->getOperand(1).getReg();
-        bool isKill = MI->getOperand(1).isKill();
-        bool isUndef = MI->getOperand(1).isUndef();
-        NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::SW))
-          .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
-          .addImm(0).addFrameIndex(FI);
-      } else {              // COPY -> LOAD
-        unsigned DstReg = MI->getOperand(0).getReg();
-        bool isDead = MI->getOperand(0).isDead();
-        bool isUndef = MI->getOperand(0).isUndef();
-        NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::LW))
-          .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
-                  getUndefRegState(isUndef))
-          .addImm(0).addFrameIndex(FI);
-      }
-    }
-    break;
-  }
-
-  return NewMI;
-}
-
 //===----------------------------------------------------------------------===//
 // Branch Analysis
 //===----------------------------------------------------------------------===//
 unsigned MBlazeInstrInfo::
 InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
              MachineBasicBlock *FBB,
-             const SmallVectorImpl<MachineOperand> &Cond) const {
+             const SmallVectorImpl<MachineOperand> &Cond,
+             DebugLoc DL) const {
   // Can only insert uncond branches so far.
   assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
-  BuildMI(&MBB, DebugLoc(), get(MBlaze::BRI)).addMBB(TBB);
+  BuildMI(&MBB, DL, get(MBlaze::BRI)).addMBB(TBB);
   return 1;
 }
 
@@ -209,12 +170,8 @@ unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
 
   GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
-  bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20,
-                              MBlaze::CPURegsRegisterClass,
-                              MBlaze::CPURegsRegisterClass,
-                              DebugLoc());
-  assert(Ok && "Couldn't assign to global base register!");
-  Ok = Ok; // Silence warning when assertions are turned off.
+  BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY),
+          GlobalBaseReg).addReg(MBlaze::R20);
   RegInfo.addLiveIn(MBlaze::R20);
 
   MBlazeFI->setGlobalBaseReg(GlobalBaseReg);
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index c9fdc88..f074370 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -198,13 +198,12 @@ public:
   /// Branch Analysis
   virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                 MachineBasicBlock *FBB,
-                                const SmallVectorImpl<MachineOperand> &Cond) const;
-  virtual bool copyRegToReg(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator I,
-                            unsigned DestReg, unsigned SrcReg,
-                            const TargetRegisterClass *DestRC,
-                            const TargetRegisterClass *SrcRC,
-                            DebugLoc DL) const;
+                                const SmallVectorImpl<MachineOperand> &Cond,
+                                DebugLoc DL) const;
+  virtual void copyPhysReg(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator I, DebugLoc DL,
+                           unsigned DestReg, unsigned SrcReg,
+                           bool KillSrc) const;
   virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    unsigned SrcReg, bool isKill, int FrameIndex,
@@ -217,18 +216,6 @@ public:
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const;
 
-  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                              MachineInstr* MI,
-                                              const SmallVectorImpl<unsigned> &Ops,
-                                              int FrameIndex) const;
-
-  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
-                                              MachineInstr* MI,
-                                              const SmallVectorImpl<unsigned> &Ops,
-                                              MachineInstr* LoadMI) const {
-    return 0;
-  }
-
   /// Insert nop instruction when hazard condition is found
   virtual void insertNoop(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MI) const;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index f15eea9..8cafa8c 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -148,22 +148,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const {
   return CalleeSavedRegs;
 }
 
-/// MBlaze Callee Saved Register Classes
-const TargetRegisterClass* const* MBlazeRegisterInfo::
-getCalleeSavedRegClasses(const MachineFunction *MF) const {
-  static const TargetRegisterClass * const CalleeSavedRC[] = {
-    &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
-    &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
-    &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
-    &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
-    &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
-    &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
-    0
-  };
-
-  return CalleeSavedRC;
-}
-
 BitVector MBlazeRegisterInfo::
 getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index b618bf4..af97b0e 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -54,9 +54,6 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
   /// Code Generation virtual methods...
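The MBlazeInstrInfo changes above follow another cross-target migration in
this update: the fallible, register-class based copyRegToReg hook is replaced
by copyPhysReg, which copies physical registers and must always succeed,
while virtual-register copies (as in getGlobalBaseReg above) are emitted as
the target-independent TargetOpcode::COPY. A hedged sketch of the new hook
for a hypothetical MyTarget backend (GPRRegClass and MOVrr are placeholders):

  void MyTargetInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator I,
                                      DebugLoc DL, unsigned DestReg,
                                      unsigned SrcReg, bool KillSrc) const {
    // There is no 'return false' path any more: an unsupported copy is a bug.
    assert(MyTarget::GPRRegClass.contains(DestReg, SrcReg) &&
           "Impossible reg-to-reg copy");
    BuildMI(MBB, I, DL, get(MyTarget::MOVrr), DestReg)
      .addReg(SrcReg, getKillRegState(KillSrc));
  }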
   const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
 
-  const TargetRegisterClass* const*
-  getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
-
   BitVector getReservedRegs(const MachineFunction &MF) const;
 
   bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 3de173c..8f97d25 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -808,7 +808,7 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
   std::string Name;
   switch (Inst->getIntrinsicID()) {
   case Intrinsic::vastart:
-    Name = getValueName(Inst->getOperand(1));
+    Name = getValueName(Inst->getArgOperand(0));
     Name.insert(Name.length()-1,"$valist");
     // Obtain the argument handle.
     printSimpleInstruction("ldloca",Name.c_str());
@@ -817,20 +817,20 @@ void MSILWriter::printIntrinsicCall(const IntrinsicInst* Inst) {
       "instance void [mscorlib]System.ArgIterator::.ctor"
       "(valuetype [mscorlib]System.RuntimeArgumentHandle)");
     // Save as pointer type "void*"
-    printValueLoad(Inst->getOperand(1));
+    printValueLoad(Inst->getArgOperand(0));
     printSimpleInstruction("ldloca",Name.c_str());
     printIndirectSave(PointerType::getUnqual(
           IntegerType::get(Inst->getContext(), 8)));
     break;
   case Intrinsic::vaend:
     // Close argument list handle.
-    printIndirectLoad(Inst->getOperand(1));
+    printIndirectLoad(Inst->getArgOperand(0));
     printSimpleInstruction("call","instance void [mscorlib]System.ArgIterator::End()");
     break;
   case Intrinsic::vacopy:
     // Copy "ArgIterator" valuetype.
-    printIndirectLoad(Inst->getOperand(1));
-    printIndirectLoad(Inst->getOperand(2));
+    printIndirectLoad(Inst->getArgOperand(0));
+    printIndirectLoad(Inst->getArgOperand(1));
     printSimpleInstruction("cpobj","[mscorlib]System.ArgIterator");
     break;
   default:
@@ -845,10 +845,11 @@ void MSILWriter::printCallInstruction(const Instruction* Inst) {
     // Handle intrinsic function.
     printIntrinsicCall(cast<IntrinsicInst>(Inst));
   } else {
+    const CallInst *CI = cast<CallInst>(Inst);
     // Load arguments to stack and call function.
-    for (int I = 1, E = Inst->getNumOperands(); I!=E; ++I)
-      printValueLoad(Inst->getOperand(I));
-    printFunctionCall(Inst->getOperand(0),Inst);
+    for (int I = 0, E = CI->getNumArgOperands(); I!=E; ++I)
+      printValueLoad(CI->getArgOperand(I));
+    printFunctionCall(CI->getCalledFunction(), Inst);
   }
 }
 
@@ -1002,8 +1003,8 @@ void MSILWriter::printInvokeInstruction(const InvokeInst* Inst) {
   std::string Label = "leave$normal_"+utostr(getUniqID());
   Out << ".try {\n";
   // Load arguments
-  for (int I = 3, E = Inst->getNumOperands(); I!=E; ++I)
-    printValueLoad(Inst->getOperand(I));
+  for (int I = 0, E = Inst->getNumArgOperands(); I!=E; ++I)
+    printValueLoad(Inst->getArgOperand(I));
   // Print call instruction
   printFunctionCall(Inst->getOperand(0),Inst);
   // Save function result and leave "try" block
@@ -1280,7 +1281,7 @@ void MSILWriter::printLocalVariables(const Function& F) {
       case Intrinsic::vaend:
       case Intrinsic::vacopy:
         isVaList = true;
-        VaList = Inst->getOperand(1);
+        VaList = Inst->getArgOperand(0);
         break;
       default:
         isVaList = false;
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 7b328bb..3395e9f 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -272,7 +272,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N,
                                           AM.Base.Reg;
 
   if (AM.GV)
-    Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i16, AM.Disp,
+    Disp = CurDAG->getTargetGlobalAddress(AM.GV, Op->getDebugLoc(),
+                                          MVT::i16, AM.Disp,
                                           0/*AM.SymbolFlags*/);
   else if (AM.CP)
     Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16,
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 403400e..a1703a3 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -278,6 +278,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                                 CallingConv::ID CallConv, bool isVarArg,
                                 bool &isTailCall,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                const SmallVectorImpl<SDValue> &OutVals,
                                 const SmallVectorImpl<ISD::InputArg> &Ins,
                                 DebugLoc dl, SelectionDAG &DAG,
                                 SmallVectorImpl<SDValue> &InVals) const {
@@ -290,7 +291,7 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   case CallingConv::Fast:
   case CallingConv::C:
     return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
-                          Outs, Ins, dl, DAG, InVals);
+                          Outs, OutVals, Ins, dl, DAG, InVals);
   case CallingConv::MSP430_INTR:
     report_fatal_error("ISRs cannot be called directly");
     return SDValue();
@@ -369,7 +370,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
                << "\n";
       }
       // Create the frame index object for this incoming parameter...
-      int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false);
+      int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
 
       // Create the SelectionDAG nodes corresponding to a load
       //from this parameter
@@ -387,6 +388,7 @@ SDValue
 MSP430TargetLowering::LowerReturn(SDValue Chain,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                  const SmallVectorImpl<SDValue> &OutVals,
                                   DebugLoc dl, SelectionDAG &DAG) const {
 
   // CCValAssign - represent the assignment of the return value to a location
@@ -421,7 +423,7 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
     assert(VA.isRegLoc() && "Can only return in registers!");
 
     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                             Outs[i].Val, Flag);
+                             OutVals[i], Flag);
 
     // Guarantee that all emitted copies are stuck together,
     // avoiding something bad.
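The MSILWriter hunks above track a Core IR cleanup in this update: call
argument access goes through CallInst::getArgOperand(i) and
getNumArgOperands() instead of raw getOperand() indexing, so front-end and
writer code no longer hard-codes where the callee sits in the operand list.
A minimal sketch of the resulting idiom, assuming a hypothetical visit()
helper:

  void emitCallArgs(const CallInst *CI) {
    for (unsigned I = 0, E = CI->getNumArgOperands(); I != E; ++I)
      visit(CI->getArgOperand(I));   // was: visit(CI->getOperand(I + 1))
    visit(CI->getCalledValue());     // the callee, wherever it is stored
  }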
@@ -447,6 +449,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
                                      bool isTailCall,
                                      const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                     const SmallVectorImpl<SDValue> &OutVals,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      DebugLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
@@ -471,7 +474,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
 
-    SDValue Arg = Outs[i].Val;
+    SDValue Arg = OutVals[i];
 
     // Promote the value if needed.
     switch (VA.getLocInfo()) {
@@ -529,7 +532,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee,
   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
   // Likewise ExternalSymbol -> TargetExternalSymbol.
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i16);
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i16);
   else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee))
     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i16);
 
@@ -642,7 +645,8 @@ SDValue MSP430TargetLowering::LowerGlobalAddress(SDValue Op,
   int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
 
   // Create the TargetGlobalAddress node, folding in the constant offset.
-  SDValue Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
+  SDValue Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
+                                              getPointerTy(), Offset);
   return DAG.getNode(MSP430ISD::Wrapper, Op.getDebugLoc(),
                      getPointerTy(), Result);
 }
@@ -888,7 +892,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
     // Set up a frame object for the return address.
     uint64_t SlotSize = TD->getPointerSize();
     ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
-                                                           true, false);
+                                                           true);
     FuncInfo->setRAIndex(ReturnAddrIndex);
   }
 
@@ -1070,7 +1074,10 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
 
   // Update machine-CFG edges by transferring all successors of the current
   // block to the block containing instructions after shift.
-  RemBB->transferSuccessors(BB);
+  RemBB->splice(RemBB->begin(), BB,
+                llvm::next(MachineBasicBlock::iterator(MI)),
+                BB->end());
+  RemBB->transferSuccessorsAndUpdatePHIs(BB);
 
   // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
   BB->addSuccessor(LoopBB);
@@ -1116,11 +1123,11 @@ MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
 
   // RemBB:
   // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
-  BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg)
+  BuildMI(*RemBB, RemBB->begin(), dl, TII.get(MSP430::PHI), DstReg)
     .addReg(SrcReg).addMBB(BB)
     .addReg(ShiftReg2).addMBB(LoopBB);
 
-  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
   return RemBB;
 }
 
@@ -1158,18 +1165,22 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   MachineFunction *F = BB->getParent();
   MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);
-  BuildMI(BB, dl, TII.get(MSP430::JCC))
-    .addMBB(copy1MBB)
-    .addImm(MI->getOperand(3).getImm());
   F->insert(I, copy0MBB);
   F->insert(I, copy1MBB);
   // Update machine-CFG edges by transferring all successors of the current
   // block to the new block which will contain the Phi node for the select.
-  copy1MBB->transferSuccessors(BB);
+  copy1MBB->splice(copy1MBB->begin(), BB,
+                   llvm::next(MachineBasicBlock::iterator(MI)),
+                   BB->end());
+  copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
   // Next, add the true and fallthrough blocks as its successors.
   BB->addSuccessor(copy0MBB);
   BB->addSuccessor(copy1MBB);
 
+  BuildMI(BB, dl, TII.get(MSP430::JCC))
+    .addMBB(copy1MBB)
+    .addImm(MI->getOperand(3).getImm());
+
   // copy0MBB:
   //  %FalseValue = ...
   //  # fallthrough to copy1MBB
@@ -1182,11 +1193,11 @@ MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   //  %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
   //  ...
   BB = copy1MBB;
-  BuildMI(BB, dl, TII.get(MSP430::PHI),
+  BuildMI(*BB, BB->begin(), dl, TII.get(MSP430::PHI),
           MI->getOperand(0).getReg())
     .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
     .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB);
 
-  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
   return BB;
 }
 
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index 01c5071..673c543 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -127,6 +127,7 @@ namespace llvm {
                    CallingConv::ID CallConv, bool isVarArg,
                    bool isTailCall,
                    const SmallVectorImpl<ISD::OutputArg> &Outs,
+                   const SmallVectorImpl<SDValue> &OutVals,
                    const SmallVectorImpl<ISD::InputArg> &Ins,
                    DebugLoc dl, SelectionDAG &DAG,
                    SmallVectorImpl<SDValue> &InVals) const;
@@ -155,6 +156,7 @@ namespace llvm {
     LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
               bool isVarArg, bool &isTailCall,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
+              const SmallVectorImpl<SDValue> &OutVals,
              const SmallVectorImpl<ISD::InputArg> &Ins,
               DebugLoc dl, SelectionDAG &DAG,
               SmallVectorImpl<SDValue> &InVals) const;
@@ -163,6 +165,7 @@ namespace llvm {
     LowerReturn(SDValue Chain,
                 CallingConv::ID CallConv, bool isVarArg,
                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<SDValue> &OutVals,
                 DebugLoc dl, SelectionDAG &DAG) const;
 
     virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 18226ab..df28d07 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -83,27 +83,20 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
     llvm_unreachable("Cannot store this register to stack slot!");
 }
 
-bool MSP430InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator I,
-                                   unsigned DestReg, unsigned SrcReg,
-                                   const TargetRegisterClass *DestRC,
-                                   const TargetRegisterClass *SrcRC,
-                                   DebugLoc DL) const {
-  if (DestRC == SrcRC) {
-    unsigned Opc;
-    if (DestRC == &MSP430::GR16RegClass) {
-      Opc = MSP430::MOV16rr;
-    } else if (DestRC == &MSP430::GR8RegClass) {
-      Opc = MSP430::MOV8rr;
-    } else {
-      return false;
-    }
-
-    BuildMI(MBB, I, DL, get(Opc), DestReg).addReg(SrcReg);
-    return true;
-  }
+void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I, DebugLoc DL,
+                                  unsigned DestReg, unsigned SrcReg,
+                                  bool KillSrc) const {
+  unsigned Opc;
+  if (MSP430::GR16RegClass.contains(DestReg, SrcReg))
+    Opc = MSP430::MOV16rr;
+  else if (MSP430::GR8RegClass.contains(DestReg, SrcReg))
+    Opc = MSP430::MOV8rr;
+  else
+    llvm_unreachable("Impossible reg-to-reg copy");
 
-  return false;
+  BuildMI(MBB, I, DL, get(Opc), DestReg)
+    .addReg(SrcReg, getKillRegState(KillSrc));
 }
 
 bool
@@ -330,10 +323,8 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
 unsigned
 MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
-                              const SmallVectorImpl<MachineOperand> &Cond) const {
-  // FIXME this should probably have a DebugLoc operand
-  DebugLoc DL;
-
+                              const SmallVectorImpl<MachineOperand> &Cond,
+                              DebugLoc DL) const {
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 1 || Cond.size() == 0) &&
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 842b4cb..ebbda1a 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -49,11 +49,10 @@ public:
   ///
   virtual const TargetRegisterInfo &getRegisterInfo() const { return RI; }
 
-  bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
-                    unsigned DestReg, unsigned SrcReg,
-                    const TargetRegisterClass *DestRC,
-                    const TargetRegisterClass *SrcRC,
-                    DebugLoc DL) const;
+  void copyPhysReg(MachineBasicBlock &MBB,
+                   MachineBasicBlock::iterator I, DebugLoc DL,
+                   unsigned DestReg, unsigned SrcReg,
+                   bool KillSrc) const;
 
   bool isMoveInstr(const MachineInstr& MI,
                    unsigned &SrcReg, unsigned &DstReg,
@@ -93,7 +92,8 @@ public:
   unsigned RemoveBranch(MachineBasicBlock &MBB) const;
   unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                         MachineBasicBlock *FBB,
-                        const SmallVectorImpl<MachineOperand> &Cond) const;
+                        const SmallVectorImpl<MachineOperand> &Cond,
+                        DebugLoc DL) const;
 
 };
 
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 6b9a2f2..8792b22 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -25,13 +25,16 @@ class SDTCisI16<int Op> : SDTCisVT<Op, i16>;
 def SDT_MSP430Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
 def SDT_MSP430CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i16>]>;
 def SDT_MSP430CallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
-def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
+def SDT_MSP430Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
+                                             SDTCisPtrTy<0>]>;
 def SDT_MSP430Cmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
 def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
                                           SDTCisVT<1, i8>]>;
-def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
+def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
+                                              SDTCisSameAs<1, 2>,
                                               SDTCisVT<3, i8>]>;
-def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>;
+def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
+                                           SDTCisI8<2>]>;
 
 //===----------------------------------------------------------------------===//
 // MSP430 Specific Node Definitions.
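The EmitShiftInstr and EmitInstrWithCustomInserter hunks above, like the
MBlaze and Mips ones elsewhere in this patch, all converge on the same new
custom-inserter idiom: instead of hand-copying successor lists (or calling
transferSuccessors), the tail of the current block is spliced into the join
block and PHIs are rewritten, because the expanded pseudo is no longer
guaranteed to be the last instruction in its block. The shared shape, using
the block names from those hunks:

  F->insert(It, copy0MBB);
  F->insert(It, sinkMBB);
  // Move everything after the pseudo into the join block and let the
  // helper rewrite any PHIs that referred to BB:
  sinkMBB->splice(sinkMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);
  // emit the branch in BB and the PHI at sinkMBB->begin(), then:
  MI->eraseFromParent();   // replaces the old F->DeleteMachineInstr(MI)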
@@ -46,7 +49,7 @@ def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>; def MSP430rrc : SDNode<"MSP430ISD::RRC", SDTIntUnaryOp, []>; def MSP430call : SDNode<"MSP430ISD::CALL", SDT_MSP430Call, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>; def MSP430callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MSP430CallSeqStart, [SDNPHasChain, SDNPOutFlag]>; @@ -55,8 +58,10 @@ def MSP430callseq_end : [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>; def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>; -def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>; -def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>; +def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, + [SDNPHasChain, SDNPInFlag]>; +def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, + [SDNPInFlag]>; def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>; def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>; def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>; @@ -117,14 +122,14 @@ def ADJCALLSTACKUP : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2), } let usesCustomInserter = 1 in { - def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cc), + def Select8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$src2, i8imm:$cc), "# Select8 PSEUDO", [(set GR8:$dst, - (MSP430selectcc GR8:$src1, GR8:$src2, imm:$cc))]>; - def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2, i8imm:$cc), + (MSP430selectcc GR8:$src, GR8:$src2, imm:$cc))]>; + def Select16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR16:$src2, i8imm:$cc), "# Select16 PSEUDO", [(set GR16:$dst, - (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>; + (MSP430selectcc GR16:$src, GR16:$src2, imm:$cc))]>; let Defs = [SRW] in { def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt), "# Shl8 PSEUDO", @@ -330,60 +335,60 @@ def MOV16mm : I16mm<0x0, //===----------------------------------------------------------------------===// // Arithmetic Instructions -let isTwoAddress = 1 in { +let Constraints = "$src = $dst" in { let Defs = [SRW] in { let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y def ADD8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "add.b\t{$src2, $dst}", - [(set GR8:$dst, (add GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (add GR8:$src, GR8:$src2)), (implicit SRW)]>; def ADD16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "add.w\t{$src2, $dst}", - [(set GR16:$dst, (add GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (add GR16:$src, GR16:$src2)), (implicit SRW)]>; } def ADD8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "add.b\t{$src2, $dst}", - [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (add GR8:$src, (load addr:$src2))), (implicit SRW)]>; def ADD16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "add.w\t{$src2, $dst}", - [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (add GR16:$src, (load addr:$src2))), (implicit SRW)]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1, -Constraints = "$base = $base_wb, $src1 = $dst" in { +Constraints = "$base = $base_wb, $src = $dst" in { 
def ADD8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR8:$dst, GR16:$base_wb), - (ins GR8:$src1, GR16:$base), + (ins GR8:$src, GR16:$base), "add.b\t{@$base+, $dst}", []>; def ADD16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR16:$dst, GR16:$base_wb), - (ins GR16:$src1, GR16:$base), + (ins GR16:$src, GR16:$base), "add.w\t{@$base+, $dst}", []>; } def ADD8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "add.b\t{$src2, $dst}", - [(set GR8:$dst, (add GR8:$src1, imm:$src2)), + [(set GR8:$dst, (add GR8:$src, imm:$src2)), (implicit SRW)]>; def ADD16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "add.w\t{$src2, $dst}", - [(set GR16:$dst, (add GR16:$src1, imm:$src2)), + [(set GR16:$dst, (add GR16:$src, imm:$src2)), (implicit SRW)]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def ADD8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "add.b\t{$src, $dst}", @@ -424,40 +429,40 @@ let Uses = [SRW] in { let isCommutable = 1 in { // X = ADDC Y, Z == X = ADDC Z, Y def ADC8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "addc.b\t{$src2, $dst}", - [(set GR8:$dst, (adde GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (adde GR8:$src, GR8:$src2)), (implicit SRW)]>; def ADC16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "addc.w\t{$src2, $dst}", - [(set GR16:$dst, (adde GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (adde GR16:$src, GR16:$src2)), (implicit SRW)]>; } // isCommutable def ADC8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "addc.b\t{$src2, $dst}", - [(set GR8:$dst, (adde GR8:$src1, imm:$src2)), + [(set GR8:$dst, (adde GR8:$src, imm:$src2)), (implicit SRW)]>; def ADC16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "addc.w\t{$src2, $dst}", - [(set GR16:$dst, (adde GR16:$src1, imm:$src2)), + [(set GR16:$dst, (adde GR16:$src, imm:$src2)), (implicit SRW)]>; def ADC8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "addc.b\t{$src2, $dst}", - [(set GR8:$dst, (adde GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (adde GR8:$src, (load addr:$src2))), (implicit SRW)]>; def ADC16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "addc.w\t{$src2, $dst}", - [(set GR16:$dst, (adde GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (adde GR16:$src, (load addr:$src2))), (implicit SRW)]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def ADC8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "addc.b\t{$src, $dst}", @@ -498,52 +503,52 @@ def ADC16mm : I8mm<0x0, let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y def AND8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "and.b\t{$src2, $dst}", - [(set GR8:$dst, (and GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (and GR8:$src, GR8:$src2)), (implicit SRW)]>; def AND16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "and.w\t{$src2, $dst}", - [(set GR16:$dst, (and GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (and GR16:$src, GR16:$src2)), (implicit SRW)]>; } def AND8ri : I8ri<0x0, 
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "and.b\t{$src2, $dst}", - [(set GR8:$dst, (and GR8:$src1, imm:$src2)), + [(set GR8:$dst, (and GR8:$src, imm:$src2)), (implicit SRW)]>; def AND16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "and.w\t{$src2, $dst}", - [(set GR16:$dst, (and GR16:$src1, imm:$src2)), + [(set GR16:$dst, (and GR16:$src, imm:$src2)), (implicit SRW)]>; def AND8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "and.b\t{$src2, $dst}", - [(set GR8:$dst, (and GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (and GR8:$src, (load addr:$src2))), (implicit SRW)]>; def AND16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "and.w\t{$src2, $dst}", - [(set GR16:$dst, (and GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (and GR16:$src, (load addr:$src2))), (implicit SRW)]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1, -Constraints = "$base = $base_wb, $src1 = $dst" in { +Constraints = "$base = $base_wb, $src = $dst" in { def AND8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR8:$dst, GR16:$base_wb), - (ins GR8:$src1, GR16:$base), + (ins GR8:$src, GR16:$base), "and.b\t{@$base+, $dst}", []>; def AND16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR16:$dst, GR16:$base_wb), - (ins GR16:$src1, GR16:$base), + (ins GR16:$src, GR16:$base), "and.w\t{@$base+, $dst}", []>; } -let isTwoAddress = 0 in { +let Constraints = "" in { def AND8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "and.b\t{$src, $dst}", @@ -582,46 +587,46 @@ def AND16mm : I16mm<0x0, let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y def OR8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>; + [(set GR8:$dst, (or GR8:$src, GR8:$src2))]>; def OR16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>; + [(set GR16:$dst, (or GR16:$src, GR16:$src2))]>; } def OR8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>; + [(set GR8:$dst, (or GR8:$src, imm:$src2))]>; def OR16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>; + [(set GR16:$dst, (or GR16:$src, imm:$src2))]>; def OR8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>; + [(set GR8:$dst, (or GR8:$src, (load addr:$src2)))]>; def OR16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>; + [(set GR16:$dst, (or GR16:$src, (load addr:$src2)))]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1, -Constraints = "$base = $base_wb, $src1 = $dst" in { +Constraints = "$base = $base_wb, $src = $dst" in { def OR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR8:$dst, 
GR16:$base_wb), - (ins GR8:$src1, GR16:$base), + (ins GR8:$src, GR16:$base), "bis.b\t{@$base+, $dst}", []>; def OR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR16:$dst, GR16:$base_wb), - (ins GR16:$src1, GR16:$base), + (ins GR16:$src, GR16:$base), "bis.w\t{@$base+, $dst}", []>; } -let isTwoAddress = 0 in { +let Constraints = "" in { def OR8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "bis.b\t{$src, $dst}", @@ -654,24 +659,24 @@ def OR16mm : I16mm<0x0, // bic does not modify condition codes def BIC8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "bic.b\t{$src2, $dst}", - [(set GR8:$dst, (and GR8:$src1, (not GR8:$src2)))]>; + [(set GR8:$dst, (and GR8:$src, (not GR8:$src2)))]>; def BIC16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "bic.w\t{$src2, $dst}", - [(set GR16:$dst, (and GR16:$src1, (not GR16:$src2)))]>; + [(set GR16:$dst, (and GR16:$src, (not GR16:$src2)))]>; def BIC8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "bic.b\t{$src2, $dst}", - [(set GR8:$dst, (and GR8:$src1, (not (i8 (load addr:$src2)))))]>; + [(set GR8:$dst, (and GR8:$src, (not (i8 (load addr:$src2)))))]>; def BIC16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "bic.w\t{$src2, $dst}", - [(set GR16:$dst, (and GR16:$src1, (not (i16 (load addr:$src2)))))]>; + [(set GR16:$dst, (and GR16:$src, (not (i16 (load addr:$src2)))))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def BIC8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "bic.b\t{$src, $dst}", @@ -695,52 +700,52 @@ def BIC16mm : I16mm<0x0, let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y def XOR8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "xor.b\t{$src2, $dst}", - [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (xor GR8:$src, GR8:$src2)), (implicit SRW)]>; def XOR16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "xor.w\t{$src2, $dst}", - [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (xor GR16:$src, GR16:$src2)), (implicit SRW)]>; } def XOR8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "xor.b\t{$src2, $dst}", - [(set GR8:$dst, (xor GR8:$src1, imm:$src2)), + [(set GR8:$dst, (xor GR8:$src, imm:$src2)), (implicit SRW)]>; def XOR16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "xor.w\t{$src2, $dst}", - [(set GR16:$dst, (xor GR16:$src1, imm:$src2)), + [(set GR16:$dst, (xor GR16:$src, imm:$src2)), (implicit SRW)]>; def XOR8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "xor.b\t{$src2, $dst}", - [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (xor GR8:$src, (load addr:$src2))), (implicit SRW)]>; def XOR16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "xor.w\t{$src2, $dst}", - [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (xor GR16:$src, (load addr:$src2))), (implicit SRW)]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1, -Constraints = "$base = $base_wb, $src1 = $dst" in { 
+Constraints = "$base = $base_wb, $src = $dst" in { def XOR8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR8:$dst, GR16:$base_wb), - (ins GR8:$src1, GR16:$base), + (ins GR8:$src, GR16:$base), "xor.b\t{@$base+, $dst}", []>; def XOR16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR16:$dst, GR16:$base_wb), - (ins GR16:$src1, GR16:$base), + (ins GR16:$src, GR16:$base), "xor.w\t{@$base+, $dst}", []>; } -let isTwoAddress = 0 in { +let Constraints = "" in { def XOR8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "xor.b\t{$src, $dst}", @@ -777,51 +782,51 @@ def XOR16mm : I16mm<0x0, def SUB8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "sub.b\t{$src2, $dst}", - [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (sub GR8:$src, GR8:$src2)), (implicit SRW)]>; def SUB16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "sub.w\t{$src2, $dst}", - [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (sub GR16:$src, GR16:$src2)), (implicit SRW)]>; def SUB8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs GR8:$dst), (ins GR8:$src, i8imm:$src2), "sub.b\t{$src2, $dst}", - [(set GR8:$dst, (sub GR8:$src1, imm:$src2)), + [(set GR8:$dst, (sub GR8:$src, imm:$src2)), (implicit SRW)]>; def SUB16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "sub.w\t{$src2, $dst}", - [(set GR16:$dst, (sub GR16:$src1, imm:$src2)), + [(set GR16:$dst, (sub GR16:$src, imm:$src2)), (implicit SRW)]>; def SUB8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "sub.b\t{$src2, $dst}", - [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (sub GR8:$src, (load addr:$src2))), (implicit SRW)]>; def SUB16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "sub.w\t{$src2, $dst}", - [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (sub GR16:$src, (load addr:$src2))), (implicit SRW)]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1, -Constraints = "$base = $base_wb, $src1 = $dst" in { +Constraints = "$base = $base_wb, $src = $dst" in { def SUB8rm_POST : IForm8<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR8:$dst, GR16:$base_wb), - (ins GR8:$src1, GR16:$base), + (ins GR8:$src, GR16:$base), "sub.b\t{@$base+, $dst}", []>; def SUB16rm_POST : IForm16<0x0, DstReg, SrcPostInc, Size2Bytes, (outs GR16:$dst, GR16:$base_wb), - (ins GR16:$src1, GR16:$base), + (ins GR16:$src, GR16:$base), "sub.w\t{@$base+, $dst}", []>; } -let isTwoAddress = 0 in { +let Constraints = "" in { def SUB8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "sub.b\t{$src, $dst}", @@ -860,39 +865,39 @@ def SUB16mm : I16mm<0x0, let Uses = [SRW] in { def SBC8rr : I8rr<0x0, - (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + (outs GR8:$dst), (ins GR8:$src, GR8:$src2), "subc.b\t{$src2, $dst}", - [(set GR8:$dst, (sube GR8:$src1, GR8:$src2)), + [(set GR8:$dst, (sube GR8:$src, GR8:$src2)), (implicit SRW)]>; def SBC16rr : I16rr<0x0, - (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + (outs GR16:$dst), (ins GR16:$src, GR16:$src2), "subc.w\t{$src2, $dst}", - [(set GR16:$dst, (sube GR16:$src1, GR16:$src2)), + [(set GR16:$dst, (sube GR16:$src, GR16:$src2)), (implicit SRW)]>; def SBC8ri : I8ri<0x0, - (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + (outs 
GR8:$dst), (ins GR8:$src, i8imm:$src2), "subc.b\t{$src2, $dst}", - [(set GR8:$dst, (sube GR8:$src1, imm:$src2)), + [(set GR8:$dst, (sube GR8:$src, imm:$src2)), (implicit SRW)]>; def SBC16ri : I16ri<0x0, - (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + (outs GR16:$dst), (ins GR16:$src, i16imm:$src2), "subc.w\t{$src2, $dst}", - [(set GR16:$dst, (sube GR16:$src1, imm:$src2)), + [(set GR16:$dst, (sube GR16:$src, imm:$src2)), (implicit SRW)]>; def SBC8rm : I8rm<0x0, - (outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + (outs GR8:$dst), (ins GR8:$src, memsrc:$src2), "subc.b\t{$src2, $dst}", - [(set GR8:$dst, (sube GR8:$src1, (load addr:$src2))), + [(set GR8:$dst, (sube GR8:$src, (load addr:$src2))), (implicit SRW)]>; def SBC16rm : I16rm<0x0, - (outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + (outs GR16:$dst), (ins GR16:$src, memsrc:$src2), "subc.w\t{$src2, $dst}", - [(set GR16:$dst, (sube GR16:$src1, (load addr:$src2))), + [(set GR16:$dst, (sube GR16:$src, (load addr:$src2))), (implicit SRW)]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def SBC8mr : I8mr<0x0, (outs), (ins memdst:$dst, GR8:$src), "subc.b\t{$src, $dst}", @@ -985,59 +990,59 @@ def SWPB16r : II16r<0x0, "swpb\t$dst", [(set GR16:$dst, (bswap GR16:$src))]>; -} // isTwoAddress = 1 +} // Constraints = "$src = $dst" // Integer comparisons let Defs = [SRW] in { def CMP8rr : I8rr<0x0, - (outs), (ins GR8:$src1, GR8:$src2), - "cmp.b\t{$src2, $src1}", - [(MSP430cmp GR8:$src1, GR8:$src2), (implicit SRW)]>; + (outs), (ins GR8:$src, GR8:$src2), + "cmp.b\t{$src2, $src}", + [(MSP430cmp GR8:$src, GR8:$src2), (implicit SRW)]>; def CMP16rr : I16rr<0x0, - (outs), (ins GR16:$src1, GR16:$src2), - "cmp.w\t{$src2, $src1}", - [(MSP430cmp GR16:$src1, GR16:$src2), (implicit SRW)]>; + (outs), (ins GR16:$src, GR16:$src2), + "cmp.w\t{$src2, $src}", + [(MSP430cmp GR16:$src, GR16:$src2), (implicit SRW)]>; def CMP8ri : I8ri<0x0, - (outs), (ins GR8:$src1, i8imm:$src2), - "cmp.b\t{$src2, $src1}", - [(MSP430cmp GR8:$src1, imm:$src2), (implicit SRW)]>; + (outs), (ins GR8:$src, i8imm:$src2), + "cmp.b\t{$src2, $src}", + [(MSP430cmp GR8:$src, imm:$src2), (implicit SRW)]>; def CMP16ri : I16ri<0x0, - (outs), (ins GR16:$src1, i16imm:$src2), - "cmp.w\t{$src2, $src1}", - [(MSP430cmp GR16:$src1, imm:$src2), (implicit SRW)]>; + (outs), (ins GR16:$src, i16imm:$src2), + "cmp.w\t{$src2, $src}", + [(MSP430cmp GR16:$src, imm:$src2), (implicit SRW)]>; def CMP8mi : I8mi<0x0, - (outs), (ins memsrc:$src1, i8imm:$src2), - "cmp.b\t{$src2, $src1}", - [(MSP430cmp (load addr:$src1), + (outs), (ins memsrc:$src, i8imm:$src2), + "cmp.b\t{$src2, $src}", + [(MSP430cmp (load addr:$src), (i8 imm:$src2)), (implicit SRW)]>; def CMP16mi : I16mi<0x0, - (outs), (ins memsrc:$src1, i16imm:$src2), - "cmp.w\t{$src2, $src1}", - [(MSP430cmp (load addr:$src1), + (outs), (ins memsrc:$src, i16imm:$src2), + "cmp.w\t{$src2, $src}", + [(MSP430cmp (load addr:$src), (i16 imm:$src2)), (implicit SRW)]>; def CMP8rm : I8rm<0x0, - (outs), (ins GR8:$src1, memsrc:$src2), - "cmp.b\t{$src2, $src1}", - [(MSP430cmp GR8:$src1, (load addr:$src2)), + (outs), (ins GR8:$src, memsrc:$src2), + "cmp.b\t{$src2, $src}", + [(MSP430cmp GR8:$src, (load addr:$src2)), (implicit SRW)]>; def CMP16rm : I16rm<0x0, - (outs), (ins GR16:$src1, memsrc:$src2), - "cmp.w\t{$src2, $src1}", - [(MSP430cmp GR16:$src1, (load addr:$src2)), + (outs), (ins GR16:$src, memsrc:$src2), + "cmp.w\t{$src2, $src}", + [(MSP430cmp GR16:$src, (load addr:$src2)), (implicit SRW)]>; def CMP8mr : I8mr<0x0, - (outs), (ins memsrc:$src1, GR8:$src2), - 
"cmp.b\t{$src2, $src1}", - [(MSP430cmp (load addr:$src1), GR8:$src2), + (outs), (ins memsrc:$src, GR8:$src2), + "cmp.b\t{$src2, $src}", + [(MSP430cmp (load addr:$src), GR8:$src2), (implicit SRW)]>; def CMP16mr : I16mr<0x0, - (outs), (ins memsrc:$src1, GR16:$src2), - "cmp.w\t{$src2, $src1}", - [(MSP430cmp (load addr:$src1), GR16:$src2), + (outs), (ins memsrc:$src, GR16:$src2), + "cmp.w\t{$src2, $src}", + [(MSP430cmp (load addr:$src), GR16:$src2), (implicit SRW)]>; @@ -1045,71 +1050,71 @@ def CMP16mr : I16mr<0x0, // Note that the C condition is set differently than when using CMP. let isCommutable = 1 in { def BIT8rr : I8rr<0x0, - (outs), (ins GR8:$src1, GR8:$src2), - "bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su GR8:$src1, GR8:$src2), 0), + (outs), (ins GR8:$src, GR8:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su GR8:$src, GR8:$src2), 0), (implicit SRW)]>; def BIT16rr : I16rr<0x0, - (outs), (ins GR16:$src1, GR16:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su GR16:$src1, GR16:$src2), 0), + (outs), (ins GR16:$src, GR16:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su GR16:$src, GR16:$src2), 0), (implicit SRW)]>; } def BIT8ri : I8ri<0x0, - (outs), (ins GR8:$src1, i8imm:$src2), - "bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su GR8:$src1, imm:$src2), 0), + (outs), (ins GR8:$src, i8imm:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su GR8:$src, imm:$src2), 0), (implicit SRW)]>; def BIT16ri : I16ri<0x0, - (outs), (ins GR16:$src1, i16imm:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su GR16:$src1, imm:$src2), 0), + (outs), (ins GR16:$src, i16imm:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su GR16:$src, imm:$src2), 0), (implicit SRW)]>; def BIT8rm : I8rm<0x0, - (outs), (ins GR8:$src1, memdst:$src2), - "bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su GR8:$src1, (load addr:$src2)), 0), + (outs), (ins GR8:$src, memdst:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su GR8:$src, (load addr:$src2)), 0), (implicit SRW)]>; def BIT16rm : I16rm<0x0, - (outs), (ins GR16:$src1, memdst:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su GR16:$src1, (load addr:$src2)), 0), + (outs), (ins GR16:$src, memdst:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su GR16:$src, (load addr:$src2)), 0), (implicit SRW)]>; def BIT8mr : I8mr<0x0, - (outs), (ins memsrc:$src1, GR8:$src2), - "bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su (load addr:$src1), GR8:$src2), 0), + (outs), (ins memsrc:$src, GR8:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su (load addr:$src), GR8:$src2), 0), (implicit SRW)]>; def BIT16mr : I16mr<0x0, - (outs), (ins memsrc:$src1, GR16:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su (load addr:$src1), GR16:$src2), 0), + (outs), (ins memsrc:$src, GR16:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su (load addr:$src), GR16:$src2), 0), (implicit SRW)]>; def BIT8mi : I8mi<0x0, - (outs), (ins memsrc:$src1, i8imm:$src2), - "bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su (load addr:$src1), (i8 imm:$src2)), 0), + (outs), (ins memsrc:$src, i8imm:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su (load addr:$src), (i8 imm:$src2)), 0), (implicit SRW)]>; def BIT16mi : I16mi<0x0, - (outs), (ins memsrc:$src1, i16imm:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su (load addr:$src1), (i16 imm:$src2)), 0), + (outs), (ins memsrc:$src, i16imm:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su (load addr:$src), (i16 imm:$src2)), 0), (implicit SRW)]>; def BIT8mm : I8mm<0x0, - (outs), (ins memsrc:$src1, memsrc:$src2), - 
"bit.b\t{$src2, $src1}", - [(MSP430cmp (and_su (i8 (load addr:$src1)), + (outs), (ins memsrc:$src, memsrc:$src2), + "bit.b\t{$src2, $src}", + [(MSP430cmp (and_su (i8 (load addr:$src)), (load addr:$src2)), 0), (implicit SRW)]>; def BIT16mm : I16mm<0x0, - (outs), (ins memsrc:$src1, memsrc:$src2), - "bit.w\t{$src2, $src1}", - [(MSP430cmp (and_su (i16 (load addr:$src1)), + (outs), (ins memsrc:$src, memsrc:$src2), + "bit.w\t{$src2, $src}", + [(MSP430cmp (and_su (i16 (load addr:$src)), (load addr:$src2)), 0), (implicit SRW)]>; @@ -1134,12 +1139,12 @@ def : Pat<(i16 (MSP430Wrapper tglobaladdr:$dst)), (MOV16ri tglobaladdr:$dst)>; def : Pat<(i16 (MSP430Wrapper texternalsym:$dst)), (MOV16ri texternalsym:$dst)>; def : Pat<(i16 (MSP430Wrapper tblockaddress:$dst)), (MOV16ri tblockaddress:$dst)>; -def : Pat<(add GR16:$src1, (MSP430Wrapper tglobaladdr :$src2)), - (ADD16ri GR16:$src1, tglobaladdr:$src2)>; -def : Pat<(add GR16:$src1, (MSP430Wrapper texternalsym:$src2)), - (ADD16ri GR16:$src1, texternalsym:$src2)>; -def : Pat<(add GR16:$src1, (MSP430Wrapper tblockaddress:$src2)), - (ADD16ri GR16:$src1, tblockaddress:$src2)>; +def : Pat<(add GR16:$src, (MSP430Wrapper tglobaladdr :$src2)), + (ADD16ri GR16:$src, tglobaladdr:$src2)>; +def : Pat<(add GR16:$src, (MSP430Wrapper texternalsym:$src2)), + (ADD16ri GR16:$src, texternalsym:$src2)>; +def : Pat<(add GR16:$src, (MSP430Wrapper tblockaddress:$src2)), + (ADD16ri GR16:$src, tblockaddress:$src2)>; def : Pat<(store (i16 (MSP430Wrapper tglobaladdr:$src)), addr:$dst), (MOV16mi addr:$dst, tglobaladdr:$src)>; @@ -1155,45 +1160,45 @@ def : Pat<(MSP430call (i16 texternalsym:$dst)), (CALLi texternalsym:$dst)>; // add and sub always produce carry -def : Pat<(addc GR16:$src1, GR16:$src2), - (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(addc GR16:$src1, (load addr:$src2)), - (ADD16rm GR16:$src1, addr:$src2)>; -def : Pat<(addc GR16:$src1, imm:$src2), - (ADD16ri GR16:$src1, imm:$src2)>; +def : Pat<(addc GR16:$src, GR16:$src2), + (ADD16rr GR16:$src, GR16:$src2)>; +def : Pat<(addc GR16:$src, (load addr:$src2)), + (ADD16rm GR16:$src, addr:$src2)>; +def : Pat<(addc GR16:$src, imm:$src2), + (ADD16ri GR16:$src, imm:$src2)>; def : Pat<(store (addc (load addr:$dst), GR16:$src), addr:$dst), (ADD16mr addr:$dst, GR16:$src)>; def : Pat<(store (addc (load addr:$dst), (i16 (load addr:$src))), addr:$dst), (ADD16mm addr:$dst, addr:$src)>; -def : Pat<(addc GR8:$src1, GR8:$src2), - (ADD8rr GR8:$src1, GR8:$src2)>; -def : Pat<(addc GR8:$src1, (load addr:$src2)), - (ADD8rm GR8:$src1, addr:$src2)>; -def : Pat<(addc GR8:$src1, imm:$src2), - (ADD8ri GR8:$src1, imm:$src2)>; +def : Pat<(addc GR8:$src, GR8:$src2), + (ADD8rr GR8:$src, GR8:$src2)>; +def : Pat<(addc GR8:$src, (load addr:$src2)), + (ADD8rm GR8:$src, addr:$src2)>; +def : Pat<(addc GR8:$src, imm:$src2), + (ADD8ri GR8:$src, imm:$src2)>; def : Pat<(store (addc (load addr:$dst), GR8:$src), addr:$dst), (ADD8mr addr:$dst, GR8:$src)>; def : Pat<(store (addc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), (ADD8mm addr:$dst, addr:$src)>; -def : Pat<(subc GR16:$src1, GR16:$src2), - (SUB16rr GR16:$src1, GR16:$src2)>; -def : Pat<(subc GR16:$src1, (load addr:$src2)), - (SUB16rm GR16:$src1, addr:$src2)>; -def : Pat<(subc GR16:$src1, imm:$src2), - (SUB16ri GR16:$src1, imm:$src2)>; +def : Pat<(subc GR16:$src, GR16:$src2), + (SUB16rr GR16:$src, GR16:$src2)>; +def : Pat<(subc GR16:$src, (load addr:$src2)), + (SUB16rm GR16:$src, addr:$src2)>; +def : Pat<(subc GR16:$src, imm:$src2), + (SUB16ri GR16:$src, imm:$src2)>; def : Pat<(store (subc (load 
addr:$dst), GR16:$src), addr:$dst), (SUB16mr addr:$dst, GR16:$src)>; def : Pat<(store (subc (load addr:$dst), (i16 (load addr:$src))), addr:$dst), (SUB16mm addr:$dst, addr:$src)>; -def : Pat<(subc GR8:$src1, GR8:$src2), - (SUB8rr GR8:$src1, GR8:$src2)>; -def : Pat<(subc GR8:$src1, (load addr:$src2)), - (SUB8rm GR8:$src1, addr:$src2)>; -def : Pat<(subc GR8:$src1, imm:$src2), - (SUB8ri GR8:$src1, imm:$src2)>; +def : Pat<(subc GR8:$src, GR8:$src2), + (SUB8rr GR8:$src, GR8:$src2)>; +def : Pat<(subc GR8:$src, (load addr:$src2)), + (SUB8rm GR8:$src, addr:$src2)>; +def : Pat<(subc GR8:$src, imm:$src2), + (SUB8ri GR8:$src, imm:$src2)>; def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst), (SUB8mr addr:$dst, GR8:$src)>; def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), @@ -1201,6 +1206,6 @@ def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), // peephole patterns def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>; -def : Pat<(MSP430cmp (trunc (and_su GR16:$src1, GR16:$src2)), 0), - (BIT8rr (EXTRACT_SUBREG GR16:$src1, subreg_8bit), +def : Pat<(MSP430cmp (trunc (and_su GR16:$src, GR16:$src2)), 0), + (BIT8rr (EXTRACT_SUBREG GR16:$src, subreg_8bit), (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>; diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 0cae267..608ca49 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -71,48 +71,6 @@ MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } -const TargetRegisterClass *const * -MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - const Function* F = MF->getFunction(); - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClassesFP[] = { - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClassesIntr[] = { - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClassesIntrFP[] = { - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, &MSP430::GR16RegClass, - &MSP430::GR16RegClass, 0 - }; - - if (hasFP(*MF)) - return (F->getCallingConv() == CallingConv::MSP430_INTR ? - CalleeSavedRegClassesIntrFP : CalleeSavedRegClassesFP); - else - return (F->getCallingConv() == CallingConv::MSP430_INTR ? - CalleeSavedRegClassesIntr : CalleeSavedRegClasses); -} - BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); @@ -270,8 +228,8 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const { // Create a frame entry for the FPW register that must be saved. 
if (hasFP(MF)) { - int ATTRIBUTE_UNUSED FrameIdx = - MF.getFrameInfo()->CreateFixedObject(2, -4, true, false); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true); + (void)FrameIdx; assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && "Slot for FPW register must be last in order to be found!"); } diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index c8684df..6e58d31 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -36,9 +36,6 @@ public: /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; const TargetRegisterClass* getPointerRegClass(unsigned Kind = 0) const; diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp index 4ef017a..2037a91 100644 --- a/lib/Target/Mangler.cpp +++ b/lib/Target/Mangler.cpp @@ -180,7 +180,7 @@ void Mangler::getNameWithPrefix(SmallVectorImpl &OutName, ManglerPrefixTy PrefixTy = Mangler::Default; if (GV->hasPrivateLinkage() || isImplicitlyPrivate) PrefixTy = Mangler::Private; - else if (GV->hasLinkerPrivateLinkage()) + else if (GV->hasLinkerPrivateLinkage() || GV->hasLinkerPrivateWeakLinkage()) PrefixTy = Mangler::LinkerPrivate; // If this global has a name, handle it simply. diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 4d7fe4c..8ae05b7 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -133,8 +133,9 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { const MachineFrameInfo *MFI = MF->getFrameInfo(); const std::vector &CSI = MFI->getCalleeSavedInfo(); for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(CSI[i].getReg()); - if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass) + unsigned Reg = CSI[i].getReg(); + unsigned RegNum = MipsRegisterInfo::getRegisterNumbering(Reg); + if (Mips::CPURegsRegisterClass->contains(Reg)) CPUBitmask |= (1 << RegNum); else FPUBitmask |= (1 << RegNum); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index e979c3f..b6ff2c3 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -284,6 +284,18 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Next, add the true and fallthrough blocks as its successors. 
+  BB->addSuccessor(copy0MBB);
+  BB->addSuccessor(sinkMBB);
 
   // Emit the right instruction according to the type of the operands compared
   if (isFPCmp) {
@@ -296,20 +308,6 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg())
       .addReg(Mips::ZERO).addMBB(sinkMBB);
 
-  F->insert(It, copy0MBB);
-  F->insert(It, sinkMBB);
-  // Update machine-CFG edges by first adding all successors of the current
-  // block to the new block which will contain the Phi node for the select.
-  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
-      e = BB->succ_end(); i != e; ++i)
-    sinkMBB->addSuccessor(*i);
-  // Next, remove all successors of the current block, and add the true
-  // and fallthrough blocks as its successors.
-  while(!BB->succ_empty())
-    BB->removeSuccessor(BB->succ_begin());
-  BB->addSuccessor(copy0MBB);
-  BB->addSuccessor(sinkMBB);
-
   // copy0MBB:
   //  %FalseValue = ...
   //  # fallthrough to sinkMBB
@@ -322,11 +320,12 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   //  %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
   //  ...
   BB = sinkMBB;
-  BuildMI(BB, dl, TII->get(Mips::PHI), MI->getOperand(0).getReg())
+  BuildMI(*BB, BB->begin(), dl,
+          TII->get(Mips::PHI), MI->getOperand(0).getReg())
     .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB)
     .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB);
 
-  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
   return BB;
 }
 }
@@ -490,21 +489,21 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op,
     // %gp_rel relocation
     if (TLOF.IsGlobalInSmallSection(GV, getTargetMachine())) {
-      SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+      SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
                                               MipsII::MO_GPREL);
       SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, dl, VTs, &GA, 1);
       SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32);
       return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode);
     }
     // %hi/%lo relocation
-    SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
                                             MipsII::MO_ABS_HILO);
     SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1);
     SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA);
     return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo);
   } else {
-    SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32, 0,
+    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
                                             MipsII::MO_GOT);
     SDValue ResNode = DAG.getLoad(MVT::i32, dl,
                                   DAG.getEntryNode(), GA, NULL, 0,
@@ -768,6 +767,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                               CallingConv::ID CallConv, bool isVarArg,
                               bool &isTailCall,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
+                              const SmallVectorImpl<SDValue> &OutVals,
                               const SmallVectorImpl<ISD::InputArg> &Ins,
                               DebugLoc dl, SelectionDAG &DAG,
                               SmallVectorImpl<SDValue> &InVals) const {
@@ -787,7 +787,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // the stack (even if less than 4 are used as arguments)
   if (Subtarget->isABI_O32()) {
     int VTsize = EVT(MVT::i32).getSizeInBits()/8;
-    MFI->CreateFixedObject(VTsize, (VTsize*3), true, false);
+    MFI->CreateFixedObject(VTsize, (VTsize*3), true);
     CCInfo.AnalyzeCallOperands(Outs,
                                isVarArg ? CC_MipsO32_VarArgs : CC_MipsO32);
   } else
@@ -808,7 +808,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // Walk the register/memloc assignments, inserting copies/loads.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-    SDValue Arg = Outs[i].Val;
+    SDValue Arg = OutVals[i];
     CCValAssign &VA = ArgLocs[i];
 
     // Promote the value if needed.
@@ -857,7 +857,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       // if O32 ABI is used. For EABI the first address is zero.
       LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
       int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
-                                      LastArgStackLoc, true, false);
+                                      LastArgStackLoc, true);
 
       SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
@@ -889,7 +889,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   // node so that legalize doesn't hack it.
   unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG;
   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
-    Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                         getPointerTy(), 0, OpFlag);
   else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
     Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
@@ -929,7 +929,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
     // Create the frame index only once. SPOffset here can be anything
     // (this will be fixed on processFunctionBeforeFrameFinalized)
     if (MipsFI->getGPStackOffset() == -1) {
-      FI = MFI->CreateFixedObject(4, 0, true, false);
+      FI = MFI->CreateFixedObject(4, 0, true);
       MipsFI->setGPFI(FI);
     }
     MipsFI->setGPStackOffset(LastArgStackLoc);
@@ -1098,7 +1098,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
       // offset on PEI::calculateFrameObjectOffsets.
       // Arguments are always 32-bit.
       unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
-      int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
+      int FI = MFI->CreateFixedObject(ArgSize, 0, true);
       MipsFI->recordLoadArgsFI(FI, -(ArgSize+
         (FirstStackArgLoc + VA.getLocMemOffset())));
@@ -1137,7 +1137,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
       unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC);
       SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32);
 
-      int FI = MFI->CreateFixedObject(4, 0, true, false);
+      int FI = MFI->CreateFixedObject(4, 0, true);
       MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4)));
       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
       OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
@@ -1169,6 +1169,7 @@ SDValue
 MipsTargetLowering::LowerReturn(SDValue Chain,
                                 CallingConv::ID CallConv, bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                                const SmallVectorImpl<SDValue> &OutVals,
                                 DebugLoc dl, SelectionDAG &DAG) const {
 
   // CCValAssign - represent the assignment of
@@ -1198,7 +1199,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
     assert(VA.isRegLoc() && "Can only return in registers!");
 
     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
-                             Outs[i].Val, Flag);
+                             OutVals[i], Flag);
 
     // guarantee that all emitted copies are
     // stuck together, avoiding something bad
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index f2de489..460747b 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -120,6 +120,7 @@ namespace llvm {
               CallingConv::ID CallConv, bool isVarArg,
               bool &isTailCall,
               const SmallVectorImpl<ISD::OutputArg> &Outs,
+              const SmallVectorImpl<SDValue> &OutVals,
               const SmallVectorImpl<ISD::InputArg> &Ins,
               DebugLoc dl, SelectionDAG &DAG,
               SmallVectorImpl<SDValue> &InVals) const;
@@ -128,6 +129,7 @@ namespace llvm {
     LowerReturn(SDValue Chain,
                 CallingConv::ID CallConv, bool isVarArg,
                 const SmallVectorImpl<ISD::OutputArg> &Outs,
+                const SmallVectorImpl<SDValue> &OutVals,
                 DebugLoc dl,
SelectionDAG &DAG) const; virtual MachineBasicBlock * diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 4005e35..6c09a3e 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -127,61 +127,75 @@ insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const BuildMI(MBB, MI, DL, get(Mips::NOP)); } -bool MipsInstrInfo:: -copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { +void MipsInstrInfo:: +copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + bool DestCPU = Mips::CPURegsRegClass.contains(DestReg); + bool SrcCPU = Mips::CPURegsRegClass.contains(SrcReg); + + // CPU-CPU is the most common. + if (DestCPU && SrcCPU) { + BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } - if (DestRC != SrcRC) { - - // Copy to/from FCR31 condition register - if ((DestRC == Mips::CPURegsRegisterClass) && - (SrcRC == Mips::CCRRegisterClass)) - BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg).addReg(SrcReg); - else if ((DestRC == Mips::CCRRegisterClass) && - (SrcRC == Mips::CPURegsRegisterClass)) - BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg).addReg(SrcReg); - - // Moves between coprocessors and cpu - else if ((DestRC == Mips::CPURegsRegisterClass) && - (SrcRC == Mips::FGR32RegisterClass)) - BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg).addReg(SrcReg); - else if ((DestRC == Mips::FGR32RegisterClass) && - (SrcRC == Mips::CPURegsRegisterClass)) - BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg); - - // Move from/to Hi/Lo registers - else if ((DestRC == Mips::HILORegisterClass) && - (SrcRC == Mips::CPURegsRegisterClass)) { - unsigned Opc = (DestReg == Mips::HI) ? Mips::MTHI : Mips::MTLO; - BuildMI(MBB, I, DL, get(Opc), DestReg); - } else if ((SrcRC == Mips::HILORegisterClass) && - (DestRC == Mips::CPURegsRegisterClass)) { - unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO; - BuildMI(MBB, I, DL, get(Opc), DestReg); - } else - // Can't copy this register - return false; + // Copy to CPU from other registers. + if (DestCPU) { + if (Mips::CCRRegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(Mips::CFC1), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (Mips::FGR32RegClass.contains(SrcReg)) + BuildMI(MBB, I, DL, get(Mips::MFC1), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (SrcReg == Mips::HI) + BuildMI(MBB, I, DL, get(Mips::MFHI), DestReg); + else if (SrcReg == Mips::LO) + BuildMI(MBB, I, DL, get(Mips::MFLO), DestReg); + else + llvm_unreachable("Copy to CPU from invalid register"); + return; + } - return true; + // Copy to other registers from CPU. 
+ if (SrcCPU) { + if (Mips::CCRRegClass.contains(DestReg)) + BuildMI(MBB, I, DL, get(Mips::CTC1), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (Mips::FGR32RegClass.contains(DestReg)) + BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (DestReg == Mips::HI) + BuildMI(MBB, I, DL, get(Mips::MTHI)) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (DestReg == Mips::LO) + BuildMI(MBB, I, DL, get(Mips::MTLO)) + .addReg(SrcReg, getKillRegState(KillSrc)); + else + llvm_unreachable("Copy from CPU to invalid register"); + return; } - if (DestRC == Mips::CPURegsRegisterClass) - BuildMI(MBB, I, DL, get(Mips::ADDu), DestReg).addReg(Mips::ZERO) - .addReg(SrcReg); - else if (DestRC == Mips::FGR32RegisterClass) - BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg).addReg(SrcReg); - else if (DestRC == Mips::AFGR64RegisterClass) - BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg).addReg(SrcReg); - else if (DestRC == Mips::CCRRegisterClass) - BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg).addReg(SrcReg); - else - // Can't copy this register - return false; + if (Mips::FGR32RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(Mips::FMOV_S32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } - return true; + if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + + if (Mips::CCRRegClass.contains(DestReg, SrcReg)) { + BuildMI(MBB, I, DL, get(Mips::MOVCCRToCCR), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; + } + llvm_unreachable("Cannot copy registers"); } void MipsInstrInfo:: @@ -247,80 +261,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Register class not handled!"); } -MachineInstr *MipsInstrInfo:: -foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, int FI) const -{ - if (Ops.size() != 1) return NULL; - - MachineInstr *NewMI = NULL; - - switch (MI->getOpcode()) { - case Mips::ADDu: - if ((MI->getOperand(0).isReg()) && - (MI->getOperand(1).isReg()) && - (MI->getOperand(1).getReg() == Mips::ZERO) && - (MI->getOperand(2).isReg())) { - if (Ops[0] == 0) { // COPY -> STORE - unsigned SrcReg = MI->getOperand(2).getReg(); - bool isKill = MI->getOperand(2).isKill(); - bool isUndef = MI->getOperand(2).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::SW)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI); - } else { // COPY -> LOAD - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(Mips::LW)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI); - } - } - break; - case Mips::FMOV_S32: - case Mips::FMOV_D32: - if ((MI->getOperand(0).isReg()) && - (MI->getOperand(1).isReg())) { - const TargetRegisterClass - *RC = RI.getRegClass(MI->getOperand(0).getReg()); - unsigned StoreOpc, LoadOpc; - bool IsMips1 = TM.getSubtarget().isMips1(); - - if (RC == Mips::FGR32RegisterClass) { - LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1; - } else { - assert(RC == Mips::AFGR64RegisterClass); - // Mips1 doesn't have ldc/sdc instructions. 
- if (IsMips1) break; - LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1; - } - - if (Ops[0] == 0) { // COPY -> STORE - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(2).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(StoreOpc)) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI) ; - } else { // COPY -> LOAD - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(LoadOpc)) - .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | - getUndefRegState(isUndef)) - .addImm(0).addFrameIndex(FI); - } - } - break; - } - - return NewMI; -} - //===----------------------------------------------------------------------===// // Branch Analysis //===----------------------------------------------------------------------===// @@ -520,9 +460,8 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, unsigned MipsInstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) && @@ -531,18 +470,18 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (FBB == 0) { // One way branch. if (Cond.empty()) { // Unconditional branch? - BuildMI(&MBB, dl, get(Mips::J)).addMBB(TBB); + BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB); } else { // Conditional branch. unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm()); const TargetInstrDesc &TID = get(Opc); if (TID.getNumOperands() == 3) - BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()) + BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()) .addReg(Cond[2].getReg()) .addMBB(TBB); else - BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()) + BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()) .addMBB(TBB); } @@ -554,12 +493,12 @@ InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, const TargetInstrDesc &TID = get(Opc); if (TID.getNumOperands() == 3) - BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg()) + BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg()) .addMBB(TBB); else - BuildMI(&MBB, dl, TID).addReg(Cond[1].getReg()).addMBB(TBB); + BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addMBB(TBB); - BuildMI(&MBB, dl, get(Mips::J)).addMBB(FBB); + BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB); return 2; } @@ -621,12 +560,8 @@ unsigned MipsInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); GlobalBaseReg = RegInfo.createVirtualRegister(Mips::CPURegsRegisterClass); - bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, Mips::GP, - Mips::CPURegsRegisterClass, - Mips::CPURegsRegisterClass, - DebugLoc()); - assert(Ok && "Couldn't assign to global base register!"); - Ok = Ok; // Silence warning when assertions are turned off. 
+ BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), + GlobalBaseReg).addReg(Mips::GP); RegInfo.addLiveIn(Mips::GP); MipsFI->setGlobalBaseReg(GlobalBaseReg); diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 7919d9a..d6f87f9 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -204,13 +204,12 @@ public: virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg, bool isKill, int FrameIndex, @@ -223,18 +222,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - return 0; - } - virtual bool ReverseBranchCondition(SmallVectorImpl &Cond) const; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 2b9e941..5337c9f 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -541,7 +541,7 @@ let Predicates = [HasSwap] in { def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>; def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>; -let Predicates = [HasCondMov], isTwoAddress = 1 in { +let Predicates = [HasCondMov], Constraints = "$F = $dst" in { def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>; def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 5e719af..e15f0a5 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -116,34 +116,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const return BitMode32CalleeSavedRegs; } -/// Mips Callee Saved Register Classes -const TargetRegisterClass* const* -MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const -{ - static const TargetRegisterClass * const SingleFloatOnlyCalleeSavedRC[] = { - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0 - }; - - static const TargetRegisterClass * const BitMode32CalleeSavedRC[] = { - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::FGR32RegClass, 
&Mips::FGR32RegClass, &Mips::FGR32RegClass, 0 - }; - - if (Subtarget.isSingleFloat()) - return SingleFloatOnlyCalleeSavedRC; - else - return BitMode32CalleeSavedRC; -} - BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { @@ -279,7 +251,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const StackOffset = ((StackOffset+StackAlign-1)/StackAlign*StackAlign); for (unsigned i = 0, e = CSI.size(); i != e ; ++i) { - if (CSI[i].getRegClass() != Mips::CPURegsRegisterClass) + unsigned Reg = CSI[i].getReg(); + if (!Mips::CPURegsRegisterClass->contains(Reg)) break; MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset); TopCPUSavedRegOff = StackOffset; @@ -311,7 +284,8 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const // Adjust FPU Callee Saved Registers Area. This Area must be // aligned to the default Stack Alignment requirements. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - if (CSI[i].getRegClass() == Mips::CPURegsRegisterClass) + unsigned Reg = CSI[i].getReg(); + if (Mips::CPURegsRegisterClass->contains(Reg)) continue; MFI->setObjectOffset(CSI[i].getFrameIdx(), StackOffset); TopFPUSavedRegOff = StackOffset; @@ -528,4 +502,3 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const { } #include "MipsGenRegisterInfo.inc" - diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index bc857b8..b500a65 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -42,9 +42,6 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction* MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index f479f46..54a6a28 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -672,7 +672,8 @@ SDValue PIC16TargetLowering::ExpandGlobalAddress(SDNode *N, // FIXME there isn't really debug info here DebugLoc dl = G->getDebugLoc(); - SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i8, + SDValue TGA = DAG.getTargetGlobalAddress(G->getGlobal(), N->getDebugLoc(), + MVT::i8, G->getOffset()); SDValue Offset = DAG.getConstant(0, MVT::i8); @@ -1120,6 +1121,7 @@ SDValue PIC16TargetLowering:: LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG) const { unsigned NumOps = Outs.size(); @@ -1136,7 +1138,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, unsigned RetVals = Ins.size(); for (unsigned i = 0, ArgOffset = RetVals; i < NumOps; i++) { // Get the arguments - Arg = Outs[i].Val; + Arg = OutVals[i]; Ops.clear(); Ops.push_back(Chain); @@ -1158,6 +1160,7 @@ LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue PIC16TargetLowering:: LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { unsigned NumOps = Outs.size(); std::string Name; @@ -1183,7 +1186,7 @@ LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, SDVTList Tys = DAG.getVTList(MVT::Other, 
MVT::Flag); for (unsigned i=0, Offset = 0; i &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // Number of values to return @@ -1298,7 +1302,7 @@ PIC16TargetLowering::LowerReturn(SDValue Chain, SDValue BS = DAG.getConstant(1, MVT::i8); SDValue RetVal; for(unsigned i=0;i &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -1428,7 +1433,7 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Considering the GlobalAddressNode case here. if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); - Callee = DAG.getTargetGlobalAddress(GV, MVT::i8); + Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i8); Name = G->getGlobal()->getName(); } else {// Considering the ExternalSymbol case here ExternalSymbolSDNode *ES = dyn_cast(Callee); @@ -1461,12 +1466,13 @@ PIC16TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue CallArgs; if (IsDirectCall) { CallArgs = LowerDirectCallArguments(ArgLabel, Chain, OperFlag, - Outs, dl, DAG); + Outs, OutVals, dl, DAG); Chain = getChain(CallArgs); OperFlag = getOutFlag(CallArgs); } else { CallArgs = LowerIndirectCallArguments(Chain, OperFlag, DataAddr_Lo, - DataAddr_Hi, Outs, Ins, dl, DAG); + DataAddr_Hi, Outs, OutVals, Ins, + dl, DAG); Chain = getChain(CallArgs); OperFlag = getOutFlag(CallArgs); } @@ -1791,14 +1797,14 @@ static PIC16CC::CondCodes IntCCToPIC16CC(ISD::CondCode CC) { static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, unsigned &SPCC) { if (isa(RHS) && - cast(RHS)->getZExtValue() == 0 && + cast(RHS)->isNullValue() && CC == ISD::SETNE && (LHS.getOpcode() == PIC16ISD::SELECT_ICC && LHS.getOperand(3).getOpcode() == PIC16ISD::SUBCC) && isa(LHS.getOperand(0)) && isa(LHS.getOperand(1)) && - cast(LHS.getOperand(0))->getZExtValue() == 1 && - cast(LHS.getOperand(1))->getZExtValue() == 0) { + cast(LHS.getOperand(0))->isOne() && + cast(LHS.getOperand(1))->isNullValue()) { SDValue CMPCC = LHS.getOperand(3); SPCC = cast(LHS.getOperand(2))->getZExtValue(); LHS = CMPCC.getOperand(0); @@ -1928,15 +1934,12 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); @@ -1953,11 +1956,12 @@ PIC16TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... 
BB = sinkMBB; - BuildMI(BB, dl, TII.get(PIC16::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII.get(PIC16::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/lib/Target/PIC16/PIC16ISelLowering.h b/lib/Target/PIC16/PIC16ISelLowering.h index eea17f8..0a7506c 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.h +++ b/lib/Target/PIC16/PIC16ISelLowering.h @@ -106,12 +106,14 @@ namespace llvm { SDValue LowerDirectCallArguments(SDValue ArgLabel, SDValue Chain, SDValue InFlag, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerIndirectCallArguments(SDValue Chain, SDValue InFlag, SDValue DataAddr_Lo, SDValue DataAddr_Hi, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG) const; @@ -143,6 +145,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -151,6 +154,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue ExpandStore(SDNode *N, SelectionDAG &DAG) const; diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp index 793dd9f..e784f74 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.cpp +++ b/lib/Target/PIC16/PIC16InstrInfo.cpp @@ -151,25 +151,20 @@ void PIC16InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, llvm_unreachable("Can't load this register from stack slot"); } -bool PIC16InstrInfo::copyRegToReg (MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - if (DestRC == PIC16::FSR16RegisterClass) { - BuildMI(MBB, I, DL, get(PIC16::copy_fsr), DestReg).addReg(SrcReg); - return true; - } - - if (DestRC == PIC16::GPRRegisterClass) { - BuildMI(MBB, I, DL, get(PIC16::copy_w), DestReg).addReg(SrcReg); - return true; - } +void PIC16InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (PIC16::FSR16RegClass.contains(DestReg, SrcReg)) + Opc = PIC16::copy_fsr; + else if (PIC16::GPRRegClass.contains(DestReg, SrcReg)) + Opc = PIC16::copy_w; + else + llvm_unreachable("Impossible reg-to-reg copy"); - // Not yet supported. - return false; + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI, @@ -196,15 +191,15 @@ bool PIC16InstrInfo::isMoveInstr(const MachineInstr &MI, unsigned PIC16InstrInfo:: InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const { + const SmallVectorImpl &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); if (FBB == 0) { // One way branch. if (Cond.empty()) { // Unconditional branch? 
- DebugLoc dl; - BuildMI(&MBB, dl, get(PIC16::br_uncond)).addMBB(TBB); + BuildMI(&MBB, DL, get(PIC16::br_uncond)).addMBB(TBB); } return 1; } diff --git a/lib/Target/PIC16/PIC16InstrInfo.h b/lib/Target/PIC16/PIC16InstrInfo.h index 40a4cb4..a3a77f1 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.h +++ b/lib/Target/PIC16/PIC16InstrInfo.h @@ -57,12 +57,10 @@ public: unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SrcSubIdx, unsigned &DstSubIdx) const; @@ -70,7 +68,8 @@ public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; + const SmallVectorImpl &Cond, + DebugLoc DL) const; virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, diff --git a/lib/Target/PIC16/PIC16InstrInfo.td b/lib/Target/PIC16/PIC16InstrInfo.td index 24df251..86d36cb 100644 --- a/lib/Target/PIC16/PIC16InstrInfo.td +++ b/lib/Target/PIC16/PIC16InstrInfo.td @@ -134,7 +134,7 @@ include "PIC16InstrFormats.td" //===----------------------------------------------------------------------===// // W = W Op F : Load the value from F and do Op to W. -let isTwoAddress = 1, mayLoad = 1 in +let Constraints = "$src = $dst", mayLoad = 1 in class BinOpFW OpCode, string OpcStr, SDNode OpNode>: ByteFormat OpCode, string OpcStr, SDNode OpNode>: // F = F Op W : Load the value from F, do op with W and store in F. // This insn class is not marked as TwoAddress because the reg is // being used as a source operand only. (Remember a TwoAddress insn -// needs a copyRegToReg.) +// needs a copy.) let mayStore = 1 in class BinOpWF OpCode, string OpcStr, SDNode OpNode>: ByteFormat OpCode, string OpcStr, SDNode OpNode>: )]>; // W = W Op L : Do Op of L with W and place result in W. -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in class BinOpWL opcode, string OpcStr, SDNode OpNode> : LiteralFormat; -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def set_fsrhi: ByteFormat<0, (outs FSR16:$dst), (ins FSR16:$src, GPR:$val), @@ -234,8 +234,8 @@ def set_pclath: [(set PCLATHR:$dst , (MTPCLATH GPR:$val))]>; //---------------------------- -// copyRegToReg -// copyRegToReg insns. These are dummy. They should always be deleted +// copyPhysReg +// copyPhysReg insns. These are dummy. They should always be deleted // by the optimizer and never be present in the final generated code. // if they are, then we have to write correct macros for these insns. //---------------------------- @@ -362,7 +362,7 @@ def addwfc: BinOpWF<0, "addwfc", adde>; // With Carry. } // W -= [F] ; load from F and sub the value from W. -let isTwoAddress = 1, mayLoad = 1 in +let Constraints = "$src = $dst", mayLoad = 1 in class SUBFW OpCode, string OpcStr, SDNode OpNode>: ByteFormat; // sublw // W = C - W ; sub W from literal. (Without borrow). 
-let isTwoAddress = 1 in +let Constraints = "$src = $dst" in class SUBLW opcode, string OpcStr, SDNode OpNode> : LiteralFormat opcode, string OpcStr, SDNode OpNode> : [(set GPR:$dst, (OpNode (i8 imm:$literal), GPR:$src))]>; // subwl // W = W - C ; sub literal from W (Without borrow). -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in class SUBWL opcode, string OpcStr, SDNode OpNode> : LiteralFormatgetDebugLoc(); BuildMI(*MBB, I, dl, TII->get(PIC16::pagesel)).addExternalSymbol("$"); Changed = true; - PageChanged = 0; + PageChanged = 0; } } } diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp index c282521..27f1cf5 100644 --- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp +++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp @@ -150,8 +150,8 @@ void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) { // For PIC16, automatic variables of a function are emitted as globals. -// Clone the auto variables of a function and put them in ValueMap, -// this ValueMap will be used while +// Clone the auto variables of a function and put them in VMap, +// this VMap will be used while // Cloning the code of function itself. // void PIC16Cloner::CloneAutos(Function *F) { @@ -160,11 +160,11 @@ void PIC16Cloner::CloneAutos(Function *F) { Module *M = F->getParent(); Module::GlobalListType &Globals = M->getGlobalList(); - // Clear the leftovers in ValueMap by any previous cloning. - ValueMap.clear(); + // Clear the leftovers in VMap by any previous cloning. + VMap.clear(); // Find the auto globls for this function and clone them, and put them - // in ValueMap. + // in VMap. std::string FnName = F->getName().str(); std::string VarName, ClonedVarName; for (Module::global_iterator I = M->global_begin(), E = M->global_end(); @@ -182,8 +182,8 @@ void PIC16Cloner::CloneAutos(Function *F) { // Add these new globals to module's globals list. Globals.push_back(ClonedGV); - // Update ValueMap. - ValueMap[GV] = ClonedGV; + // Update VMap. + VMap[GV] = ClonedGV; } } } @@ -236,10 +236,10 @@ void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) { } // Clone the given function and return it. -// Note: it uses the ValueMap member of the class, which is already populated +// Note: it uses the VMap member of the class, which is already populated // by cloneAutos by the time we reach here. -// FIXME: Should we just pass ValueMap's ref as a parameter here? rather -// than keeping the ValueMap as a member. +// FIXME: Should we just pass VMap's ref as a parameter here? rather +// than keeping the VMap as a member. Function * PIC16Cloner::cloneFunction(Function *OrgF) { Function *ClonedF; @@ -252,11 +252,11 @@ PIC16Cloner::cloneFunction(Function *OrgF) { } // Clone does not exist. - // First clone the autos, and populate ValueMap. + // First clone the autos, and populate VMap. CloneAutos(OrgF); // Now create the clone. - ClonedF = CloneFunction(OrgF, ValueMap); + ClonedF = CloneFunction(OrgF, VMap); // The new function should be for interrupt line. Therefore should have // the name suffixed with IL and section attribute marked with IL. 
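[Editorial aside, not part of the patch: the PIC16Cloner hunks above all revolve around map-driven cloning, where the map is seeded with replacement values before the body is copied so that every operand use is remapped in the clone. A minimal sketch of that pattern, assuming the r108243-era CloneFunction(Function*, ValueMap&) entry point seen in the diff; OrgF, OldGV and ClonedGV are hypothetical values standing in for the pass's function and auto-global pair.]

    #include "llvm/ADT/ValueMap.h"
    #include "llvm/Transforms/Utils/Cloning.h"
    using namespace llvm;

    Function *cloneWithRemappedAutos(Function *OrgF, GlobalVariable *OldGV,
                                     GlobalVariable *ClonedGV) {
      // Seed the map *before* cloning: the cloner consults it while
      // remapping instruction operands, so uses are rewritten on the fly.
      ValueMap<const Value *, Value *> VMap;
      VMap[OldGV] = ClonedGV;                 // auto global -> cloned global
      Function *ClonedF = CloneFunction(OrgF, VMap);
      // Every use of OldGV in OrgF's body now refers to ClonedGV in ClonedF.
      return ClonedF;
    }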
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
index 24c1152..e8b5aa4 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
@@ -15,7 +15,7 @@
 #ifndef PIC16CLONER_H
 #define PIC16CLONER_H
 
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
 
 using namespace llvm;
 using std::vector;
@@ -72,7 +72,7 @@ namespace llvm {
     // the corresponding cloned auto variable of the cloned function.
     // This value map is passed during the function cloning so that all the
     // uses of auto variables be updated properly.
-    DenseMap<const Value *, Value *> ValueMap;
+    ValueMap<const Value *, Value *> VMap;
 
     // Map of a already cloned functions.
     map<Function *, Function *> ClonedFunctionMap;
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
index 30a1d4a..dff98d1 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -35,13 +35,6 @@ getCalleeSavedRegs(const MachineFunction *MF) const {
   return CalleeSavedRegs;
 }
 
-// PIC16 Callee Saved Reg Classes
-const TargetRegisterClass* const*
-PIC16RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
-  static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
-  return CalleeSavedRegClasses;
-}
-
 BitVector PIC16RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   BitVector Reserved(getNumRegs());
   return Reserved;
 }
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
index 6a9a038..5536a61 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.h
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -41,10 +41,6 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo {
   virtual const unsigned*
   getCalleeSavedRegs(const MachineFunction *MF = 0) const;
 
-  // PIC16 callee saved register classes
-  virtual const TargetRegisterClass* const *
-  getCalleeSavedRegClasses(const MachineFunction *MF) const;
-
   virtual BitVector getReservedRegs(const MachineFunction &MF) const;
   virtual bool hasFP(const MachineFunction &MF) const;
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 66dfd4b..db11fde 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -78,7 +78,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
   isLoad  = TID.mayLoad();
   isStore = TID.mayStore();
 
-  unsigned TSFlags = TID.TSFlags;
+  uint64_t TSFlags = TID.TSFlags;
 
   isFirst  = TSFlags & PPCII::PPC970_First;
   isSingle = TSFlags & PPCII::PPC970_Single;
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 10b516a..d47d989 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1203,11 +1203,11 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
   EVT PtrVT = Op.getValueType();
   GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
-  const GlobalValue *GV = GSDN->getGlobal();
-  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
-  SDValue Zero = DAG.getConstant(0, PtrVT);
   // FIXME there isn't really any debug info here
   DebugLoc dl = GSDN->getDebugLoc();
+  const GlobalValue *GV = GSDN->getGlobal();
+  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GSDN->getOffset());
+  SDValue Zero = DAG.getConstant(0, PtrVT);
 
   const TargetMachine &TM = DAG.getTarget();
 
@@ -1631,7 +1631,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
       unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
       int FI =
MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), - isImmutable, false); + isImmutable); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); @@ -1700,8 +1700,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( FuncInfo->setVarArgsStackOffset( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - CCInfo.getNextStackOffset(), - true, false)); + CCInfo.getNextStackOffset(), true)); FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); @@ -1911,7 +1910,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset = CurArgOffset + (4 - ObjSize); } // The value of the object is its address. - int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true, false); + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { @@ -1936,7 +1935,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // the object. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false); + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0, @@ -2062,7 +2061,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( if (needsLoad) { int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset + (ArgSize - ObjSize), - isImmutable, false); + isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0); @@ -2097,7 +2096,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - Depth, true, false)); + Depth, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); // If this function is vararg, store any remaining integer argument regs @@ -2137,6 +2136,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, unsigned CC, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, unsigned &nAltivecParamsAtEnd) { // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is @@ -2153,9 +2153,9 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, // 16-byte aligned. nAltivecParamsAtEnd = 0; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; - EVT ArgVT = Arg.getValueType(); + EVT ArgVT = Outs[i].VT; // Varargs Altivec parameters are padded to a 16 byte boundary. if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { @@ -2314,8 +2314,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, - NewRetAddrLoc, - true, false); + NewRetAddrLoc, true); EVT VT = isPPC64 ? 
MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, @@ -2328,7 +2327,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, - true, false); + true); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, PseudoSourceValue::getFixedStack(NewFPIdx), 0, @@ -2346,7 +2345,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SmallVector& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; - int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true,false); + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; @@ -2472,7 +2471,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. if (GlobalAddressSDNode *G = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType()); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + Callee.getValueType()); else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) @@ -2705,6 +2705,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -2714,11 +2715,11 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, Ins, + isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); } else { return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, - isTailCall, Outs, Ins, + isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); } } @@ -2728,6 +2729,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -2737,7 +2739,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) && "Unknown calling convention!"); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); unsigned PtrByteSize = 4; MachineFunction &MF = DAG.getMachineFunction(); @@ -2769,7 +2770,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { - EVT ArgVT = Outs[i].Val.getValueType(); + EVT ArgVT = Outs[i].VT; ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; bool Result; @@ -2838,7 +2839,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (Flags.isByVal()) { @@ 
-2934,6 +2935,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -2961,7 +2963,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // prereserved space for [SP][CR][LR][3 x unused]. unsigned NumBytes = CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, - Outs, + Outs, OutVals, nAltivecParamsAtEnd); // Calculate by how many bytes the stack has to be adjusted in case of tail @@ -3025,7 +3027,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SmallVector MemOpChains; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // PtrOff will be used to store the current argument to the stack if a @@ -3051,7 +3053,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Everything else is passed left-justified. EVT VT = (Size==1) ? MVT::i8 : MVT::i16; if (GPR_idx != NumGPRs) { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, dl, Chain, Arg, NULL, 0, VT, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); @@ -3228,8 +3230,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, ArgOffset = ((ArgOffset+15)/16)*16; ArgOffset += 12*16; for (unsigned i = 0; i != NumOps; ++i) { - SDValue Arg = Outs[i].Val; - EVT ArgType = Arg.getValueType(); + SDValue Arg = OutVals[i]; + EVT ArgType = Outs[i].VT; if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { if (++j > NumVRs) { @@ -3297,6 +3299,7 @@ SDValue PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { SmallVector RVLocs; @@ -3318,7 +3321,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); Flag = Chain.getValue(1); } @@ -3376,8 +3379,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // Find out what the fix offset of the frame pointer save area. int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, - true, false); + RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true); // Save the result. FI->setReturnAddrSaveIndex(RASI); } @@ -3403,8 +3405,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, - true, false); + FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); // Save the result. 
FI->setFramePointerSaveIndex(FPSI); } @@ -4518,7 +4519,10 @@ PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); unsigned TmpReg = (!BinOpcode) ? incr : @@ -4583,7 +4587,10 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); F->insert(It, loopMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); MachineRegisterInfo &RegInfo = F->getRegInfo(); const TargetRegisterClass *RC = @@ -4716,23 +4723,22 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); unsigned SelectPred = MI->getOperand(4).getImm(); DebugLoc dl = MI->getDebugLoc(); - BuildMI(BB, dl, TII->get(PPC::BCC)) - .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII->get(PPC::BCC)) + .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -4745,7 +4751,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII->get(PPC::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); } @@ -4831,7 +4838,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->insert(It, loop2MBB); F->insert(It, midMBB); F->insert(It, exitMBB); - exitMBB->transferSuccessors(BB); + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); // thisMBB: // ... 
@@ -4899,7 +4909,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     F->insert(It, loop2MBB);
     F->insert(It, midMBB);
     F->insert(It, exitMBB);
-    exitMBB->transferSuccessors(BB);
+    exitMBB->splice(exitMBB->begin(), BB,
+                    llvm::next(MachineBasicBlock::iterator(MI)),
+                    BB->end());
+    exitMBB->transferSuccessorsAndUpdatePHIs(BB);
 
     MachineRegisterInfo &RegInfo = F->getRegInfo();
     const TargetRegisterClass *RC =
@@ -5025,7 +5038,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     llvm_unreachable("Unexpected instr type to insert");
   }
 
-  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
+  MI->eraseFromParent();   // The pseudo instruction is gone now.
   return BB;
 }
 
@@ -5042,19 +5055,19 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
   default: break;
   case PPCISD::SHL:
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
-      if (C->getZExtValue() == 0)   // 0 << V -> 0.
+      if (C->isNullValue())   // 0 << V -> 0.
         return N->getOperand(0);
     }
     break;
   case PPCISD::SRL:
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
-      if (C->getZExtValue() == 0)   // 0 >>u V -> 0.
+      if (C->isNullValue())   // 0 >>u V -> 0.
         return N->getOperand(0);
     }
     break;
   case PPCISD::SRA:
     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
-      if (C->getZExtValue() == 0 ||   //  0 >>s V -> 0.
+      if (C->isNullValue() ||   //  0 >>s V -> 0.
           C->isAllOnesValue())    // -1 >>s V -> -1.
         return N->getOperand(0);
     }
@@ -5380,11 +5393,8 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
 
 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
-/// vector.  If it is invalid, don't add anything to Ops. If hasMemory is true
-/// it means one of the asm constraint of the inline asm instruction being
-/// processed is 'm'.
+/// vector.  If it is invalid, don't add anything to Ops.
 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
-                                                     bool hasMemory,
                                                      std::vector<SDValue>&Ops,
                                                      SelectionDAG &DAG) const {
   SDValue Result(0,0);
@@ -5443,7 +5453,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
   }
 
   // Handle standard constraint letters.
-  TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG);
+  TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG);
 }
 
 // isLegalAddressingMode - Return true if the addressing mode represented
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 6dcaf1e..700816f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -318,12 +318,9 @@ namespace llvm {
     unsigned getByValTypeAlignment(const Type *Ty) const;
 
     /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
-    /// vector.  If it is invalid, don't add anything to Ops. If hasMemory is
-    /// true it means one of the asm constraint of the inline asm instruction
-    /// being processed is 'm'.
+    /// vector.  If it is invalid, don't add anything to Ops.
virtual void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector &Ops, SelectionDAG &DAG) const; @@ -438,6 +435,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -446,6 +444,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue @@ -465,6 +464,7 @@ namespace llvm { LowerCall_Darwin(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -472,6 +472,7 @@ namespace llvm { LowerCall_SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 1b7a778..1574aa3 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -316,9 +316,8 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const { - // FIXME this should probably have a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -327,50 +326,46 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, // One-way branch. if (FBB == 0) { if (Cond.empty()) // Unconditional branch - BuildMI(&MBB, dl, get(PPC::B)).addMBB(TBB); + BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB); else // Conditional branch - BuildMI(&MBB, dl, get(PPC::BCC)) + BuildMI(&MBB, DL, get(PPC::BCC)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); return 1; } // Two-way Conditional Branch. - BuildMI(&MBB, dl, get(PPC::BCC)) + BuildMI(&MBB, DL, get(PPC::BCC)) .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); - BuildMI(&MBB, dl, get(PPC::B)).addMBB(FBB); + BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB); return 2; } -bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC != SrcRC) { - // Not yet supported! 
- return false; - } - - if (DestRC == PPC::GPRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::G8RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::F4RCRegisterClass || - DestRC == PPC::F8RCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::FMR), DestReg).addReg(SrcReg); - } else if (DestRC == PPC::CRRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg); - } else if (DestRC == PPC::VRRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::VOR), DestReg).addReg(SrcReg).addReg(SrcReg); - } else if (DestRC == PPC::CRBITRCRegisterClass) { - BuildMI(MBB, MI, DL, get(PPC::CROR), DestReg).addReg(SrcReg).addReg(SrcReg); - } else { - // Attempt to copy register that is not GPR or FPR - return false; - } - - return true; +void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (PPC::GPRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR; + else if (PPC::G8RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::OR8; + else if (PPC::F4RCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::FMR; + else if (PPC::CRRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::MCRF; + else if (PPC::VRRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::VOR; + else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg)) + Opc = PPC::CROR; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + const TargetInstrDesc &TID = get(Opc); + if (TID.getNumOperands() == 3) + BuildMI(MBB, I, DL, TID, DestReg) + .addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc)); + else + BuildMI(MBB, I, DL, TID, DestReg).addReg(SrcReg, getKillRegState(KillSrc)); } bool @@ -654,121 +649,6 @@ PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, return &*MIB; } -/// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into -/// copy instructions, turning them into load/store instructions. -MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr *MI, - const SmallVectorImpl &Ops, - int FrameIndex) const { - if (Ops.size() != 1) return NULL; - - // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because - // it takes more than one instruction to store it. 
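// The copyPhysReg hook above replaces copyRegToReg: the target no longer
// receives register classes or reports failure; it derives the opcode from
// the physical registers themselves and asserts on an impossible copy. A
// sketch of a call site under the new contract (registers chosen purely for
// illustration):
//
//   const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
//   TII->copyPhysReg(MBB, InsertPos, DL, PPC::R3, PPC::R4, /*KillSrc=*/true);
//
// The KillSrc flag folds the callers' old getKillRegState bookkeeping into
// the interface, so the kill marker lands on the source operand of whichever
// move instruction gets emitted.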
- unsigned Opc = MI->getOpcode(); - unsigned OpNum = Ops[0]; - - MachineInstr *NewMI = NULL; - if ((Opc == PPC::OR && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) { - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STW)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LWZ)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } else if ((Opc == PPC::OR8 && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) { - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STD)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LD)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } else if (Opc == PPC::FMR || Opc == PPC::FMRSD) { - // The register may be F4RC or F8RC, and that determines the memory op. - unsigned OrigReg = MI->getOperand(OpNum).getReg(); - // We cannot tell the register class from a physreg alone. - if (TargetRegisterInfo::isPhysicalRegister(OrigReg)) - return NULL; - const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigReg); - const bool is64 = RC == PPC::F8RCRegisterClass; - - if (OpNum == 0) { // move -> store - unsigned InReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), - get(is64 ? PPC::STFD : PPC::STFS)) - .addReg(InReg, - getKillRegState(isKill) | - getUndefRegState(isUndef)), - FrameIndex); - } else { // move -> load - unsigned OutReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), - get(is64 ? PPC::LFD : PPC::LFS)) - .addReg(OutReg, - RegState::Define | - getDeadRegState(isDead) | - getUndefRegState(isUndef)), - FrameIndex); - } - } - - return NewMI; -} - -bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const { - if (Ops.size() != 1) return false; - - // Make sure this is a reg-reg copy. Note that we can't handle MCRF, because - // it takes more than one instruction to store it. 
- unsigned Opc = MI->getOpcode(); - - if ((Opc == PPC::OR && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) - return true; - else if ((Opc == PPC::OR8 && - MI->getOperand(1).getReg() == MI->getOperand(2).getReg())) - return true; - else if (Opc == PPC::FMR || Opc == PPC::FMRSD) - return true; - - return false; -} - - bool PPCInstrInfo:: ReverseBranchCondition(SmallVectorImpl &Cond) const { assert(Cond.size() == 2 && "Invalid PPC branch opcode!"); diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index 7a9e11b..eadb21e 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -109,13 +109,12 @@ public: virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -135,23 +134,6 @@ public: const MDNode *MDPtr, DebugLoc DL) const; - /// foldMemoryOperand - PowerPC (like most RISC's) can only fold spills into - /// copy instructions, turning them into load/store instructions. - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - return 0; - } - - virtual bool canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const; - virtual bool ReverseBranchCondition(SmallVectorImpl &Cond) const; diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index 0ff852c..4d6132a9 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -269,140 +269,6 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs; } -const TargetRegisterClass* const* -PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - // 32-bit Darwin calling convention. 
- static const TargetRegisterClass * const Darwin32_CalleeSavedRegClasses[] = { - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - &PPC::GPRCRegClass, 0 - }; - - // 32-bit SVR4 calling convention. - static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = { - &PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRSAVERCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - 0 - }; - - // 64-bit Darwin calling convention. 
- static const TargetRegisterClass * const Darwin64_CalleeSavedRegClasses[] = { - &PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - &PPC::G8RCRegClass, 0 - }; - - // 64-bit SVR4 calling convention. - static const TargetRegisterClass * const SVR4_64_CalleeSavedRegClasses[] = { - &PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, - - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, - &PPC::F8RCRegClass,&PPC::F8RCRegClass, - - &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, - - &PPC::VRSAVERCRegClass, - - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, - - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, - &PPC::CRBITRCRegClass, - - 0 - }; - - if (Subtarget.isDarwinABI()) - return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses : - Darwin32_CalleeSavedRegClasses; - - return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegClasses - : SVR4_CalleeSavedRegClasses; -} - // needsFP - Return true if the specified function should have a dedicated frame // pointer register. This is true if the function has variable sized allocas or // if frame pointer elimination is disabled. @@ -1060,8 +926,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. 
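// The CreateFixedObject calls in this function (and in the Sparc and SystemZ
// lowering later in this patch) lose their trailing bool; the surviving
// parameters are the object size, the SP offset, and the immutability flag.
// The new form, with the values used directly below:
//
//   FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, FPOffset,
//                                               /*Immutable=*/true);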
- FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, - true, false); + FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); // Save the result. FI->setFramePointerSaveIndex(FPSI); } @@ -1069,8 +934,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; if (GuaranteedTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { - MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, - true, false); + MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); } // Reserve a slot closest to SP or frame pointer if we have a dynalloc or @@ -1127,9 +991,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = CSI[i].getRegClass(); - - if (RC == PPC::GPRCRegisterClass) { + if (PPC::GPRCRegisterClass->contains(Reg)) { HasGPSaveArea = true; GPRegs.push_back(CSI[i]); @@ -1137,7 +999,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinGPR) { MinGPR = Reg; } - } else if (RC == PPC::G8RCRegisterClass) { + } else if (PPC::G8RCRegisterClass->contains(Reg)) { HasG8SaveArea = true; G8Regs.push_back(CSI[i]); @@ -1145,7 +1007,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinG8R) { MinG8R = Reg; } - } else if (RC == PPC::F8RCRegisterClass) { + } else if (PPC::F8RCRegisterClass->contains(Reg)) { HasFPSaveArea = true; FPRegs.push_back(CSI[i]); @@ -1154,12 +1016,12 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) MinFPR = Reg; } // FIXME SVR4: Disable CR save area for now. - } else if ( RC == PPC::CRBITRCRegisterClass - || RC == PPC::CRRCRegisterClass) { + } else if (PPC::CRBITRCRegisterClass->contains(Reg) + || PPC::CRRCRegisterClass->contains(Reg)) { // HasCRSaveArea = true; - } else if (RC == PPC::VRSAVERCRegisterClass) { + } else if (PPC::VRSAVERCRegisterClass->contains(Reg)) { HasVRSAVESaveArea = true; - } else if (RC == PPC::VRRCRegisterClass) { + } else if (PPC::VRRCRegisterClass->contains(Reg)) { HasVRSaveArea = true; VRegs.push_back(CSI[i]); @@ -1240,9 +1102,10 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) // which have the CR/CRBIT register class? // Adjust the frame index of the CR spill slot. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - const TargetRegisterClass *RC = CSI[i].getRegClass(); + unsigned Reg = CSI[i].getReg(); - if (RC == PPC::CRBITRCRegisterClass || RC == PPC::CRRCRegisterClass) { + if (PPC::CRBITRCRegisterClass->contains(Reg) || + PPC::CRRCRegisterClass->contains(Reg)) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -1257,9 +1120,9 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) // which have the VRSAVE register class? // Adjust the frame index of the VRSAVE spill slot. 
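// CalleeSavedInfo no longer carries a register class, so each loop in this
// function now classifies a saved register by asking the class itself
// whether it contains the physical register, as in the VRSAVE loop that
// follows. The shape of the rewrite throughout:
//
//   before:  const TargetRegisterClass *RC = CSI[i].getRegClass();
//            if (RC == PPC::VRSAVERCRegisterClass) ...
//   after:   unsigned Reg = CSI[i].getReg();
//            if (PPC::VRSAVERCRegisterClass->contains(Reg)) ...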
for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - const TargetRegisterClass *RC = CSI[i].getRegClass(); + unsigned Reg = CSI[i].getReg(); - if (RC == PPC::VRSAVERCRegisterClass) { + if (PPC::VRSAVERCRegisterClass->contains(Reg)) { int FI = CSI[i].getFrameIdx(); FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); @@ -1762,4 +1625,3 @@ int PPCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { } #include "PPCGenRegisterInfo.inc" - diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 43cf535..f026847 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -42,9 +42,6 @@ public: /// Code Generation virtual methods... const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; /// targetHandlesStackFrameRounding - Returns true if the target is diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 7fa73ed..4d7ee08 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -300,6 +300,14 @@ unsigned long reverse(unsigned v) { return v ^ (t >> 8); } +Neither is this (very standard idiom): + +int f(int n) +{ + return (((n) << 24) | (((n) & 0xff00) << 8) + | (((n) >> 8) & 0xff00) | ((n) >> 24)); +} + //===---------------------------------------------------------------------===// [LOOP RECOGNITION] @@ -898,17 +906,6 @@ The expression should optimize to something like //===---------------------------------------------------------------------===// -From GCC Bug 3756: -int -pn (int n) -{ - return (n >= 0 ? 1 : -1); -} -Should combine to (n >> 31) | 1. Currently not optimized with "clang --emit-llvm-bc | opt -std-compile-opts | llc". - -//===---------------------------------------------------------------------===// - void a(int variable) { if (variable == 4 || variable == 6) @@ -1439,33 +1436,6 @@ This pattern repeats several times, basically doing: //===---------------------------------------------------------------------===// -186.crafty contains this interesting pattern: - -%77 = call i8* @strstr(i8* getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0), - i8* %30) -%phitmp648 = icmp eq i8* %77, getelementptr ([6 x i8]* @"\01LC5", i32 0, i32 0) -br i1 %phitmp648, label %bb70, label %bb76 - -bb70: ; preds = %OptionMatch.exit91, %bb69 - %78 = call i32 @strlen(i8* %30) nounwind readonly align 1 ; [#uses=1] - -This is basically: - cststr = "abcdef"; - if (strstr(cststr, P) == cststr) { - x = strlen(P); - ... - -The strstr call would be significantly cheaper written as: - -cststr = "abcdef"; -if (memcmp(P, str, strlen(P))) - x = strlen(P); - -This is memcmp+strlen instead of strstr. This also makes the strlen fully -redundant. - -//===---------------------------------------------------------------------===// - 186.crafty also contains this code: %1906 = call i32 @strlen(i8* getelementptr ([32 x i8]* @pgn_event, i32 0,i32 0)) @@ -1863,3 +1833,91 @@ LLVM prefers comparisons with zero over non-zero in general, but in this case it choses instead to keep the max operation obvious. 
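(For context, the zero-comparison preference is the usual instcombine
canonicalization of constant predicates toward zero where possible, e.g.
"icmp sge i32 %x, 1" becomes "icmp sgt i32 %x, 0"; keeping the max form
readable here is a deliberate exception to that habit.)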
//===---------------------------------------------------------------------===// + +Take the following testcase on x86-64 (similar testcases exist for all targets +with addc/adde): + +define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, +i64 %c) nounwind { +entry: + %0 = zext i64 %a to i128 ; [#uses=1] + %1 = zext i64 %b to i128 ; [#uses=1] + %2 = add i128 %1, %0 ; [#uses=2] + %3 = zext i64 %c to i128 ; [#uses=1] + %4 = shl i128 %3, 64 ; [#uses=1] + %5 = add i128 %4, %2 ; [#uses=1] + %6 = lshr i128 %5, 64 ; [#uses=1] + %7 = trunc i128 %6 to i64 ; [#uses=1] + store i64 %7, i64* %s, align 8 + %8 = trunc i128 %2 to i64 ; [#uses=1] + store i64 %8, i64* %t, align 8 + ret void +} + +Generated code: + addq %rcx, %rdx + movl $0, %eax + adcq $0, %rax + addq %r8, %rax + movq %rax, (%rdi) + movq %rdx, (%rsi) + ret + +Expected code: + addq %rcx, %rdx + adcq $0, %r8 + movq %r8, (%rdi) + movq %rdx, (%rsi) + ret + +The generated SelectionDAG has an ADD of an ADDE, where both operands of the +ADDE are zero. Replacing one of the operands of the ADDE with the other operand +of the ADD, and replacing the ADD with the ADDE, should give the desired result. + +(That said, we are doing a lot better than gcc on this testcase. :) ) + +//===---------------------------------------------------------------------===// + +Switch lowering generates less than ideal code for the following switch: +define void @a(i32 %x) nounwind { +entry: + switch i32 %x, label %if.end [ + i32 0, label %if.then + i32 1, label %if.then + i32 2, label %if.then + i32 3, label %if.then + i32 5, label %if.then + ] +if.then: + tail call void @foo() nounwind + ret void +if.end: + ret void +} +declare void @foo() + +Generated code on x86-64 (other platforms give similar results): +a: + cmpl $5, %edi + ja .LBB0_2 + movl %edi, %eax + movl $47, %ecx + btq %rax, %rcx + jb .LBB0_3 +.LBB0_2: + ret +.LBB0_3: + jmp foo # TAILCALL + +The movl+movl+btq+jb could be simplified to a cmpl+jne. + +Or, if we wanted to be really clever, we could simplify the whole thing to +something like the following, which eliminates a branch: + xorl $1, %edi + cmpl $4, %edi + ja .LBB0_2 + ret +.LBB0_2: + jmp foo # TAILCALL + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index f47e53a..4099a62 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -38,6 +38,7 @@ SDValue SparcTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to locations. @@ -66,7 +67,7 @@ SparcTargetLowering::LowerReturn(SDValue Chain, assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); // Guarantee that all emitted copies are stuck together with flags. 
Flag = Chain.getValue(1); @@ -133,7 +134,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(Arg); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { @@ -146,7 +147,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, unsigned Offset = 4-std::max(1U, ObjectVT.getSizeInBits()/8); FIPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, FIPtr, DAG.getConstant(Offset, MVT::i32)); - Load = DAG.getExtLoad(LoadOp, dl, MVT::i32, Chain, FIPtr, + Load = DAG.getExtLoad(LoadOp, MVT::i32, dl, Chain, FIPtr, NULL, 0, ObjectVT, false, false, 0); Load = DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, Load); } @@ -169,7 +170,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, InVals.push_back(Arg); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0, false, false, 0); @@ -192,7 +193,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, false, false, 0); @@ -205,7 +206,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32); } else { int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0, false, false, 0); @@ -239,7 +240,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32); int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, - true, false); + true); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0, @@ -262,6 +263,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -283,7 +285,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Count the size of the outgoing arguments. unsigned ArgsSize = 0; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - switch (Outs[i].Val.getValueType().getSimpleVT().SimpleTy) { + switch (Outs[i].VT.getSimpleVT().SimpleTy) { default: llvm_unreachable("Unknown value type!"); case MVT::i1: case MVT::i8: @@ -316,7 +318,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. 
switch (VA.getLocInfo()) { @@ -358,8 +360,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, unsigned ArgOffset = 68; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - SDValue Val = Outs[i].Val; - EVT ObjectVT = Val.getValueType(); + SDValue Val = OutVals[i]; + EVT ObjectVT = Outs[i].VT; SDValue ValToStore(0, 0); unsigned ObjSize; switch (ObjectVT.getSimpleVT().SimpleTy) { @@ -478,7 +480,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); @@ -737,7 +739,7 @@ void SparcTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, unsigned &SPCC) { if (isa(RHS) && - cast(RHS)->getZExtValue() == 0 && + cast(RHS)->isNullValue() && CC == ISD::SETNE && ((LHS.getOpcode() == SPISD::SELECT_ICC && LHS.getOperand(3).getOpcode() == SPISD::CMPICC) || @@ -745,8 +747,8 @@ static void LookThroughSetCC(SDValue &LHS, SDValue &RHS, LHS.getOperand(3).getOpcode() == SPISD::CMPFCC)) && isa(LHS.getOperand(0)) && isa(LHS.getOperand(1)) && - cast(LHS.getOperand(0))->getZExtValue() == 1 && - cast(LHS.getOperand(1))->getZExtValue() == 0) { + cast(LHS.getOperand(0))->isOne() && + cast(LHS.getOperand(1))->isNullValue()) { SDValue CMPCC = LHS.getOperand(3); SPCC = cast(LHS.getOperand(2))->getZExtValue(); LHS = CMPCC.getOperand(0); @@ -759,7 +761,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, const GlobalValue *GV = cast(Op)->getGlobal(); // FIXME there isn't really any debug info here DebugLoc dl = Op.getDebugLoc(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); SDValue Hi = DAG.getNode(SPISD::Hi, dl, MVT::i32, GA); SDValue Lo = DAG.getNode(SPISD::Lo, dl, MVT::i32, GA); @@ -1007,21 +1009,20 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + + // Add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); // copy0MBB: // %FalseValue = ... 
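// The block-splitting idiom above (repeated for PPC and SystemZ elsewhere in
// this patch) replaces manual successor copying: the tail of BB after the
// pseudo instruction is spliced into the sink block, and
// transferSuccessorsAndUpdatePHIs moves the successor edges while rewriting
// PHI operands that named BB. Condensed into one sketch, reusing the names
// from the Sparc code above:
//
//   MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
//   F->insert(It, sinkMBB);
//   sinkMBB->splice(sinkMBB->begin(), BB,
//                   llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
//   sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
//   BB->addSuccessor(copy0MBB);
//   BB->addSuccessor(sinkMBB);
//   BuildMI(BB, dl, TII.get(BROpcode)).addMBB(sinkMBB).addImm(CC);
//
// Splicing before the new branch is built matters: everything after MI must
// already live in sinkMBB, which is also why the PHI is now created with
// BuildMI(*BB, BB->begin(), ...) at the top of the sink block rather than
// appended to its end.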
@@ -1035,11 +1036,11 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = sinkMBB; - BuildMI(BB, dl, TII.get(SP::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, TII.get(SP::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 5ebdcac..db39e08 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -86,6 +86,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -94,6 +95,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp index 8e49eca..3a4c80a 100644 --- a/lib/Target/Sparc/SparcInstrInfo.cpp +++ b/lib/Target/Sparc/SparcInstrInfo.cpp @@ -109,38 +109,29 @@ unsigned SparcInstrInfo::isStoreToStackSlot(const MachineInstr *MI, unsigned SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond)const{ - // FIXME this should probably take a DebugLoc argument - DebugLoc dl; + const SmallVectorImpl &Cond, + DebugLoc DL)const{ // Can only insert uncond branches so far. assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!"); - BuildMI(&MBB, dl, get(SP::BA)).addMBB(TBB); + BuildMI(&MBB, DL, get(SP::BA)).addMBB(TBB); return 1; } -bool SparcInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - if (DestRC != SrcRC) { - // Not yet supported! - return false; - } - - if (DestRC == SP::IntRegsRegisterClass) - BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0).addReg(SrcReg); - else if (DestRC == SP::FPRegsRegisterClass) - BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg).addReg(SrcReg); - else if (DestRC == SP::DFPRegsRegisterClass) - BuildMI(MBB, I, DL, get(Subtarget.isV9() ? SP::FMOVD : SP::FpMOVD),DestReg) - .addReg(SrcReg); +void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + if (SP::IntRegsRegClass.contains(DestReg, SrcReg)) + BuildMI(MBB, I, DL, get(SP::ORrr), DestReg).addReg(SP::G0) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (SP::FPRegsRegClass.contains(DestReg, SrcReg)) + BuildMI(MBB, I, DL, get(SP::FMOVS), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + else if (SP::DFPRegsRegClass.contains(DestReg, SrcReg)) + BuildMI(MBB, I, DL, get(Subtarget.isV9() ? 
SP::FMOVD : SP::FpMOVD), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); else - // Can't copy this register - return false; - - return true; + llvm_unreachable("Impossible reg-to-reg copy"); } void SparcInstrInfo:: @@ -183,61 +174,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, llvm_unreachable("Can't load this register from stack slot"); } -MachineInstr *SparcInstrInfo::foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FI) const { - if (Ops.size() != 1) return NULL; - - unsigned OpNum = Ops[0]; - bool isFloat = false; - MachineInstr *NewMI = NULL; - switch (MI->getOpcode()) { - case SP::ORrr: - if (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == SP::G0&& - MI->getOperand(0).isReg() && MI->getOperand(2).isReg()) { - if (OpNum == 0) // COPY -> STORE - NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::STri)) - .addFrameIndex(FI) - .addImm(0) - .addReg(MI->getOperand(2).getReg()); - else // COPY -> LOAD - NewMI = BuildMI(MF, MI->getDebugLoc(), get(SP::LDri), - MI->getOperand(0).getReg()) - .addFrameIndex(FI) - .addImm(0); - } - break; - case SP::FMOVS: - isFloat = true; - // FALLTHROUGH - case SP::FMOVD: - if (OpNum == 0) { // COPY -> STORE - unsigned SrcReg = MI->getOperand(1).getReg(); - bool isKill = MI->getOperand(1).isKill(); - bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), - get(isFloat ? SP::STFri : SP::STDFri)) - .addFrameIndex(FI) - .addImm(0) - .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef)); - } else { // COPY -> LOAD - unsigned DstReg = MI->getOperand(0).getReg(); - bool isDead = MI->getOperand(0).isDead(); - bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), - get(isFloat ? 
SP::LDFri : SP::LDDFri)) - .addReg(DstReg, RegState::Define | - getDeadRegState(isDead) | getUndefRegState(isUndef)) - .addFrameIndex(FI) - .addImm(0); - } - break; - } - - return NewMI; -} - unsigned SparcInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { SparcMachineFunctionInfo *SparcFI = MF->getInfo(); diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h index a00ba39..1334718 100644 --- a/lib/Target/Sparc/SparcInstrInfo.h +++ b/lib/Target/Sparc/SparcInstrInfo.h @@ -68,14 +68,13 @@ public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; + const SmallVectorImpl &Cond, + DebugLoc DL) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, @@ -89,18 +88,6 @@ public: const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const; - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - int FrameIndex) const; - - virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, - MachineInstr* MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - return 0; - } - unsigned getGlobalBaseReg(MachineFunction *MF) const; }; diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 9489580..ddadd51 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -665,7 +665,7 @@ let Defs = [FCC] in { //===----------------------------------------------------------------------===// // V9 Conditional Moves. -let Predicates = [HasV9], isTwoAddress = 1 in { +let Predicates = [HasV9], Constraints = "$T = $dst" in { // Move Integer Register on Condition (MOVcc) p. 194 of the V9 manual. // FIXME: Add instruction encodings for the JIT some day. def MOVICCrr diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 08373bb8..427cc7f 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -52,13 +52,6 @@ BitVector SparcRegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } - -const TargetRegisterClass* const* -SparcRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 }; - return CalleeSavedRegClasses; -} - bool SparcRegisterInfo::hasFP(const MachineFunction &MF) const { return false; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 24d43e3..9f0cda7 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -32,9 +32,6 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp index 90be222..d7ac8f5 100644 --- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp @@ -124,7 +124,7 @@ void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, unsigned Reg = MO.getReg(); if (Modifier && strncmp(Modifier, "subreg", 6) == 0) { if (strncmp(Modifier + 7, "even", 4) == 0) - Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_even32); + Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit); else if (strncmp(Modifier + 7, "odd", 3) == 0) Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32); else diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index bb2952a..ed290ca 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -670,7 +670,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? - SystemZ::subreg_even32 : SystemZ::subreg_even); + SystemZ::subreg_32bit : SystemZ::subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), @@ -754,7 +754,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { // Copy the remainder (even subreg) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { unsigned SubRegIdx = (is32Bit ? - SystemZ::subreg_even32 : SystemZ::subreg_even); + SystemZ::subreg_32bit : SystemZ::subreg_even); SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, NVT, SDValue(Result, 0), diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 76f2901..67f739f 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -254,6 +254,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -266,7 +267,7 @@ SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CallingConv::Fast: case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); } } @@ -334,7 +335,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // Create the nodes corresponding to a load from this parameter slot. // Create the frame index object for this incoming parameter... 
int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8, - VA.getLocMemOffset(), true, false); + VA.getLocMemOffset(), true); // Create the SelectionDAG nodes corresponding to a load // from this parameter @@ -372,6 +373,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, bool isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -402,7 +404,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -464,7 +466,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy()); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy()); @@ -550,6 +552,7 @@ SDValue SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location @@ -575,7 +578,7 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, // Copy the result values into the output registers. for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; - SDValue ResValue = Outs[i].Val; + SDValue ResValue = OutVals[i]; assert(VA.isRegLoc() && "Can only return in registers!"); // If this is an 8/16/32-bit value, it is really should be passed promoted @@ -729,14 +732,14 @@ SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, SDValue Result; if (!IsPic && !ExtraLoadRequired) { - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); Offset = 0; } else { unsigned char OpFlags = 0; if (ExtraLoadRequired) OpFlags = SystemZII::MO_GOTENT; - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); } Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl, @@ -827,16 +830,20 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB); SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm(); - BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB); F->insert(I, copy0MBB); F->insert(I, copy1MBB); // Update machine-CFG edges by transferring all successors of the current // block to the new block which will contain the Phi node for the select. - copy1MBB->transferSuccessors(BB); + copy1MBB->splice(copy1MBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + copy1MBB->transferSuccessorsAndUpdatePHIs(BB); // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(copy1MBB); + BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB); + // copy0MBB: // %FalseValue = ... 
// # fallthrough to copy1MBB @@ -849,11 +856,11 @@ SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... BB = copy1MBB; - BuildMI(BB, dl, TII.get(SystemZ::PHI), + BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 94bd906..51d2df3 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -98,6 +98,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -126,6 +127,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -134,6 +136,7 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; const SystemZSubtarget &Subtarget; diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index 8c5e905..a658280 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -126,7 +126,7 @@ def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), (implicit PSW)]>; } -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Defs = [PSW] in { let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), @@ -237,7 +237,7 @@ def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), "ddb\t{$dst, $src2}", [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>; -} // isTwoAddress = 1 +} // Constraints = "$src1 = $dst" def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), "sqebr\t{$dst, $src}", diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 043686c..c03864f 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -117,59 +117,28 @@ void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); } -bool SystemZInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - // Determine if DstRC and SrcRC have a common superclass. 
- const TargetRegisterClass *CommonRC = DestRC; - if (DestRC == SrcRC) - /* Same regclass for source and dest */; - else if (CommonRC->hasSuperClass(SrcRC)) - CommonRC = SrcRC; - else if (!CommonRC->hasSubClass(SrcRC)) - CommonRC = 0; - - if (CommonRC) { - if (CommonRC == &SystemZ::GR64RegClass || - CommonRC == &SystemZ::ADDR64RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::GR32RegClass || - CommonRC == &SystemZ::ADDR32RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::GR64PRegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV64rrP), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::GR128RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::MOV128rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::FP32RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::FMOV32rr), DestReg).addReg(SrcReg); - } else if (CommonRC == &SystemZ::FP64RegClass) { - BuildMI(MBB, I, DL, get(SystemZ::FMOV64rr), DestReg).addReg(SrcReg); - } else { - return false; - } - - return true; - } - - if ((SrcRC == &SystemZ::GR64RegClass && - DestRC == &SystemZ::ADDR64RegClass) || - (DestRC == &SystemZ::GR64RegClass && - SrcRC == &SystemZ::ADDR64RegClass)) { - BuildMI(MBB, I, DL, get(SystemZ::MOV64rr), DestReg).addReg(SrcReg); - return true; - } else if ((SrcRC == &SystemZ::GR32RegClass && - DestRC == &SystemZ::ADDR32RegClass) || - (DestRC == &SystemZ::GR32RegClass && - SrcRC == &SystemZ::ADDR32RegClass)) { - BuildMI(MBB, I, DL, get(SystemZ::MOV32rr), DestReg).addReg(SrcReg); - return true; - } - - return false; +void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + unsigned Opc; + if (SystemZ::GR64RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV64rr; + else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV32rr; + else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV64rrP; + else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::MOV128rr; + else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::FMOV32rr; + else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg)) + Opc = SystemZ::FMOV64rr; + else + llvm_unreachable("Impossible reg-to-reg copy"); + + BuildMI(MBB, I, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); } bool @@ -286,8 +255,7 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass != &SystemZ::FP64RegClass) { + if (!SystemZ::FP64RegClass.contains(Reg)) { unsigned Offset = RegSpillOffsets[Reg]; CalleeFrameSize += 8; if (StartOffset > Offset) { @@ -332,11 +300,10 @@ SystemZInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Save FPRs for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass == &SystemZ::FP64RegClass) { + if (SystemZ::FP64RegClass.contains(Reg)) { MBB.addLiveIn(Reg); - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), RegClass, - &RI); + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), + &SystemZ::FP64RegClass, &RI); } } @@ -361,9 +328,9 @@ 
SystemZInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, // Restore FP registers for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass == &SystemZ::FP64RegClass) - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI); + if (SystemZ::FP64RegClass.contains(Reg)) + loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + &SystemZ::FP64RegClass, &RI); } // Restore GP registers @@ -523,9 +490,8 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const { - // FIXME: this should probably have a DebugLoc operand - DebugLoc DL; + const SmallVectorImpl &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index a753f14..0559619 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -60,11 +60,10 @@ public: /// virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; } - bool copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; bool isMoveInstr(const MachineInstr& MI, unsigned &SrcReg, unsigned &DstReg, @@ -102,7 +101,8 @@ public: bool AllowModify) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; + const SmallVectorImpl &Cond, + DebugLoc DL) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const; diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index 22bde4e..8df07c0 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -478,7 +478,8 @@ def MOV64rmm : RSYI<0x04EB, "lmg\t{$from, $to, $dst}", []>; -let isReMaterializable = 1, isAsCheapAsAMove = 1, isTwoAddress = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1, + Constraints = "$src = $dst" in { def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src), "lhi\t${dst:subreg_even}, 0", []>; @@ -537,7 +538,7 @@ def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src), (implicit PSW)]>; } -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Defs = [PSW] in { @@ -924,12 +925,12 @@ def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2 "dlg\t{$dst, $src2}", []>; } // mayLoad -} // isTwoAddress = 1 +} // Constraints = "$src1 = $dst" //===----------------------------------------------------------------------===// // Shifts -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def SRL32rri : RSI<0x88, (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), "srl\t{$src, $amt}", @@ -939,7 +940,7 @@ def SRL64rri : RSYI<0xEB0C, "srlg\t{$dst, $src, $amt}", [(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>; -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in 
def SHL32rri : RSI<0x89, (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), "sll\t{$src, $amt}", @@ -950,7 +951,7 @@ def SHL64rri : RSYI<0xEB0D, [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>; let Defs = [PSW] in { -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def SRA32rri : RSI<0x8A, (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), "sra\t{$src, $amt}", @@ -1129,13 +1130,13 @@ def : Pat<(mulhs GR32:$src1, GR32:$src2), (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), GR32:$src1, subreg_odd32), GR32:$src2), - subreg_even32)>; + subreg_32bit)>; def : Pat<(mulhu GR32:$src1, GR32:$src2), (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), GR32:$src1, subreg_odd32), GR32:$src2), - subreg_even32)>; + subreg_32bit)>; def : Pat<(mulhu GR64:$src1, GR64:$src2), (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), GR64:$src1, subreg_odd), diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 638fd17..ae96b0b 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -47,22 +47,6 @@ SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CalleeSavedRegs; } -const TargetRegisterClass* const* -SystemZRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::GR64RegClass, &SystemZ::GR64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, - &SystemZ::FP64RegClass, &SystemZ::FP64RegClass, 0 - }; - return CalleeSavedRegClasses; -} - BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); if (hasFP(MF)) diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index 42aa5dd..670025f 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -32,9 +32,6 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { /// Code Generation virtual methods... 
const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool hasReservedCallFrame(MachineFunction &MF) const { return true; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index b561744..33be8dd 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -55,7 +55,6 @@ class FPRL num, string n, list subregs> let Namespace = "SystemZ" in { def subreg_32bit : SubRegIndex; -def subreg_even32 : SubRegIndex; def subreg_odd32 : SubRegIndex; def subreg_even : SubRegIndex; def subreg_odd : SubRegIndex; @@ -99,7 +98,7 @@ def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>; } // Register pairs -let SubRegIndices = [subreg_even32, subreg_odd32] in { +let SubRegIndices = [subreg_32bit, subreg_odd32] in { def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>, DwarfRegNum<[0]>; def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>, DwarfRegNum<[2]>; def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>, DwarfRegNum<[4]>; @@ -111,8 +110,7 @@ def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>, DwarfRegNum<[14]>; } let SubRegIndices = [subreg_even, subreg_odd], - CompositeIndices = [(subreg_even32 subreg_even, subreg_32bit), - (subreg_odd32 subreg_odd, subreg_32bit)] in { + CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in { def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>, DwarfRegNum<[0]>; def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>, DwarfRegNum<[2]>; def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>, DwarfRegNum<[4]>; @@ -355,7 +353,7 @@ def ADDR64 : RegisterClass<"SystemZ", [i64], 64, def GR64P : RegisterClass<"SystemZ", [v2i32], 64, [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]> { - let SubRegClasses = [(GR32 subreg_even32, subreg_odd32)]; + let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; iterator allocation_order_end(const MachineFunction &MF) const; @@ -391,7 +389,7 @@ def GR64P : RegisterClass<"SystemZ", [v2i32], 64, def GR128 : RegisterClass<"SystemZ", [v2i64], 128, [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]> { - let SubRegClasses = [(GR32 subreg_even32, subreg_odd32), + let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32), (GR64 subreg_even, subreg_odd)]; let MethodProtos = [{ iterator allocation_order_begin(const MachineFunction &MF) const; diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index 094a57e..c099a7e 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -28,6 +28,10 @@ const TargetRegisterClass * TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const { if (isLookupPtrRegClass()) return TRI->getPointerRegClass(RegClass); + // Instructions like INSERT_SUBREG do not have fixed register classes. + if (RegClass < 0) + return 0; + // Otherwise just look it up normally. 
return TRI->getRegClass(RegClass); } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index b9372d0..dd7b532 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -101,7 +101,7 @@ static bool IsNullTerminatedString(const Constant *C) { ConstantInt *Null = dyn_cast(CVA->getOperand(ATy->getNumElements()-1)); - if (Null == 0 || Null->getZExtValue() != 0) + if (Null == 0 || !Null->isZero()) return false; // Not null terminated. // Verify that the null doesn't occur anywhere else in the string. diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp index dcc5f61..49bfad5 100644 --- a/lib/Target/TargetRegisterInfo.cpp +++ b/lib/Target/TargetRegisterInfo.cpp @@ -39,20 +39,20 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR, TargetRegisterInfo::~TargetRegisterInfo() {} -/// getPhysicalRegisterRegClass - Returns the Register Class of a physical -/// register of the given type. If type is EVT::Other, then just return any -/// register class the register belongs to. +/// getMinimalPhysRegClass - Returns the Register Class of a physical +/// register of the given type, picking the most sub register class of +/// the right type that contains this physreg. const TargetRegisterClass * -TargetRegisterInfo::getPhysicalRegisterRegClass(unsigned reg, EVT VT) const { +TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { assert(isPhysicalRegister(reg) && "reg must be a physical register"); - // Pick the most super register class of the right type that contains + // Pick the most sub register class of the right type that contains // this physreg. const TargetRegisterClass* BestRC = 0; for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){ const TargetRegisterClass* RC = *I; if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && - (!BestRC || BestRC->hasSuperClass(RC))) + (!BestRC || BestRC->hasSubClass(RC))) BestRC = RC; } diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp index a58f58e..26797ab 100644 --- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -33,13 +33,11 @@ class X86AsmLexer : public TargetAsmLexer { } const AsmToken &lexDefinite() { - if(tentativeIsValid) { + if (tentativeIsValid) { tentativeIsValid = false; return tentativeToken; } - else { - return getLexer()->Lex(); - } + return getLexer()->Lex(); } AsmToken LexTokenATT(); @@ -72,38 +70,65 @@ public: static unsigned MatchRegisterName(StringRef Name); AsmToken X86AsmLexer::LexTokenATT() { - const AsmToken lexedToken = lexDefinite(); + AsmToken lexedToken = lexDefinite(); switch (lexedToken.getKind()) { default: - return AsmToken(lexedToken); + return lexedToken; case AsmToken::Error: SetError(Lexer->getErrLoc(), Lexer->getErr()); - return AsmToken(lexedToken); - case AsmToken::Percent: - { + return lexedToken; + + case AsmToken::Percent: { const AsmToken &nextToken = lexTentative(); - if (nextToken.getKind() == AsmToken::Identifier) { - unsigned regID = MatchRegisterName(nextToken.getString()); + if (nextToken.getKind() != AsmToken::Identifier) + return lexedToken; + - if (regID) { - lexDefinite(); + if (unsigned regID = MatchRegisterName(nextToken.getString())) { + lexDefinite(); + // FIXME: This is completely wrong when there is a space or other + // punctuation between the % and the register name. 
+ StringRef regStr(lexedToken.getString().data(), + lexedToken.getString().size() + + nextToken.getString().size()); + + return AsmToken(AsmToken::Register, regStr, + static_cast(regID)); + } + + // Match register name failed. If this is "db[0-7]", match it as an alias + // for dr[0-7]. + if (nextToken.getString().size() == 3 && + nextToken.getString().startswith("db")) { + int RegNo = -1; + switch (nextToken.getString()[2]) { + case '0': RegNo = X86::DR0; break; + case '1': RegNo = X86::DR1; break; + case '2': RegNo = X86::DR2; break; + case '3': RegNo = X86::DR3; break; + case '4': RegNo = X86::DR4; break; + case '5': RegNo = X86::DR5; break; + case '6': RegNo = X86::DR6; break; + case '7': RegNo = X86::DR7; break; + } + + if (RegNo != -1) { + lexDefinite(); + + // FIXME: This is completely wrong when there is a space or other + // punctuation between the % and the register name. StringRef regStr(lexedToken.getString().data(), lexedToken.getString().size() + nextToken.getString().size()); - - return AsmToken(AsmToken::Register, - regStr, - static_cast(regID)); - } - else { - return AsmToken(lexedToken); + return AsmToken(AsmToken::Register, regStr, + static_cast(RegNo)); } } - else { - return AsmToken(lexedToken); - } + + + return lexedToken; } } } @@ -113,26 +138,22 @@ AsmToken X86AsmLexer::LexTokenIntel() { switch(lexedToken.getKind()) { default: - return AsmToken(lexedToken); + return lexedToken; case AsmToken::Error: SetError(Lexer->getErrLoc(), Lexer->getErr()); - return AsmToken(lexedToken); - case AsmToken::Identifier: - { + return lexedToken; + case AsmToken::Identifier: { std::string upperCase = lexedToken.getString().str(); std::string lowerCase = LowercaseString(upperCase); StringRef lowerRef(lowerCase); unsigned regID = MatchRegisterName(lowerRef); - if (regID) { + if (regID) return AsmToken(AsmToken::Register, lexedToken.getString(), static_cast(regID)); - } - else { - return AsmToken(lexedToken); - } + return lexedToken; } } } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 40a6a7b..a856e9c 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -412,6 +412,28 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, return false; } + // If this is "db[0-7]", match it as an alias + // for dr[0-7]. + if (RegNo == 0 && Tok.getString().size() == 3 && + Tok.getString().startswith("db")) { + switch (Tok.getString()[2]) { + case '0': RegNo = X86::DR0; break; + case '1': RegNo = X86::DR1; break; + case '2': RegNo = X86::DR2; break; + case '3': RegNo = X86::DR3; break; + case '4': RegNo = X86::DR4; break; + case '5': RegNo = X86::DR5; break; + case '6': RegNo = X86::DR6; break; + case '7': RegNo = X86::DR7; break; + } + + if (RegNo != 0) { + EndLoc = Tok.getLoc(); + Parser.Lex(); // Eat it. + return false; + } + } + if (RegNo == 0) return Error(Tok.getLoc(), "invalid register name"); @@ -597,6 +619,16 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, return Error(NameLoc, "pushfq cannot be encoded in 32-bit mode"); } + // The "Jump if rCX Zero" form jcxz is not allowed in 64-bit mode and + // the form jrcxz is not allowed in 32-bit mode. + if (Is64Bit) { + if (Name == "jcxz") + return Error(NameLoc, "jcxz cannot be encoded in 64-bit mode"); + } else { + if (Name == "jrcxz") + return Error(NameLoc, "jrcxz cannot be encoded in 32-bit mode"); + } + // FIXME: Hack to recognize "sal..." and "rep..." for now. 
We need a way to
   // represent alternative syntaxes in the .td file, without requiring
   // instruction duplication.
@@ -617,6 +649,23 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
     .Case("setnz", "setne")
     .Case("jz", "je")
     .Case("jnz", "jne")
+    .Case("jc", "jb")
+    // FIXME: in 32-bit mode jcxz requires an AdSize prefix. In 64-bit mode
+    // jecxz requires an AdSize prefix but jecxz does not have a prefix in
+    // 32-bit mode.
+    .Case("jecxz", "jcxz")
+    .Case("jrcxz", "jcxz")
+    .Case("jna", "jbe")
+    .Case("jnae", "jb")
+    .Case("jnb", "jae")
+    .Case("jnbe", "ja")
+    .Case("jnc", "jae")
+    .Case("jng", "jle")
+    .Case("jnge", "jl")
+    .Case("jnl", "jge")
+    .Case("jnle", "jg")
+    .Case("jpe", "jp")
+    .Case("jpo", "jnp")
     .Case("cmovcl", "cmovbl")
     .Case("cmovcl", "cmovbl")
     .Case("cmovnal", "cmovbel")
@@ -631,36 +680,64 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
     .Case("cmovnlel", "cmovgl")
     .Case("cmovnzl", "cmovnel")
     .Case("cmovzl", "cmovel")
+    .Case("fwait", "wait")
+    .Case("movzx", "movzb")
     .Default(Name);
 
   // FIXME: Hack to recognize cmp{ss,sd,ps,pd}.
   const MCExpr *ExtraImmOp = 0;
-  if (PatchedName.startswith("cmp") &&
+  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
       (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
        PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
+    bool IsVCMP = PatchedName.startswith("vcmp");
+    unsigned SSECCIdx = IsVCMP ? 4 : 3;
     unsigned SSEComparisonCode = StringSwitch<unsigned>(
-      PatchedName.slice(3, PatchedName.size() - 2))
-      .Case("eq", 0)
-      .Case("lt", 1)
-      .Case("le", 2)
-      .Case("unord", 3)
-      .Case("neq", 4)
-      .Case("nlt", 5)
-      .Case("nle", 6)
-      .Case("ord", 7)
+      PatchedName.slice(SSECCIdx, PatchedName.size() - 2))
+      .Case("eq", 0)
+      .Case("lt", 1)
+      .Case("le", 2)
+      .Case("unord", 3)
+      .Case("neq", 4)
+      .Case("nlt", 5)
+      .Case("nle", 6)
+      .Case("ord", 7)
+      .Case("eq_uq", 8)
+      .Case("nge", 9)
+      .Case("ngt", 0x0A)
+      .Case("false", 0x0B)
+      .Case("neq_oq", 0x0C)
+      .Case("ge", 0x0D)
+      .Case("gt", 0x0E)
+      .Case("true", 0x0F)
+      .Case("eq_os", 0x10)
+      .Case("lt_oq", 0x11)
+      .Case("le_oq", 0x12)
+      .Case("unord_s", 0x13)
+      .Case("neq_us", 0x14)
+      .Case("nlt_uq", 0x15)
+      .Case("nle_uq", 0x16)
+      .Case("ord_s", 0x17)
+      .Case("eq_us", 0x18)
+      .Case("nge_uq", 0x19)
+      .Case("ngt_uq", 0x1A)
+      .Case("false_os", 0x1B)
+      .Case("neq_os", 0x1C)
+      .Case("ge_oq", 0x1D)
+      .Case("gt_oq", 0x1E)
+      .Case("true_us", 0x1F)
       .Default(~0U);
     if (SSEComparisonCode != ~0U) {
       ExtraImmOp = MCConstantExpr::Create(SSEComparisonCode,
                                           getParser().getContext());
       if (PatchedName.endswith("ss")) {
-        PatchedName = "cmpss";
+        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
       } else if (PatchedName.endswith("sd")) {
-        PatchedName = "cmpsd";
+        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
       } else if (PatchedName.endswith("ps")) {
-        PatchedName = "cmpps";
+        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
       } else {
         assert(PatchedName.endswith("pd") && "Unexpected mnemonic!");
-        PatchedName = "cmppd";
+        PatchedName = IsVCMP ?
"vcmppd" : "cmppd"; } } } diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp index 0b64cb4..f2cdb5b 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp @@ -85,11 +85,18 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } -void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { +void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, + raw_ostream &O) { const MCOperand &BaseReg = MI->getOperand(Op); const MCOperand &IndexReg = MI->getOperand(Op+2); const MCOperand &DispSpec = MI->getOperand(Op+3); + const MCOperand &SegReg = MI->getOperand(Op+4); + + // If this has a segment register, print it. + if (SegReg.getReg()) { + printOperand(MI, Op+4, O); + O << ':'; + } if (DispSpec.isImm()) { int64_t DispVal = DispSpec.getImm(); @@ -115,13 +122,3 @@ void X86ATTInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, O << ')'; } } - -void X86ATTInstPrinter::printMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { - // If this has a segment register, print it. - if (MI->getOperand(Op+4).getReg()) { - printOperand(MI, Op+4, O); - O << ':'; - } - printLeaMemReference(MI, Op, O); -} diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h index 8d5d508..3be4bae 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.h @@ -34,7 +34,6 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &OS); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); - void printLeaMemReference(const MCInst *MI, unsigned Op, raw_ostream &OS); void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &OS); void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &OS); @@ -69,14 +68,8 @@ public: void printf128mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { printMemReference(MI, OpNo, O); } - void printlea32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printLeaMemReference(MI, OpNo, O); - } - void printlea64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printLeaMemReference(MI, OpNo, O); - } - void printlea64_32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - printLeaMemReference(MI, OpNo, O); + void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + printMemReference(MI, OpNo, O); } }; diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index 183213d..73bc603 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -200,6 +200,11 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO, case X86II::MO_GOT: O << "@GOT"; break; case X86II::MO_GOTOFF: O << "@GOTOFF"; break; case X86II::MO_PLT: O << "@PLT"; break; + case X86II::MO_TLVP: O << "@TLVP"; break; + case X86II::MO_TLVP_PIC_BASE: + O << "@TLVP" << '-'; + PrintPICBaseSymbol(O); + break; } } @@ -383,6 +388,8 @@ bool X86AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, } if (MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isSymbol()) { printSymbolOperand(MO, O); + if (Subtarget->isPICStyleRIPRel()) + O << "(%rip)"; return false; } if (MO.isReg()) { diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp index 7e0a9bb..a632047 100644 --- 
a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.cpp @@ -81,12 +81,19 @@ void X86IntelInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } } -void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { +void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, + raw_ostream &O) { const MCOperand &BaseReg = MI->getOperand(Op); unsigned ScaleVal = MI->getOperand(Op+1).getImm(); const MCOperand &IndexReg = MI->getOperand(Op+2); const MCOperand &DispSpec = MI->getOperand(Op+3); + const MCOperand &SegReg = MI->getOperand(Op+4); + + // If this has a segment register, print it. + if (SegReg.getReg()) { + printOperand(MI, Op+4, O); + O << ':'; + } O << '['; @@ -104,7 +111,7 @@ void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, NeedPlus = true; } - + if (!DispSpec.isImm()) { if (NeedPlus) O << " + "; assert(DispSpec.isExpr() && "non-immediate displacement for LEA?"); @@ -126,13 +133,3 @@ void X86IntelInstPrinter::printLeaMemReference(const MCInst *MI, unsigned Op, O << ']'; } - -void X86IntelInstPrinter::printMemReference(const MCInst *MI, unsigned Op, - raw_ostream &O) { - // If this has a segment register, print it. - if (MI->getOperand(Op+4).getReg()) { - printOperand(MI, Op+4, O); - O << ':'; - } - printLeaMemReference(MI, Op, O); -} diff --git a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h index a0beeb2..4d68074 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelInstPrinter.h @@ -36,7 +36,6 @@ public: void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); - void printLeaMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O); void print_pcrel_imm(const MCInst *MI, unsigned OpNo, raw_ostream &O); @@ -81,17 +80,9 @@ public: O << "XMMWORD PTR "; printMemReference(MI, OpNo, O); } - void printlea32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "DWORD PTR "; - printLeaMemReference(MI, OpNo, O); - } - void printlea64mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "QWORD PTR "; - printLeaMemReference(MI, OpNo, O); - } - void printlea64_32mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - O << "QWORD PTR "; - printLeaMemReference(MI, OpNo, O); + void printf256mem(const MCInst *MI, unsigned OpNo, raw_ostream &O) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo, O); } }; diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 4edeca9..09f150b 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -152,6 +152,17 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, case X86II::MO_DARWIN_STUB: break; + case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break; + case X86II::MO_TLVP_PIC_BASE: + Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx); + // Subtract the pic base. 
+ Expr + = MCBinaryExpr::CreateSub(Expr, + MCSymbolRefExpr::Create(GetPICBaseSymbol(), + Ctx), + Ctx); + + break; case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break; case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break; case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break; @@ -266,10 +277,21 @@ static void SimplifyShortMoveForm(MCInst &Inst, unsigned Opcode) { return; // Check whether this is an absolute address. - if (Inst.getOperand(AddrBase + 0).getReg() != 0 || - Inst.getOperand(AddrBase + 2).getReg() != 0 || - Inst.getOperand(AddrBase + 4).getReg() != 0 || - Inst.getOperand(AddrBase + 1).getImm() != 1) + // FIXME: We know TLVP symbol refs aren't, but there should be a better way + // to do this here. + bool Absolute = true; + if (Inst.getOperand(AddrOp).isExpr()) { + const MCExpr *MCE = Inst.getOperand(AddrOp).getExpr(); + if (const MCSymbolRefExpr *SRE = dyn_cast(MCE)) + if (SRE->getKind() == MCSymbolRefExpr::VK_TLVP) + Absolute = false; + } + + if (Absolute && + (Inst.getOperand(AddrBase + 0).getReg() != 0 || + Inst.getOperand(AddrBase + 2).getReg() != 0 || + Inst.getOperand(AddrBase + 4).getReg() != 0 || + Inst.getOperand(AddrBase + 1).getImm() != 1)) return; // If so, rewrite the instruction. @@ -327,6 +349,15 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { switch (OutMI.getOpcode()) { case X86::LEA64_32r: // Handle 'subreg rewriting' for the lea64_32mem operand. lower_lea64_32mem(&OutMI, 1); + // FALL THROUGH. + case X86::LEA64r: + case X86::LEA16r: + case X86::LEA32r: + // LEA should have a segment register, but it must be empty. + assert(OutMI.getNumOperands() == 1+X86::AddrNumOperands && + "Unexpected # of LEA operands"); + assert(OutMI.getOperand(1+X86::AddrSegmentReg).getReg() == 0 && + "LEA has segment specified!"); break; case X86::MOVZX16rr8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rr8); break; case X86::MOVZX16rm8: LowerSubReg32_Op0(OutMI, X86::MOVZX32rm8); break; @@ -364,10 +395,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { LowerUnaryToTwoAddr(OutMI, X86::XOR32rr); // MOV32r0 -> XOR32rr break; - // TAILJMPr, TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have + // TAILJMPr64, CALL64r, CALL64pcrel32 - These instructions have // register inputs modeled as normal uses instead of implicit uses. As such, // truncate off all but the first operand (the callee). FIXME: Change isel. - case X86::TAILJMPr: case X86::TAILJMPr64: case X86::CALL64r: case X86::CALL64pcrel32: { @@ -380,11 +410,20 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { } // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions. + case X86::TAILJMPr: case X86::TAILJMPd: case X86::TAILJMPd64: { + unsigned Opcode; + switch (OutMI.getOpcode()) { + default: assert(0 && "Invalid opcode"); + case X86::TAILJMPr: Opcode = X86::JMP32r; break; + case X86::TAILJMPd: + case X86::TAILJMPd64: Opcode = X86::JMP_1; break; + } + MCOperand Saved = OutMI.getOperand(0); OutMI = MCInst(); - OutMI.setOpcode(X86::TAILJMP_1); + OutMI.setOpcode(Opcode); OutMI.addOperand(Saved); break; } @@ -483,8 +522,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, O << V.getName(); O << " <- "; // Frame address. Currently handles register +- offset only. 
- assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 3, O); + O << '['; + if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) + printOperand(MI, 0, O); + else + O << "undef"; + O << '+'; printOperand(MI, 3, O); O << ']'; O << "+"; printOperand(MI, NOps-2, O); @@ -495,8 +538,9 @@ X86AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { MachineLocation Location; assert (MI->getNumOperands() == 7 && "Invalid no. of machine operands!"); // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(3).isImm()); - Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); + + if (MI->getOperand(0).isReg() && MI->getOperand(3).isImm()) + Location.set(MI->getOperand(0).getReg(), MI->getOperand(3).getImm()); return Location; } @@ -513,6 +557,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; + case X86::TAILJMPr: + case X86::TAILJMPd: + case X86::TAILJMPd64: + // Lower these as normal, but add some comments. + OutStreamer.AddComment("TAILCALL"); + break; + case X86::MOVPC32r: { MCInst TmpInst; // This is a pseudo op for a two instruction sequence with a label, which @@ -578,7 +629,6 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - OutStreamer.EmitInstruction(TmpInst); } diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt index 9f91060..97589c0 100644 --- a/lib/Target/X86/Disassembler/CMakeLists.txt +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -4,8 +4,8 @@ add_llvm_library(LLVMX86Disassembler X86Disassembler.cpp X86DisassemblerDecoder.c ) -# workaround for hanging compilation on MSVC9 -if( MSVC_VERSION EQUAL 1500 ) +# workaround for hanging compilation on MSVC9 and 10 +if( MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) set_property( SOURCE X86Disassembler.cpp PROPERTY COMPILE_FLAGS "/Od" diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 8a5a630..09f1584 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -252,13 +252,8 @@ static bool translateRMRegister(MCInst &mcInst, /// @param mcInst - The MCInst to append to. /// @param insn - The instruction to extract Mod, R/M, and SIB fields /// from. -/// @param sr - Whether or not to emit the segment register. The -/// LEA instruction does not expect a segment-register -/// operand. /// @return - 0 on success; nonzero otherwise -static bool translateRMMemory(MCInst &mcInst, - InternalInstruction &insn, - bool sr) { +static bool translateRMMemory(MCInst &mcInst, InternalInstruction &insn) { // Addresses in an MCInst are represented as five operands: // 1. 
basereg (register) The R/M base, or (if there is a SIB) the // SIB base @@ -385,10 +380,7 @@ static bool translateRMMemory(MCInst &mcInst, mcInst.addOperand(scaleAmount); mcInst.addOperand(indexReg); mcInst.addOperand(displacement); - - if (sr) - mcInst.addOperand(segmentReg); - + mcInst.addOperand(segmentReg); return false; } @@ -439,9 +431,8 @@ static bool translateRM(MCInst &mcInst, case TYPE_M1616: case TYPE_M1632: case TYPE_M1664: - return translateRMMemory(mcInst, insn, true); case TYPE_LEA: - return translateRMMemory(mcInst, insn, false); + return translateRMMemory(mcInst, insn); } } diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt index e5f84e8..b6aba93 100644 --- a/lib/Target/X86/README-SSE.txt +++ b/lib/Target/X86/README-SSE.txt @@ -36,62 +36,6 @@ The pattern isel got this one right. //===---------------------------------------------------------------------===// -SSE doesn't have [mem] op= reg instructions. If we have an SSE instruction -like this: - - X += y - -and the register allocator decides to spill X, it is cheaper to emit this as: - -Y += [xslot] -store Y -> [xslot] - -than as: - -tmp = [xslot] -tmp += y -store tmp -> [xslot] - -..and this uses one fewer register (so this should be done at load folding -time, not at spiller time). *Note* however that this can only be done -if Y is dead. Here's a testcase: - -@.str_3 = external global [15 x i8] -declare void @printf(i32, ...) -define void @main() { -build_tree.exit: - br label %no_exit.i7 - -no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit - %tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], - [ %tmp.34.i18, %no_exit.i7 ] - %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], - [ %tmp.28.i16, %no_exit.i7 ] - %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00 - %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00 - br i1 false, label %Compute_Tree.exit23, label %no_exit.i7 - -Compute_Tree.exit23: ; preds = %no_exit.i7 - tail call void (i32, ...)* @printf( i32 0 ) - store double %tmp.34.i18, double* null - ret void -} - -We currently emit: - -.BBmain_1: - xorpd %XMM1, %XMM1 - addsd %XMM0, %XMM1 -*** movsd %XMM2, QWORD PTR [%ESP + 8] -*** addsd %XMM2, %XMM1 -*** movsd QWORD PTR [%ESP + 8], %XMM2 - jmp .BBmain_1 # no_exit.i7 - -This is a bugpoint reduced testcase, which is why the testcase doesn't make -much sense (e.g. its an infinite loop). :) - -//===---------------------------------------------------------------------===// - SSE should implement 'select_cc' using 'emulated conditional moves' that use pcmp/pand/pandn/por to do a selection instead of a conditional branch: @@ -122,12 +66,6 @@ LBB_X_2: //===---------------------------------------------------------------------===// -It's not clear whether we should use pxor or xorps / xorpd to clear XMM -registers. The choice may depend on subtarget information. We should do some -more experiments on different x86 machines. - -//===---------------------------------------------------------------------===// - Lower memcpy / memset to a series of SSE 128 bit move instructions when it's feasible. @@ -151,45 +89,6 @@ Perhaps use pxor / xorp* to clear a XMM register first? //===---------------------------------------------------------------------===// -How to decide when to use the "floating point version" of logical ops? 
Here are -some code fragments: - - movaps LCPI5_5, %xmm2 - divps %xmm1, %xmm2 - mulps %xmm2, %xmm3 - mulps 8656(%ecx), %xmm3 - addps 8672(%ecx), %xmm3 - andps LCPI5_6, %xmm2 - andps LCPI5_1, %xmm3 - por %xmm2, %xmm3 - movdqa %xmm3, (%edi) - - movaps LCPI5_5, %xmm1 - divps %xmm0, %xmm1 - mulps %xmm1, %xmm3 - mulps 8656(%ecx), %xmm3 - addps 8672(%ecx), %xmm3 - andps LCPI5_6, %xmm1 - andps LCPI5_1, %xmm3 - orps %xmm1, %xmm3 - movaps %xmm3, 112(%esp) - movaps %xmm3, (%ebx) - -Due to some minor source change, the later case ended up using orps and movaps -instead of por and movdqa. Does it matter? - -//===---------------------------------------------------------------------===// - -X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible -to choose between movaps, movapd, and movdqa based on types of source and -destination? - -How about andps, andpd, and pand? Do we really care about the type of the packed -elements? If not, why not always use the "ps" variants which are likely to be -shorter. - -//===---------------------------------------------------------------------===// - External test Nurbs exposed some problems. Look for __ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc emits: @@ -278,41 +177,6 @@ It also exposes some other problems. See MOV32ri -3 and the spills. //===---------------------------------------------------------------------===// -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=25500 - -LLVM is producing bad code. - -LBB_main_4: # cond_true44 - addps %xmm1, %xmm2 - subps %xmm3, %xmm2 - movaps (%ecx), %xmm4 - movaps %xmm2, %xmm1 - addps %xmm4, %xmm1 - addl $16, %ecx - incl %edx - cmpl $262144, %edx - movaps %xmm3, %xmm2 - movaps %xmm4, %xmm3 - jne LBB_main_4 # cond_true44 - -There are two problems. 1) No need to two loop induction variables. We can -compare against 262144 * 16. 2) Known register coalescer issue. We should -be able eliminate one of the movaps: - - addps %xmm2, %xmm1 <=== Commute! - subps %xmm3, %xmm1 - movaps (%ecx), %xmm4 - movaps %xmm1, %xmm1 <=== Eliminate! - addps %xmm4, %xmm1 - addl $16, %ecx - incl %edx - cmpl $262144, %edx - movaps %xmm3, %xmm2 - movaps %xmm4, %xmm3 - jne LBB_main_4 # cond_true44 - -//===---------------------------------------------------------------------===// - Consider: __m128 test(float a) { @@ -382,22 +246,6 @@ elements are fixed zeros. //===---------------------------------------------------------------------===// -__m128d test1( __m128d A, __m128d B) { - return _mm_shuffle_pd(A, B, 0x3); -} - -compiles to - -shufpd $3, %xmm1, %xmm0 - -Perhaps it's better to use unpckhpd instead? - -unpckhpd %xmm1, %xmm0 - -Don't know if unpckhpd is faster. But it is shorter. - -//===---------------------------------------------------------------------===// - This code generates ugly code, probably due to costs being off or something: define void @test(float* %P, <4 x float>* %P2 ) { @@ -549,6 +397,7 @@ entry: %tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly ret i64 %tmp20 } +declare i64 @ccoshf(float %z.0, float %z.1) nounwind readonly This currently compiles to: @@ -987,3 +836,34 @@ This would be better kept in the SSE unit by treating XMM0 as a 4xfloat and doing a shuffle from v[1] to v[0] then a float store. 
//===---------------------------------------------------------------------===// + +On SSE4 machines, we compile this code: + +define <2 x float> @test2(<2 x float> %Q, <2 x float> %R, + <2 x float> *%P) nounwind { + %Z = fadd <2 x float> %Q, %R + + store <2 x float> %Z, <2 x float> *%P + ret <2 x float> %Z +} + +into: + +_test2: ## @test2 +## BB#0: + insertps $0, %xmm2, %xmm2 + insertps $16, %xmm3, %xmm2 + insertps $0, %xmm0, %xmm3 + insertps $16, %xmm1, %xmm3 + addps %xmm2, %xmm3 + movq %xmm3, (%rdi) + movaps %xmm3, %xmm0 + pshufd $1, %xmm3, %xmm1 + ## kill: XMM1 XMM1 + ret + +The insertps's of $0 are pointless complex copies. + +//===---------------------------------------------------------------------===// + + diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index e8f7c5d..78c4dc0 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -1,27 +1,5 @@ //===- README_X86_64.txt - Notes for X86-64 code gen ----------------------===// -Implement different PIC models? Right now we only support Mac OS X with small -PIC code model. - -//===---------------------------------------------------------------------===// - -For this: - -extern void xx(void); -void bar(void) { - xx(); -} - -gcc compiles to: - -.globl _bar -_bar: - jmp _xx - -We need to do the tailcall optimization as well. - -//===---------------------------------------------------------------------===// - AMD64 Optimization Manual 8.2 has some nice information about optimizing integer multiplication by a constant. How much of it applies to Intel's X86-64 implementation? There are definite trade-offs to consider: latency vs. register @@ -96,123 +74,14 @@ gcc: movq %rax, (%rdx) ret -//===---------------------------------------------------------------------===// - -Vararg function prologue can be further optimized. Currently all XMM registers -are stored into register save area. Most of them can be eliminated since the -upper bound of the number of XMM registers used are passed in %al. gcc produces -something like the following: - - movzbl %al, %edx - leaq 0(,%rdx,4), %rax - leaq 4+L2(%rip), %rdx - leaq 239(%rsp), %rax - jmp *%rdx - movaps %xmm7, -15(%rax) - movaps %xmm6, -31(%rax) - movaps %xmm5, -47(%rax) - movaps %xmm4, -63(%rax) - movaps %xmm3, -79(%rax) - movaps %xmm2, -95(%rax) - movaps %xmm1, -111(%rax) - movaps %xmm0, -127(%rax) -L2: - -It jumps over the movaps that do not need to be stored. Hard to see this being -significant as it added 5 instruciton (including a indirect branch) to avoid -executing 0 to 8 stores in the function prologue. - -Perhaps we can optimize for the common case where no XMM registers are used for -parameter passing. i.e. is %al == 0 jump over all stores. Or in the case of a -leaf function where we can determine that no XMM input parameter is need, avoid -emitting the stores at all. - -//===---------------------------------------------------------------------===// +And the codegen is even worse for the following +(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103): + void fill1(char *s, int a) + { + __builtin_memset(s, a, 15); + } -AMD64 has a complex calling convention for aggregate passing by value: - -1. If the size of an object is larger than two eightbytes, or in C++, is a non- - POD structure or union type, or contains unaligned fields, it has class - MEMORY. -2. Both eightbytes get initialized to class NO_CLASS. -3. Each field of an object is classified recursively so that always two fields - are considered. 
The resulting class is calculated according to the classes - of the fields in the eightbyte: - (a) If both classes are equal, this is the resulting class. - (b) If one of the classes is NO_CLASS, the resulting class is the other - class. - (c) If one of the classes is MEMORY, the result is the MEMORY class. - (d) If one of the classes is INTEGER, the result is the INTEGER. - (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, MEMORY is used as - class. - (f) Otherwise class SSE is used. -4. Then a post merger cleanup is done: - (a) If one of the classes is MEMORY, the whole argument is passed in memory. - (b) If SSEUP is not preceeded by SSE, it is converted to SSE. - -Currently llvm frontend does not handle this correctly. - -Problem 1: - typedef struct { int i; double d; } QuadWordS; -It is currently passed in two i64 integer registers. However, gcc compiled -callee expects the second element 'd' to be passed in XMM0. - -Problem 2: - typedef struct { int32_t i; float j; double d; } QuadWordS; -The size of the first two fields == i64 so they will be combined and passed in -a integer register RDI. The third field is still passed in XMM0. - -Problem 3: - typedef struct { int64_t i; int8_t j; int64_t d; } S; - void test(S s) -The size of this aggregate is greater than two i64 so it should be passed in -memory. Currently llvm breaks this down and passed it in three integer -registers. - -Problem 4: -Taking problem 3 one step ahead where a function expects a aggregate value -in memory followed by more parameter(s) passed in register(s). - void test(S s, int b) - -LLVM IR does not allow parameter passing by aggregates, therefore it must break -the aggregates value (in problem 3 and 4) into a number of scalar values: - void %test(long %s.i, byte %s.j, long %s.d); - -However, if the backend were to lower this code literally it would pass the 3 -values in integer registers. To force it be passed in memory, the frontend -should change the function signiture to: - void %test(long %undef1, long %undef2, long %undef3, long %undef4, - long %undef5, long %undef6, - long %s.i, byte %s.j, long %s.d); -And the callee would look something like this: - call void %test( undef, undef, undef, undef, undef, undef, - %tmp.s.i, %tmp.s.j, %tmp.s.d ); -The first 6 undef parameters would exhaust the 6 integer registers used for -parameter passing. The following three integer values would then be forced into -memory. - -For problem 4, the parameter 'd' would be moved to the front of the parameter -list so it will be passed in register: - void %test(int %d, - long %undef1, long %undef2, long %undef3, long %undef4, - long %undef5, long %undef6, - long %s.i, byte %s.j, long %s.d); - -//===---------------------------------------------------------------------===// - -Right now the asm printer assumes GlobalAddress are accessed via RIP relative -addressing. Therefore, it is not possible to generate this: - movabsq $__ZTV10polynomialIdE+16, %rax - -That is ok for now since we currently only support small model. So the above -is selected as - leaq __ZTV10polynomialIdE+16(%rip), %rax - -This is probably slightly slower but is much shorter than movabsq. However, if -we were to support medium or larger code models, we need to use the movabs -instruction. We should probably introduce something like AbsoluteAddress to -distinguish it from GlobalAddress so the asm printer and JIT code emitter can -do the right thing. +For this version, we duplicate the computation of the constant to store. 
 //===---------------------------------------------------------------------===//
 
@@ -298,3 +167,107 @@ be able to recognize the zero extend.  This could also presumably be
 implemented if we have whole-function selectiondags.
 
 //===---------------------------------------------------------------------===//
+
+Take the following C code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43640):
+
+struct u1
+{
+  float x;
+  float y;
+};
+
+float foo(struct u1 u)
+{
+  return u.x + u.y;
+}
+
+Optimizes to the following IR:
+define float @foo(double %u.0) nounwind readnone {
+entry:
+  %tmp8 = bitcast double %u.0 to i64    ; <i64> [#uses=2]
+  %tmp6 = trunc i64 %tmp8 to i32        ; <i32> [#uses=1]
+  %tmp7 = bitcast i32 %tmp6 to float    ; <float> [#uses=1]
+  %tmp2 = lshr i64 %tmp8, 32            ; <i64> [#uses=1]
+  %tmp3 = trunc i64 %tmp2 to i32        ; <i32> [#uses=1]
+  %tmp4 = bitcast i32 %tmp3 to float    ; <float> [#uses=1]
+  %0 = fadd float %tmp7, %tmp4          ; <float> [#uses=1]
+  ret float %0
+}
+
+And current llvm-gcc/clang output:
+	movd	%xmm0, %rax
+	movd	%eax, %xmm1
+	shrq	$32, %rax
+	movd	%eax, %xmm0
+	addss	%xmm1, %xmm0
+	ret
+
+We really shouldn't move the floats to RAX, only to immediately move them
+straight back to the XMM registers.
+
+There really isn't any good way to handle this purely in IR optimizers; it
+could possibly be handled by changing the output of the frontend, though.  It
+would also be feasible to add an x86-specific DAGCombine to optimize the
+bitcast+trunc+(lshr+)bitcast combination.
+
+//===---------------------------------------------------------------------===//
+
+Take the following code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653):
+extern unsigned long table[];
+unsigned long foo(unsigned char *p) {
+  unsigned long tag = *p;
+  return table[tag >> 4] + table[tag & 0xf];
+}
+
+Current code generated:
+	movzbl	(%rdi), %eax
+	movq	%rax, %rcx
+	andq	$240, %rcx
+	shrq	%rcx
+	andq	$15, %rax
+	movq	table(,%rax,8), %rax
+	addq	table(%rcx), %rax
+	ret
+
+Issues:
+1. First movq should be movl; saves a byte.
+2. Both andq's should be andl; saves another two bytes.  I think this was
+   implemented at one point, but subsequently regressed.
+3. shrq should be shrl; saves another byte.
+4. The first andq can be completely eliminated by using a slightly more
+   expensive addressing mode.
+
+//===---------------------------------------------------------------------===//
+
+Consider the following (contrived testcase, but contains common factors):
+
+#include <stdarg.h>
+int test(int x, ...) {
+  int sum, i;
+  va_list l;
+  va_start(l, x);
+  for (i = 0; i < x; i++)
+    sum += va_arg(l, int);
+  va_end(l);
+  return sum;
+}
+
+Testcase given in C because fixing it will likely involve changing the IR
+generated for it.  The primary issue with the result is that it doesn't do any
+of the optimizations which are possible if we know the address of a va_list
+in the current function is never taken:
+1. We shouldn't spill the XMM registers because we only call va_arg with "int".
+2. It would be nice if we could scalarrepl the va_list.
+3. Probably overkill, but it'd be cool if we could peel off the first five
+iterations of the loop.
+
+Other optimizations involving functions which use va_arg on floats which don't
+have the address of a va_list taken:
+1. Conversely to the above, we shouldn't spill general registers if we only
+   call va_arg on "double".
+2. If we know nothing more than 64 bits wide is read from the XMM registers,
+   we can change the spilling code to reduce the amount of stack used by half.
+ +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index d4545a6..efc0cd8 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1103,57 +1103,6 @@ be folded into: shl [mem], 1 //===---------------------------------------------------------------------===// -This testcase misses a read/modify/write opportunity (from PR1425): - -void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){ - int i; - for(i=0; i>0; -} - -We compile it down to: - -LBB1_2: # bb - movl (%esi,%edi,4), %ebx - addl (%ecx,%edi,4), %ebx - addl (%edx,%edi,4), %ebx - movl %ebx, (%ecx,%edi,4) - incl %edi - cmpl %eax, %edi - jne LBB1_2 # bb - -the inner loop should add to the memory location (%ecx,%edi,4), saving -a mov. Something like: - - movl (%esi,%edi,4), %ebx - addl (%edx,%edi,4), %ebx - addl %ebx, (%ecx,%edi,4) - -Here is another interesting example: - -void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){ - int i; - for(i=0; i>0; -} - -We miss the r/m/w opportunity here by using 2 subs instead of an add+sub[mem]: - -LBB9_2: # bb - movl (%ecx,%edi,4), %ebx - subl (%esi,%edi,4), %ebx - subl (%edx,%edi,4), %ebx - movl %ebx, (%ecx,%edi,4) - incl %edi - cmpl %eax, %edi - jne LBB9_2 # bb - -Additionally, LSR should rewrite the exit condition of these loops to use -a stride-4 IV, would would allow all the scales in the loop to go away. -This would result in smaller code and more efficient microops. - -//===---------------------------------------------------------------------===// - In SSE mode, we turn abs and neg into a load from the constant pool plus a xor or and instruction, for example: @@ -1301,15 +1250,8 @@ FirstOnet: xorl %eax, %eax ret -There are a few possible improvements here: -1. We should be able to eliminate the dead load into %ecx -2. We could change the "movl 8(%esp), %eax" into - "movzwl 10(%esp), %eax"; this lets us change the cmpl - into a testl, which is shorter, and eliminate the shift. - -We could also in theory eliminate the branch by using a conditional -for the address of the load, but that seems unlikely to be worthwhile -in general. +We could change the "movl 8(%esp), %eax" into "movzwl 10(%esp), %eax"; this +lets us change the cmpl into a testl, which is shorter, and eliminate the shift. //===---------------------------------------------------------------------===// @@ -1331,22 +1273,23 @@ bb7: ; preds = %entry to: -_foo: +foo: # @foo +# BB#0: # %entry + movl 4(%esp), %ecx cmpb $0, 16(%esp) - movl 12(%esp), %ecx + je .LBB0_2 +# BB#1: # %bb movl 8(%esp), %eax - movl 4(%esp), %edx - je LBB1_2 # bb7 -LBB1_1: # bb - addl %edx, %eax + addl %ecx, %eax ret -LBB1_2: # bb7 - movl %edx, %eax - subl %ecx, %eax +.LBB0_2: # %bb7 + movl 12(%esp), %edx + movl %ecx, %eax + subl %edx, %eax ret -The coalescer could coalesce "edx" with "eax" to avoid the movl in LBB1_2 -if it commuted the addl in LBB1_1. +There's an obviously unnecessary movl in .LBB0_2, and we could eliminate a +couple more movls by putting 4(%esp) into %eax instead of %ecx. //===---------------------------------------------------------------------===// @@ -1396,8 +1339,7 @@ Also check why xmm7 is not used at all in the function. //===---------------------------------------------------------------------===// -Legalize loses track of the fact that bools are always zero extended when in -memory. 
This causes us to compile abort_gzip (from 164.gzip) from: +Take the following: target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin8" @@ -1416,16 +1358,15 @@ bb4.i: ; preds = %entry } declare void @exit(i32) noreturn nounwind -into: - -_abort_gzip: +This compiles into: +_abort_gzip: ## @abort_gzip +## BB#0: ## %entry subl $12, %esp movb _in_exit.4870.b, %al - notb %al - testb $1, %al - jne LBB1_2 ## bb4.i -LBB1_1: ## bb.i - ... + cmpb $1, %al + jne LBB0_2 + +We somehow miss folding the movb into the cmpb. //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 22e89a5..677781d 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -35,6 +35,10 @@ class formatted_raw_ostream; FunctionPass *createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel); +/// createGlobalBaseRegPass - This pass initializes a global base +/// register for PIC on x86-32. +FunctionPass* createGlobalBaseRegPass(); + /// createX86FloatingPointStackifierPass - This function returns a pass which /// converts floating point register references and pseudo instructions into /// floating point stack references and physical instructions. diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index 151087f..2cf65c1 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -23,13 +23,13 @@ #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; -namespace { static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { default: assert(0 && "invalid fixup kind!"); case X86::reloc_pcrel_1byte: case FK_Data_1: return 0; + case X86::reloc_pcrel_2byte: case FK_Data_2: return 1; case X86::reloc_pcrel_4byte: case X86::reloc_riprel_4byte: @@ -39,6 +39,7 @@ static unsigned getFixupKindLog2Size(unsigned Kind) { } } +namespace { class X86AsmBackend : public TargetAsmBackend { public: X86AsmBackend(const Target &T) @@ -60,6 +61,7 @@ public: bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const; }; +} // end anonymous namespace static unsigned getRelaxedOpcode(unsigned Op) { switch (Op) { @@ -75,7 +77,6 @@ static unsigned getRelaxedOpcode(unsigned Op) { case X86::JG_1: return X86::JG_4; case X86::JLE_1: return X86::JLE_4; case X86::JL_1: return X86::JL_4; - case X86::TAILJMP_1: case X86::JMP_1: return X86::JMP_4; case X86::JNE_1: return X86::JNE_4; case X86::JNO_1: return X86::JNO_4; @@ -180,6 +181,7 @@ bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const { /* *** */ +namespace { class ELFX86AsmBackend : public X86AsmBackend { public: ELFX86AsmBackend(const Target &T) @@ -281,7 +283,7 @@ public: } }; -} +} // end anonymous namespace TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, const std::string &TT) { diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index a5774e1..a6a1e4e 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -42,7 +42,7 @@ def RetCC_X86Common : CallingConv<[ // MMX vector types are always returned in MM0. If the target doesn't have // MM0, it doesn't support these vector types. - CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToReg<[MM0]>>, + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[MM0]>>, // Long double types are always returned in ST0 (even with SSE). 
CCIfType<[f80], CCAssignToReg<[ST0, ST1]>> @@ -89,7 +89,7 @@ def RetCC_X86_64_C : CallingConv<[ // returned in RAX. This disagrees with ABI documentation but is bug // compatible with gcc. CCIfType<[v1i64], CCAssignToReg<[RAX]>>, - CCIfType<[v8i8, v4i16, v2i32, v2f32], CCAssignToReg<[XMM0, XMM1]>>, + CCIfType<[v8i8, v4i16, v2i32], CCAssignToReg<[XMM0, XMM1]>>, CCDelegateTo ]>; @@ -155,7 +155,7 @@ def CC_X86_64_C : CallingConv<[ // The first 8 MMX (except for v1i64) vector arguments are passed in XMM // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32, v2f32], + CCIfType<[v8i8, v4i16, v2i32], CCIfSubtarget<"isTargetDarwin()", CCIfSubtarget<"hasSSE2()", CCPromoteToType>>>, @@ -177,7 +177,7 @@ def CC_X86_64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], CCAssignToStack<8, 8>> + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> ]>; // Calling convention used on Win64 @@ -195,7 +195,7 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect>, // The first 4 MMX vector arguments are passed in GPRs. - CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32], + CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType>, // The first 4 integer arguments are passed in integer registers. @@ -254,7 +254,7 @@ def CC_X86_32_Common : CallingConv<[ // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx // registers if the call is not a vararg call. - CCIfNotVarArg>>, // Integer/Float values get stored in stack slots that are 4 bytes in diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 8f02604..f13669b 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -138,7 +138,7 @@ bool Emitter::runOnMachineFunction(MachineFunction &MF) { // MOVPC32r is basically a call plus a pop instruction. if (Desc.getOpcode() == X86::MOVPC32r) emitInstruction(*I, &II->get(X86::POP32r)); - NumEmitted++; // Keep track of the # of mi's emitted + ++NumEmitted; // Keep track of the # of mi's emitted } } } while (MCE.finishFunction(MF)); @@ -730,9 +730,9 @@ void Emitter::emitInstruction(const MachineInstr &MI, case X86II::MRMDestMem: { MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, - getX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands) + getX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands) .getReg())); - CurOp += X86AddrNumOperands + 1; + CurOp += X86::AddrNumOperands + 1; if (CurOp != NumOps) emitConstant(MI.getOperand(CurOp++).getImm(), X86II::getSizeOfImm(Desc->TSFlags)); @@ -750,13 +750,7 @@ void Emitter::emitInstruction(const MachineInstr &MI, break; case X86II::MRMSrcMem: { - // FIXME: Maybe lea should have its own form? - int AddrOperands; - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - AddrOperands = X86AddrNumOperands - 1; // No segment register - else - AddrOperands = X86AddrNumOperands; + int AddrOperands = X86::AddrNumOperands; intptr_t PCAdj = (CurOp + AddrOperands + 1 != NumOps) ? X86II::getSizeOfImm(Desc->TSFlags) : 0; @@ -810,14 +804,14 @@ void Emitter::emitInstruction(const MachineInstr &MI, case X86II::MRM2m: case X86II::MRM3m: case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: { - intptr_t PCAdj = (CurOp + X86AddrNumOperands != NumOps) ? - (MI.getOperand(CurOp+X86AddrNumOperands).isImm() ? 
+ intptr_t PCAdj = (CurOp + X86::AddrNumOperands != NumOps) ? + (MI.getOperand(CurOp+X86::AddrNumOperands).isImm() ? X86II::getSizeOfImm(Desc->TSFlags) : 4) : 0; MCE.emitByte(BaseOpcode); emitMemModRMByte(MI, CurOp, (Desc->TSFlags & X86II::FormMask)-X86II::MRM0m, PCAdj); - CurOp += X86AddrNumOperands; + CurOp += X86::AddrNumOperands; if (CurOp == NumOps) break; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 1bc5eb7..cdde24a 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -23,7 +23,9 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -52,20 +54,7 @@ class X86FastISel : public FastISel { bool X86ScalarSSEf32; public: - explicit X86FastISel(MachineFunction &mf, - DenseMap &vm, - DenseMap &bm, - DenseMap &am, - std::vector > &pn -#ifndef NDEBUG - , SmallSet &cil -#endif - ) - : FastISel(mf, vm, bm, am, pn -#ifndef NDEBUG - , cil -#endif - ) { + explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) { Subtarget = &TM.getSubtarget(); StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP; X86ScalarSSEf64 = Subtarget->hasSSE2(); @@ -96,6 +85,8 @@ private: bool X86SelectStore(const Instruction *I); + bool X86SelectRet(const Instruction *I); + bool X86SelectCmp(const Instruction *I); bool X86SelectZExt(const Instruction *I); @@ -117,6 +108,7 @@ private: bool X86SelectCall(const Instruction *I); CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false); + CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false); const X86InstrInfo *getInstrInfo() const { return getTargetMachine()->getInstrInfo(); @@ -190,6 +182,20 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC, return CC_X86_32_C; } +/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling +/// convention. +CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC, + bool isTaillCall) { + if (Subtarget->is64Bit()) { + if (Subtarget->isTargetWin64()) + return RetCC_X86_Win64_C; + else + return RetCC_X86_64_C; + } + + return RetCC_X86_32_C; +} + /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT. /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. @@ -242,7 +248,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, } ResultReg = createResultReg(RC); - addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc), ResultReg), AM); return true; } @@ -261,7 +268,7 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, case MVT::i1: { // Mask out all but lowest bit. 
unsigned AndResult = createResultReg(X86::GR8RegisterClass); - BuildMI(MBB, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1); Val = AndResult; } @@ -278,7 +285,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, break; } - addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc)), AM).addReg(Val); return true; } @@ -306,7 +314,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, } if (Opc) { - addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(Opc)), AM) .addImm(Signed ? (uint64_t) CI->getSExtValue() : CI->getZExtValue()); return true; @@ -342,6 +351,12 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { const User *U = NULL; unsigned Opcode = Instruction::UserOp1; if (const Instruction *I = dyn_cast(V)) { + // Don't walk into other basic blocks; it's possible we haven't + // visited them yet, so the instructions may not yet be assigned + // virtual registers. + if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) + return false; + Opcode = I->getOpcode(); U = I; } else if (const ConstantExpr *C = dyn_cast(V)) { @@ -349,6 +364,12 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { U = C; } + if (const PointerType *Ty = dyn_cast(V->getType())) + if (Ty->getAddressSpace() > 255) + // Fast instruction selection doesn't support the special + // address spaces. + return false; + switch (Opcode) { default: break; case Instruction::BitCast: @@ -370,8 +391,9 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { case Instruction::Alloca: { // Do static allocas. const AllocaInst *A = cast(V); - DenseMap::iterator SI = StaticAllocaMap.find(A); - if (SI != StaticAllocaMap.end()) { + DenseMap::iterator SI = + FuncInfo.StaticAllocaMap.find(A); + if (SI != FuncInfo.StaticAllocaMap.end()) { AM.BaseType = X86AddressMode::FrameIndexBase; AM.Base.FrameIndex = SI->second; return true; @@ -411,20 +433,33 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { Disp += SL->getElementOffset(Idx); } else { uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - if (const ConstantInt *CI = dyn_cast(Op)) { - // Constant-offset addressing. - Disp += CI->getSExtValue() * S; - } else if (IndexReg == 0 && - (!AM.GV || !Subtarget->isPICStyleRIPRel()) && - (S == 1 || S == 2 || S == 4 || S == 8)) { - // Scaled-index addressing. - Scale = S; - IndexReg = getRegForGEPIndex(Op).first; - if (IndexReg == 0) - return false; - } else - // Unsupported. - goto unsupported_gep; + SmallVector Worklist; + Worklist.push_back(Op); + do { + Op = Worklist.pop_back_val(); + if (const ConstantInt *CI = dyn_cast(Op)) { + // Constant-offset addressing. + Disp += CI->getSExtValue() * S; + } else if (isa(Op) && + isa(cast(Op)->getOperand(1))) { + // An add with a constant operand. Fold the constant. + ConstantInt *CI = + cast(cast(Op)->getOperand(1)); + Disp += CI->getSExtValue() * S; + // Add the other operand back to the work list. + Worklist.push_back(cast(Op)->getOperand(0)); + } else if (IndexReg == 0 && + (!AM.GV || !Subtarget->isPICStyleRIPRel()) && + (S == 1 || S == 2 || S == 4 || S == 8)) { + // Scaled-index addressing. + Scale = S; + IndexReg = getRegForGEPIndex(Op).first; + if (IndexReg == 0) + return false; + } else + // Unsupported. 
+ goto unsupported_gep; + } while (!Worklist.empty()); } } // Check for displacement overflow. @@ -473,7 +508,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // If this reference is relative to the pic base, set it now. if (isGlobalRelativeToPICBase(GVFlags)) { // FIXME: How do we know Base.Reg is free?? - AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF); + AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } // Unless the ABI requires an extra load, return a direct reference to @@ -504,6 +539,9 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { StubAM.GV = GV; StubAM.GVOpFlags = GVFlags; + // Prepare for inserting code in the local-value area. + MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea(); + if (TLI.getPointerTy() == MVT::i64) { Opc = X86::MOV64rm; RC = X86::GR64RegisterClass; @@ -516,8 +554,13 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { } LoadReg = createResultReg(RC); - addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM); - + MachineInstrBuilder LoadMI = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); + addFullAddress(LoadMI, StubAM); + + // Ok, back to normal mode. + leaveLocalValueArea(SaveInsertPt); + + // Prevent loading GV stub multiple times in same MBB. LocalValueMap[V] = LoadReg; } @@ -642,6 +685,93 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { return X86FastEmitStore(VT, I->getOperand(0), AM); } +/// X86SelectRet - Select and emit code to implement ret instructions. +bool X86FastISel::X86SelectRet(const Instruction *I) { + const ReturnInst *Ret = cast(I); + const Function &F = *I->getParent()->getParent(); + + if (!FuncInfo.CanLowerReturn) + return false; + + CallingConv::ID CC = F.getCallingConv(); + if (CC != CallingConv::C && + CC != CallingConv::Fast && + CC != CallingConv::X86_FastCall) + return false; + + if (Subtarget->isTargetWin64()) + return false; + + // Don't handle popping bytes on return for now. + if (FuncInfo.MF->getInfo() + ->getBytesToPopOnReturn() != 0) + return false; + + // fastcc with -tailcallopt is intended to provide a guaranteed + // tail call optimization. Fastisel doesn't know how to do that. + if (CC == CallingConv::Fast && GuaranteedTailCallOpt) + return false; + + // Let SDISel handle vararg functions. + if (F.isVarArg()) + return false; + + if (Ret->getNumOperands() > 0) { + SmallVector Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + Outs, TLI); + + // Analyze operands of the call, assigning locations to each operand. + SmallVector ValLocs; + CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext()); + CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC)); + + const Value *RV = Ret->getOperand(0); + unsigned Reg = getRegForValue(RV); + if (Reg == 0) + return false; + + // Only handle a single return value for now. + if (ValLocs.size() != 1) + return false; + + CCValAssign &VA = ValLocs[0]; + + // Don't bother handling odd stuff for now. + if (VA.getLocInfo() != CCValAssign::Full) + return false; + // Only handle register returns for now. + if (!VA.isRegLoc()) + return false; + // TODO: For now, don't try to handle cases where getLocInfo() + // says Full but the types don't match. + if (VA.getValVT() != TLI.getValueType(RV->getType())) + return false; + + // The calling-convention tables for x87 returns don't tell + // the whole story. + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) + return false; + + // Make the copy.
+ unsigned SrcReg = Reg + VA.getValNo(); + unsigned DstReg = VA.getLocReg(); + const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); + // Avoid a cross-class copy. This is very unlikely. + if (!SrcRC->contains(DstReg)) + return false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + DstReg).addReg(SrcReg); + + // Mark the register as live out of the function. + MRI.addLiveOut(VA.getLocReg()); + } + + // Now emit the RET. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET)); + return true; +} + /// X86SelectLoad - Select and emit code to implement load instructions. /// bool X86FastISel::X86SelectLoad(const Instruction *I) { @@ -661,15 +791,15 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) { return false; } -static unsigned X86ChooseCmpOpcode(EVT VT) { +static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { switch (VT.getSimpleVT().SimpleTy) { default: return 0; case MVT::i8: return X86::CMP8rr; case MVT::i16: return X86::CMP16rr; case MVT::i32: return X86::CMP32rr; case MVT::i64: return X86::CMP64rr; - case MVT::f32: return X86::UCOMISSrr; - case MVT::f64: return X86::UCOMISDrr; + case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0; + case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0; } } @@ -706,18 +836,21 @@ bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, // CMPri, otherwise use CMPrr. if (const ConstantInt *Op1C = dyn_cast(Op1)) { if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { - BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg) - .addImm(Op1C->getSExtValue()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc)) + .addReg(Op0Reg) + .addImm(Op1C->getSExtValue()); return true; } } - unsigned CompareOpc = X86ChooseCmpOpcode(VT); + unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget); if (CompareOpc == 0) return false; unsigned Op1Reg = getRegForValue(Op1); if (Op1Reg == 0) return false; - BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc)) + .addReg(Op0Reg) + .addReg(Op1Reg); return true; } @@ -739,9 +872,10 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { unsigned EReg = createResultReg(&X86::GR8RegClass); unsigned NPReg = createResultReg(&X86::GR8RegClass); - BuildMI(MBB, DL, TII.get(X86::SETEr), EReg); - BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg); - BuildMI(MBB, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::SETNPr), NPReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg); UpdateValueMap(I, ResultReg); return true; @@ -752,9 +886,13 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { unsigned NEReg = createResultReg(&X86::GR8RegClass); unsigned PReg = createResultReg(&X86::GR8RegClass); - BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg); - BuildMI(MBB, DL, TII.get(X86::SETPr), PReg); - BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::SETNEr), NEReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::SETPr), PReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::OR8rr), ResultReg) + .addReg(PReg).addReg(NEReg); UpdateValueMap(I, ResultReg); return true; } @@ -793,7 +931,7 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { if 
(!X86FastEmitCompare(Op0, Op1, VT)) return false; - BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg); UpdateValueMap(I, ResultReg); return true; } @@ -819,8 +957,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { // Unconditional branches are selected by tablegen-generated code. // Handle a conditional branch. const BranchInst *BI = cast(I); - MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)]; - MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)]; + MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; + MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; // Fold the common case of a conditional branch with a comparison. if (const CmpInst *CI = dyn_cast(BI->getCondition())) { @@ -829,7 +967,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { // Try to take advantage of fallthrough opportunities. CmpInst::Predicate Predicate = CI->getPredicate(); - if (MBB->isLayoutSuccessor(TrueMBB)) { + if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::getInversePredicate(Predicate); } @@ -878,16 +1016,18 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { if (!X86FastEmitCompare(Op0, Op1, VT)) return false; - BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc)) + .addMBB(TrueMBB); if (Predicate == CmpInst::FCMP_UNE) { // X86 requires a second branch to handle UNE (and OEQ, // which is mapped to UNE above). - BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4)) + .addMBB(TrueMBB); } - FastEmitBranch(FalseMBB); - MBB->addSuccessor(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); return true; } } else if (ExtractValueInst *EI = @@ -910,10 +1050,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow || CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) { const MachineInstr *SetMI = 0; - unsigned Reg = lookUpRegForValue(EI); + unsigned Reg = getRegForValue(EI); for (MachineBasicBlock::const_reverse_iterator - RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) { + RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend(); + RI != RE; ++RI) { const MachineInstr &MI = *RI; if (MI.definesRegister(Reg)) { @@ -938,11 +1079,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { unsigned OpCode = SetMI->getOpcode(); if (OpCode == X86::SETOr || OpCode == X86::SETBr) { - BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? - X86::JO_4 : X86::JB_4)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(OpCode == X86::SETOr ? 
X86::JO_4 : X86::JB_4)) .addMBB(TrueMBB); - FastEmitBranch(FalseMBB); - MBB->addSuccessor(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); return true; } } @@ -954,10 +1095,12 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { unsigned OpReg = getRegForValue(BI->getCondition()); if (OpReg == 0) return false; - BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); - BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB); - FastEmitBranch(FalseMBB); - MBB->addSuccessor(TrueMBB); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr)) + .addReg(OpReg).addReg(OpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4)) + .addMBB(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); return true; } @@ -1014,7 +1157,7 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { // Fold immediate in shl(x,3). if (const ConstantInt *CI = dyn_cast(I->getOperand(1))) { unsigned ResultReg = createResultReg(RC); - BuildMI(MBB, DL, TII.get(OpImm), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); UpdateValueMap(I, ResultReg); return true; @@ -1022,17 +1165,19 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; - TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + CReg).addReg(Op1Reg); // The shift instruction uses X86::CL. If we defined a super-register - // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what - // we're doing here. + // of X86::CL, emit a subreg KILL to precisely describe what we're doing here. 
if (CReg != X86::CL) - BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL) - .addReg(CReg).addImm(X86::sub_8bit); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::KILL), X86::CL) + .addReg(CReg, RegState::Kill); unsigned ResultReg = createResultReg(RC); - BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg) + .addReg(Op0Reg); UpdateValueMap(I, ResultReg); return true; } @@ -1064,9 +1209,11 @@ bool X86FastISel::X86SelectSelect(const Instruction *I) { unsigned Op2Reg = getRegForValue(I->getOperand(2)); if (Op2Reg == 0) return false; - BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr)) + .addReg(Op0Reg).addReg(Op0Reg); unsigned ResultReg = createResultReg(RC); - BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) + .addReg(Op1Reg).addReg(Op2Reg); UpdateValueMap(I, ResultReg); return true; } @@ -1080,7 +1227,9 @@ bool X86FastISel::X86SelectFPExt(const Instruction *I) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; unsigned ResultReg = createResultReg(X86::FR64RegisterClass); - BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::CVTSS2SDrr), ResultReg) + .addReg(OpReg); UpdateValueMap(I, ResultReg); return true; } @@ -1097,7 +1246,9 @@ bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { unsigned OpReg = getRegForValue(V); if (OpReg == 0) return false; unsigned ResultReg = createResultReg(X86::FR32RegisterClass); - BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(X86::CVTSD2SSrr), ResultReg) + .addReg(OpReg); UpdateValueMap(I, ResultReg); return true; } @@ -1132,7 +1283,8 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) { const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass; unsigned CopyReg = createResultReg(CopyRC); - BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg) + .addReg(InputReg); // Then issue an extract_subreg. unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, @@ -1153,14 +1305,18 @@ bool X86FastISel::X86SelectExtractValue(const Instruction *I) { switch (CI->getIntrinsicID()) { default: break; case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: + case Intrinsic::uadd_with_overflow: { // Cheat a little. We know that the registers for "add" and "seto" are // allocated sequentially. However, we only keep track of the register // for "add" in the value map. Use extractvalue's index to get the // correct register for "seto". - UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin()); + unsigned OpReg = getRegForValue(Agg); + if (OpReg == 0) + return false; + UpdateValueMap(I, OpReg + *EI->idx_begin()); return true; } + } } return false; @@ -1174,8 +1330,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // Emit code inline code to store the stack guard onto the stack. EVT PtrTy = TLI.getPointerTy(); - const Value *Op1 = I.getOperand(1); // The guard's value. - const AllocaInst *Slot = cast(I.getOperand(2)); + const Value *Op1 = I.getArgOperand(0); // The guard's value. 
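The intrinsic-lowering hunks above and below replace I.getOperand(N+1) with I.getArgOperand(N). A minimal standalone sketch of why the index shifts by one (the CallSiteLike type here is hypothetical, not LLVM's CallInst): when the callee sits in operand slot 0, argument k lives in operand slot k+1, and getArgOperand hides that bias.

#include <cassert>
#include <vector>

struct CallSiteLike {
  std::vector<int> Operands; // Operands[0] is the callee; arguments follow.
  int getOperand(unsigned i) const { return Operands[i]; }
  int getArgOperand(unsigned i) const { return Operands[i + 1]; }
};

int main() {
  CallSiteLike CS{{/*callee*/7, /*arg0*/42, /*arg1*/43}};
  assert(CS.getOperand(1) == CS.getArgOperand(0)); // same slot, clearer intent
  assert(CS.getArgOperand(1) == 43);
  return 0;
}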
+ const AllocaInst *Slot = cast(I.getArgOperand(1)); // Grab the frame index. X86AddressMode AM; @@ -1186,7 +1342,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return true; } case Intrinsic::objectsize: { - ConstantInt *CI = dyn_cast(I.getOperand(2)); + ConstantInt *CI = dyn_cast(I.getArgOperand(1)); const Type *Ty = I.getCalledFunction()->getReturnType(); assert(CI && "Non-constant type in Intrinsic::objectsize?"); @@ -1204,8 +1360,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(OpC), ResultReg). - addImm(CI->getZExtValue() == 0 ? -1ULL : 0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg). + addImm(CI->isZero() ? -1ULL : 0); UpdateValueMap(&I, ResultReg); return true; } @@ -1218,12 +1374,12 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); // FIXME may need to add RegState::Debug to any registers produced, // although ESP/EBP should be the only ones at the moment. - addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). - addMetadata(DI->getVariable()); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM). + addImm(0).addMetadata(DI->getVariable()); return true; } case Intrinsic::trap: { - BuildMI(MBB, DL, TII.get(X86::TRAP)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP)); return true; } case Intrinsic::sadd_with_overflow: @@ -1241,8 +1397,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { if (!isTypeLegal(RetTy, VT)) return false; - const Value *Op1 = I.getOperand(1); - const Value *Op2 = I.getOperand(2); + const Value *Op1 = I.getArgOperand(0); + const Value *Op2 = I.getArgOperand(1); unsigned Reg1 = getRegForValue(Op1); unsigned Reg2 = getRegForValue(Op2); @@ -1259,7 +1415,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { return false; unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg) + .addReg(Reg1).addReg(Reg2); unsigned DestReg1 = UpdateValueMap(&I, ResultReg); // If the add with overflow is an intra-block value then we just want to @@ -1277,7 +1434,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { unsigned Opc = X86::SETBr; if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow) Opc = X86::SETOr; - BuildMI(MBB, DL, TII.get(Opc), ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); return true; } } @@ -1285,7 +1442,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { bool X86FastISel::X86SelectCall(const Instruction *I) { const CallInst *CI = cast(I); - const Value *Callee = I->getOperand(0); + const Value *Callee = CI->getCalledValue(); // Can't handle inline asm yet. if (isa(Callee)) @@ -1314,6 +1471,10 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (FTy->isVarArg()) return false; + // Fast-isel doesn't know about callee-pop yet. + if (Subtarget->IsCalleePop(FTy->isVarArg(), CC)) + return false; + // Handle *simple* calls for now. const Type *RetTy = CS.getType(); EVT RetVT; @@ -1387,6 +1548,12 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Analyze operands of the call, assigning locations to each operand. 
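Just below, the call lowering reserves the Win64 shadow area with CCInfo.AllocateStack(32, 8) before AnalyzeCallOperands runs. A rough sketch of that bookkeeping, assuming stack offsets are plain byte counts (the helper name is made up for illustration):

#include <cassert>

// Align the running stack offset, then set aside the fixed 32-byte home
// area (four 8-byte slots) that the Win64 ABI reserves for the callee.
unsigned reserveWin64ShadowArea(unsigned StackOffset) {
  const unsigned ShadowBytes = 32, Align = 8;
  StackOffset = (StackOffset + Align - 1) & ~(Align - 1);
  return StackOffset + ShadowBytes;
}

int main() {
  assert(reserveWin64ShadowArea(0) == 32);
  assert(reserveWin64ShadowArea(5) == 40); // 5 rounds up to 8, plus 32
  return 0;
}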
SmallVector ArgLocs; CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); + + // Allocate shadow area for Win64 + if (Subtarget->isTargetWin64()) { + CCInfo.AllocateStack(32, 8); + } + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); // Get a count of how many bytes are to be pushed on the stack. @@ -1394,7 +1561,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Issue CALLSEQ_START unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); - BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown)) + .addImm(NumBytes); // Process argument: walk the register/memloc assignments, inserting // copies / loads. @@ -1449,11 +1617,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } if (VA.isRegLoc()) { - TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(), - Arg, RC, RC, DL); - assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; - Emitted = true; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + VA.getLocReg()).addReg(Arg); RegArgs.push_back(VA.getLocReg()); } else { unsigned LocMemOffset = VA.getLocMemOffset(); @@ -1475,12 +1640,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // ELF / PIC requires GOT in the EBX register before function calls via PLT // GOT pointer. if (Subtarget->isPICStyleGOT()) { - TargetRegisterClass *RC = X86::GR32RegisterClass; - unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC, - DL); - assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; - Emitted = true; + unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + X86::EBX).addReg(Base); } // Issue the call. @@ -1488,7 +1650,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (CalleeOp) { // Register-indirect call. unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r; - MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) + .addReg(CalleeOp); } else { // Direct call. @@ -1517,7 +1680,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { } - MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags); + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) + .addGlobalAddress(GV, 0, OpFlags); } // Add an implicit use GOT pointer in EBX. @@ -1530,9 +1694,11 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Issue CALLSEQ_END unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); - BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) + .addImm(NumBytes).addImm(0); // Now handle call return value (if any). 
+ SmallVector UsedRegs; if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) { SmallVector RVLocs; CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext()); @@ -1542,7 +1708,6 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { assert(RVLocs.size() == 1 && "Can't handle multi-value calls!"); EVT CopyVT = RVLocs[0].getValVT(); TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); - TargetRegisterClass *SrcRC = DstRC; // If this is a call to a function that returns an fp value on the x87 fp // stack, but where we prefer to use the value in xmm registers, copy it @@ -1551,15 +1716,14 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { RVLocs[0].getLocReg() == X86::ST1) && isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) { CopyVT = MVT::f80; - SrcRC = X86::RSTRegisterClass; DstRC = X86::RFP80RegisterClass; } unsigned ResultReg = createResultReg(DstRC); - bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - RVLocs[0].getLocReg(), DstRC, SrcRC, DL); - assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; - Emitted = true; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(RVLocs[0].getLocReg()); + UsedRegs.push_back(RVLocs[0].getLocReg()); + if (CopyVT != RVLocs[0].getValVT()) { // Round the F80 the right size, which also moves to the appropriate xmm // register. This is accomplished by storing the F80 value in memory and @@ -1568,18 +1732,21 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64; unsigned MemSize = ResVT.getSizeInBits()/8; int FI = MFI.CreateStackObject(MemSize, MemSize, false); - addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg); + addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc)), FI) + .addReg(ResultReg); DstRC = ResVT == MVT::f32 ? X86::FR32RegisterClass : X86::FR64RegisterClass; Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm; ResultReg = createResultReg(DstRC); - addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI); + addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), FI); } if (AndToI1) { // Mask out all but lowest bit for some call which produces an i1. unsigned AndResult = createResultReg(X86::GR8RegisterClass); - BuildMI(MBB, DL, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1); ResultReg = AndResult; } @@ -1587,6 +1754,9 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { UpdateValueMap(I, ResultReg); } + // Set all unused physreg defs as dead. 
+ static_cast(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); + return true; } @@ -1599,6 +1769,8 @@ X86FastISel::TargetSelectInstruction(const Instruction *I) { return X86SelectLoad(I); case Instruction::Store: return X86SelectStore(I); + case Instruction::Ret: + return X86SelectRet(I); case Instruction::ICmp: case Instruction::FCmp: return X86SelectCmp(I); @@ -1699,7 +1871,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { else Opc = X86::LEA64r; unsigned ResultReg = createResultReg(RC); - addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), AM); return ResultReg; } return 0; @@ -1717,10 +1890,10 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { unsigned char OpFlag = 0; if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic OpFlag = X86II::MO_PIC_BASE_OFFSET; - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } else if (Subtarget->isPICStyleGOT()) { OpFlag = X86II::MO_GOTOFF; - PICBase = getInstrInfo()->getGlobalBaseReg(&MF); + PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } else if (Subtarget->isPICStyleRIPRel() && TM.getCodeModel() == CodeModel::Small) { PICBase = X86::RIP; @@ -1729,7 +1902,8 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { // Create the load from the constant pool. unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align); unsigned ResultReg = createResultReg(RC); - addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), + addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), MCPOffset, PICBase, OpFlag); return ResultReg; @@ -1743,7 +1917,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { // various places, but TargetMaterializeAlloca also needs a check // in order to avoid recursion between getRegForValue, // X86SelectAddress, and TargetMaterializeAlloca. - if (!StaticAllocaMap.count(C)) + if (!FuncInfo.StaticAllocaMap.count(C)) return 0; X86AddressMode AM; @@ -1752,24 +1926,13 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); unsigned ResultReg = createResultReg(RC); - addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ResultReg), AM); return ResultReg; } namespace llvm { - llvm::FastISel *X86::createFastISel(MachineFunction &mf, - DenseMap &vm, - DenseMap &bm, - DenseMap &am, - std::vector > &pn -#ifndef NDEBUG - , SmallSet &cil -#endif - ) { - return new X86FastISel(mf, vm, bm, am, pn -#ifndef NDEBUG - , cil -#endif - ); + llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) { + return new X86FastISel(funcInfo); } } diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h index a8117d4..96e0aae 100644 --- a/lib/Target/X86/X86FixupKinds.h +++ b/lib/Target/X86/X86FixupKinds.h @@ -17,6 +17,7 @@ namespace X86 { enum Fixups { reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 + reloc_pcrel_2byte, // 16-bit pcrel, e.g.
callw reloc_riprel_4byte, // 32-bit rip-relative reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq }; diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 93460ef..cee4ad7 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -133,7 +133,7 @@ namespace { // Emit an fxch to update the runtime processor's version of the state. BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg); - NumFXCH++; + ++NumFXCH; } void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { @@ -164,6 +164,8 @@ namespace { void handleCompareFP(MachineBasicBlock::iterator &I); void handleCondMovFP(MachineBasicBlock::iterator &I); void handleSpecialFP(MachineBasicBlock::iterator &I); + + bool translateCopy(MachineInstr*); }; char FPS::ID = 0; } @@ -232,12 +234,15 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { MachineInstr *MI = I; - unsigned Flags = MI->getDesc().TSFlags; + uint64_t Flags = MI->getDesc().TSFlags; unsigned FPInstClass = Flags & X86II::FPTypeMask; if (MI->isInlineAsm()) FPInstClass = X86II::SpecialFP; - + + if (MI->isCopy() && translateCopy(MI)) + FPInstClass = X86II::SpecialFP; + if (FPInstClass == X86II::NotFP) continue; // Efficiently ignore non-fp insts! @@ -628,7 +633,7 @@ void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { MachineInstr *MI = I; unsigned NumOps = MI->getDesc().getNumOperands(); - assert((NumOps == X86AddrNumOperands + 1 || NumOps == 1) && + assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) && "Can only handle fst* & ftst instructions!"); // Is this the last use of the source register? @@ -1001,15 +1006,17 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { case X86::FpSET_ST0_32: case X86::FpSET_ST0_64: case X86::FpSET_ST0_80: { + // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm + // arguments that use an st constraint. We expect a sequence of + // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM unsigned Op0 = getFPReg(MI->getOperand(0)); - // FpSET_ST0_80 is generated by copyRegToReg for both function return - // and inline assembly with the "st" constrain. In the latter case, - // it is possible for ST(0) to be alive after this instruction. if (!MI->killsRegister(X86::FP0 + Op0)) { - // Duplicate Op0 - duplicateToTop(0, 7 /*temp register*/, I); + // Duplicate Op0 into a temporary on the stack top. + // This actually assumes that FP7 is dead. + duplicateToTop(Op0, 7, I); } else { + // Op0 is killed, so just swap it into position. moveToTop(Op0, I); } --StackTop; // "Forget" we have something on the top of stack! @@ -1017,17 +1024,29 @@ } case X86::FpSET_ST1_32: case X86::FpSET_ST1_64: - case X86::FpSET_ST1_80: - // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them. - if (StackTop == 1) { - BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1); - NumFXCH++; - StackTop = 0; - break; + case X86::FpSET_ST1_80: { + // Set up st(1) for inline asm. We are assuming that st(0) has already been + // set up by FpSET_ST0, and our StackTop is off by one because of it. + unsigned Op0 = getFPReg(MI->getOperand(0)); + // Restore the actual StackTop from before Fp_SET_ST0. + // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we + // are not enforcing the constraint.
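The FpSET_ST0/FpSET_ST1 handling above and below tracks the x87 register stack through StackTop, moveToTop, and duplicateToTop; the StackTop fix-up continues right after this aside. A toy model of that bookkeeping (deliberately simplified, not the FPS pass itself):

#include <cassert>
#include <utility>

struct MiniFPStack {
  unsigned Stack[8];   // Stack[0..Top) holds FP regs; Stack[Top-1] is st(0).
  unsigned Top = 0;
  void push(unsigned Reg) { Stack[Top++] = Reg; }
  unsigned &st(unsigned i) { return Stack[Top - 1 - i]; } // st(i) from the top
  void moveToTop(unsigned i) { std::swap(st(0), st(i)); } // models an fxch
  void pop() { --Top; } // "forget" the value on top, as FpSET_ST0 does
};

int main() {
  MiniFPStack S;
  S.push(3); S.push(5);            // st(1) = FP3, st(0) = FP5
  S.moveToTop(1);                  // exchange: st(0) = FP3
  assert(S.st(0) == 3 && S.st(1) == 5);
  S.pop(); S.pop();                // FpSET_ST0 then FpSET_ST1: forget both
  assert(S.Top == 0);
  return 0;
}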
+ ++StackTop; + unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). + if (!MI->killsRegister(X86::FP0 + Op0)) { + // Assume FP6 is not live, use it as a scratch register. + duplicateToTop(Op0, 6, I); + moveToTop(RegOnTop, I); + } else if (getSTReg(Op0) != X86::ST1) { + // We have the wrong value at st(1). Shuffle! Untested! + moveToTop(getStackEntry(1), I); + moveToTop(Op0, I); + moveToTop(RegOnTop, I); } - assert(StackTop == 2 && "Stack should have two element on it to return!"); - --StackTop; // "Forget" we have something on the top of stack! + assert(StackTop >= 2 && "Too few live registers"); + StackTop -= 2; // "Forget" both st(0) and st(1). break; + } case X86::MOV_Fp3232: case X86::MOV_Fp3264: case X86::MOV_Fp6432: @@ -1041,32 +1060,6 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { unsigned SrcReg = getFPReg(MO1); const MachineOperand &MO0 = MI->getOperand(0); - // These can be created due to inline asm. Two address pass can introduce - // copies from RFP registers to virtual registers. - if (MO0.getReg() == X86::ST0 && SrcReg == 0) { - assert(MO1.isKill()); - // Treat %ST0 = MOV_Fp8080 %FP0 - // like FpSET_ST0_80 %FP0, %ST0 - assert((StackTop == 1 || StackTop == 2) - && "Stack should have one or two element on it to return!"); - --StackTop; // "Forget" we have something on the top of stack! - break; - } else if (MO0.getReg() == X86::ST1 && SrcReg == 1) { - assert(MO1.isKill()); - // Treat %ST1 = MOV_Fp8080 %FP1 - // like FpSET_ST1_80 %FP0, %ST1 - // StackTop can be 1 if a FpSET_ST0_* was before this. Exchange them. - if (StackTop == 1) { - BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(X86::ST1); - NumFXCH++; - StackTop = 0; - break; - } - assert(StackTop == 2 && "Stack should have two element on it to return!"); - --StackTop; // "Forget" we have something on the top of stack! - break; - } - unsigned DestReg = getFPReg(MO0); if (MI->killsRegister(X86::FP0+SrcReg)) { // If the input operand is killed, we can just change the owner of the @@ -1206,3 +1199,33 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { I = MBB->erase(I); // Remove the pseudo instruction --I; } + +// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. +bool FPS::translateCopy(MachineInstr *MI) { + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + + if (DstReg == X86::ST0) { + MI->setDesc(TII->get(X86::FpSET_ST0_80)); + MI->RemoveOperand(0); + return true; + } + if (DstReg == X86::ST1) { + MI->setDesc(TII->get(X86::FpSET_ST1_80)); + MI->RemoveOperand(0); + return true; + } + if (SrcReg == X86::ST0) { + MI->setDesc(TII->get(X86::FpGET_ST0_80)); + return true; + } + if (SrcReg == X86::ST1) { + MI->setDesc(TII->get(X86::FpGET_ST1_80)); + return true; + } + if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { + MI->setDesc(TII->get(X86::MOV_Fp8080)); + return true; + } + return false; +} diff --git a/lib/Target/X86/X86FloatingPointRegKill.cpp b/lib/Target/X86/X86FloatingPointRegKill.cpp index 747683d..2c98b96 100644 --- a/lib/Target/X86/X86FloatingPointRegKill.cpp +++ b/lib/Target/X86/X86FloatingPointRegKill.cpp @@ -72,18 +72,15 @@ static bool isFPStackVReg(unsigned RegNo, const MachineRegisterInfo &MRI) { /// stack code, and thus needs an FP_REG_KILL. static bool ContainsFPStackCode(MachineBasicBlock *MBB, const MachineRegisterInfo &MRI) { - // Scan the block, looking for instructions that define fp stack vregs. + // Scan the block, looking for instructions that define or use fp stack vregs. 
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { - if (I->getNumOperands() == 0 || !I->getOperand(0).isReg()) - continue; - for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { - if (!I->getOperand(op).isReg() || !I->getOperand(op).isDef()) + if (!I->getOperand(op).isReg()) continue; - - if (isFPStackVReg(I->getOperand(op).getReg(), MRI)) - return true; + if (unsigned Reg = I->getOperand(op).getReg()) + if (isFPStackVReg(Reg, MRI)) + return true; } } @@ -108,8 +105,8 @@ static bool ContainsFPStackCode(MachineBasicBlock *MBB, bool FPRegKiller::runOnMachineFunction(MachineFunction &MF) { // If we are emitting FP stack code, scan the basic block to determine if this - // block defines any FP values. If so, put an FP_REG_KILL instruction before - // the terminator of the block. + // block defines or uses any FP values. If so, put an FP_REG_KILL instruction + // before the terminator of the block. // Note that FP stack instructions are used in all modes for long double, // so we always need to do this check. diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 0f64383..72f2bc1 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -137,21 +137,6 @@ namespace { } namespace { - class X86ISelListener : public SelectionDAG::DAGUpdateListener { - SmallSet Deletes; - public: - explicit X86ISelListener() {} - virtual void NodeDeleted(SDNode *N, SDNode *E) { - Deletes.insert(N); - } - virtual void NodeUpdated(SDNode *N) { - // Ignore updates. - } - bool IsDeleted(SDNode *N) { - return Deletes.count(N); - } - }; - //===--------------------------------------------------------------------===// /// ISel - X86 specific code to select X86 machine instructions for /// SelectionDAG operations. @@ -199,16 +184,17 @@ namespace { bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, - X86ISelListener &DeadNodes, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, SDValue &Segment); bool SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Scale, SDValue &Index, SDValue &Disp); + SDValue &Scale, SDValue &Index, SDValue &Disp, + SDValue &Segment); bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, - SDValue &Scale, SDValue &Index, SDValue &Disp); + SDValue &Scale, SDValue &Index, SDValue &Disp, + SDValue &Segment); bool SelectScalarSSELoad(SDNode *Root, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, SDValue &Disp, @@ -239,7 +225,8 @@ namespace { // These are 32-bit even in 64-bit mode since RIP relative offset // is 32-bit. 
if (AM.GV) - Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp, + Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(), + MVT::i32, AM.Disp, AM.SymbolFlags); else if (AM.CP) Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, @@ -386,14 +373,14 @@ static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, } for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i) Ops.push_back(OrigChain.getOperand(i)); - CurDAG->UpdateNodeOperands(OrigChain, &Ops[0], Ops.size()); - CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), + CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), Load.getOperand(1), Load.getOperand(2)); Ops.clear(); Ops.push_back(SDValue(Load.getNode(), 1)); for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i) Ops.push_back(Call.getOperand(i)); - CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size()); } /// isCalleeLoad - Return true if call address is a load and it can be @@ -515,7 +502,7 @@ void X86DAGToDAGISel::PreprocessISelDAG() { N->getOperand(0), MemTmp, NULL, 0, MemVT, false, false, 0); - SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp, + SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp, NULL, 0, MemVT, false, false, 0); // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the @@ -664,8 +651,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { /// returning true if it cannot be done. This just pattern matches for the /// addressing mode. bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { - X86ISelListener DeadNodes; - if (MatchAddressRecursively(N, AM, DeadNodes, 0)) + if (MatchAddressRecursively(N, AM, 0)) return true; // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has @@ -713,7 +699,6 @@ static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) { } bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, - X86ISelListener &DeadNodes, unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); @@ -876,13 +861,13 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // other uses, since it avoids a two-address sub instruction, however // it costs an additional mov if the index register has other uses. + // Add an artificial use to this node so that we can keep track of + // it if it gets CSE'd with a different node. + HandleSDNode Handle(N); + // Test if the LHS of the sub can be folded. X86ISelAddressMode Backup = AM; - if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, - DeadNodes, Depth+1) || - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - DeadNodes.IsDeleted(N.getNode())) { + if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { AM = Backup; break; } @@ -893,7 +878,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, } int Cost = 0; - SDValue RHS = N.getNode()->getOperand(1); + SDValue RHS = Handle.getValue().getNode()->getOperand(1); // If the RHS involves a register with multiple uses, this // transformation incurs an extra mov, due to the neg instruction // clobbering its operand. 
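The ISD::ADD case in the next hunk saves a backup of the partially built addressing mode, tries to fold both operands, restores the backup, and retries with the operands commuted (the new HandleSDNode keeps the node reachable if recursion CSEs it away). The backup/retry shape in isolation, with hypothetical Node and AddrMode types:

#include <cassert>

struct AddrMode { int Base = 0, Index = 0; };      // 0 means "slot unused"
struct Node { int Val; bool Foldable; };

static bool matchOperand(const Node &N, AddrMode &AM) {
  if (!N.Foldable) return false;
  if (!AM.Base) AM.Base = N.Val;
  else if (!AM.Index) AM.Index = N.Val;
  else return false;                               // both slots already taken
  return true;
}

static bool matchAdd(const Node &LHS, const Node &RHS, AddrMode &AM) {
  AddrMode Backup = AM;                            // snapshot before folding
  if (matchOperand(LHS, AM) && matchOperand(RHS, AM)) return true;
  AM = Backup;                                     // undo partial progress
  if (matchOperand(RHS, AM) && matchOperand(LHS, AM)) return true;
  AM = Backup;                                     // undo again before giving up
  return false;
}

int main() {
  AddrMode AM;
  Node A{1, true}, B{2, true};
  assert(matchAdd(A, B, AM) && AM.Base == 1 && AM.Index == 2);
  return 0;
}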
@@ -944,35 +929,27 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, } case ISD::ADD: { + // Add an artificial use to this node so that we can keep track of + // it if it gets CSE'd with a different node. + HandleSDNode Handle(N); + SDValue LHS = Handle.getValue().getNode()->getOperand(0); + SDValue RHS = Handle.getValue().getNode()->getOperand(1); + X86ISelAddressMode Backup = AM; - if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, - DeadNodes, Depth+1)) { - if (DeadNodes.IsDeleted(N.getNode())) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return true; - if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, - DeadNodes, Depth+1)) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return DeadNodes.IsDeleted(N.getNode()); - } + if (!MatchAddressRecursively(LHS, AM, Depth+1) && + !MatchAddressRecursively(RHS, AM, Depth+1)) + return false; + AM = Backup; + LHS = Handle.getValue().getNode()->getOperand(0); + RHS = Handle.getValue().getNode()->getOperand(1); // Try again after commuting the operands. + if (!MatchAddressRecursively(RHS, AM, Depth+1) && + !MatchAddressRecursively(LHS, AM, Depth+1)) + return false; AM = Backup; - if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, - DeadNodes, Depth+1)) { - if (DeadNodes.IsDeleted(N.getNode())) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return true; - if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, - DeadNodes, Depth+1)) - // If it is successful but the recursive update causes N to be deleted, - // then it's not safe to continue. - return DeadNodes.IsDeleted(N.getNode()); - } - AM = Backup; + LHS = Handle.getValue().getNode()->getOperand(0); + RHS = Handle.getValue().getNode()->getOperand(1); // If we couldn't fold both operands into the address at the same time, // see if we can just put each operand into a register and fold at least @@ -980,8 +957,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (AM.BaseType == X86ISelAddressMode::RegBase && !AM.Base_Reg.getNode() && !AM.IndexReg.getNode()) { - AM.Base_Reg = N.getNode()->getOperand(0); - AM.IndexReg = N.getNode()->getOperand(1); + AM.Base_Reg = LHS; + AM.IndexReg = RHS; AM.Scale = 1; return false; } @@ -996,7 +973,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, uint64_t Offset = CN->getSExtValue(); // Start with the LHS as an addr mode. - if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) && + if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && // Address could not have picked a GV address for the displacement. AM.GV == NULL && // On x86-64, the resultant disp must fit in 32-bits. 
@@ -1073,7 +1050,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, CurDAG->RepositionNode(N.getNode(), Shl.getNode()); Shl.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, Shl, &DeadNodes); + CurDAG->ReplaceAllUsesWith(N, Shl); AM.IndexReg = And; AM.Scale = (1 << ScaleLog); return false; @@ -1124,7 +1101,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, NewSHIFT, &DeadNodes); + CurDAG->ReplaceAllUsesWith(N, NewSHIFT); AM.Scale = 1 << ShiftCst; AM.IndexReg = NewAND; @@ -1230,7 +1207,8 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root, /// mode it matches can be cost effectively emitted as an LEA instruction. bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, - SDValue &Index, SDValue &Disp) { + SDValue &Index, SDValue &Disp, + SDValue &Segment) { X86ISelAddressMode AM; // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support @@ -1284,7 +1262,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, if (Complexity <= 2) return false; - SDValue Segment; getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1292,10 +1269,10 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N, /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes. bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Scale, SDValue &Index, - SDValue &Disp) { + SDValue &Disp, SDValue &Segment) { assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); const GlobalAddressSDNode *GA = cast(N); - + X86ISelAddressMode AM; AM.GV = GA->getGlobal(); AM.Disp += GA->getOffset(); @@ -1309,7 +1286,6 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base, AM.IndexReg = CurDAG->getRegister(0, MVT::i64); } - SDValue Segment; getAddressOperands(AM, Base, Scale, Index, Disp, Segment); return true; } @@ -1672,6 +1648,26 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); } + // Prevent use of AH in a REX instruction by referencing AX instead. + if (HiReg == X86::AH && Subtarget->is64Bit() && + !SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + // Get the low part if needed. Don't use getCopyFromReg for aliasing + // registers. + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 1), + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + + // Shift AX down 8 bits. + Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, + Result, + CurDAG->getTargetConstant(8, MVT::i8)), 0); + // Then truncate it down to i8. + ReplaceUses(SDValue(Node, 1), + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + } // Copy the low half of the result, if it is needed. if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1682,24 +1678,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the high half of the result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result; - if (HiReg == X86::AH && Subtarget->is64Bit()) { - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits. 
- Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), 0); - // Then truncate it down to i8. - Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, - MVT::i8, Result); - } else { - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - } + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } @@ -1812,6 +1793,29 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0); } + // Prevent use of AH in a REX instruction by referencing AX instead. + // Shift it down 8 bits. + if (HiReg == X86::AH && Subtarget->is64Bit() && + !SDValue(Node, 1).use_empty()) { + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + X86::AX, MVT::i16, InFlag); + InFlag = Result.getValue(2); + + // If we also need AL (the quotient), get it by extracting a subreg from + // Result. The fast register allocator does not like multiple CopyFromReg + // nodes using aliasing registers. + if (!SDValue(Node, 0).use_empty()) + ReplaceUses(SDValue(Node, 0), + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + + // Shift AX right by 8 bits instead of using AH. + Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, + Result, + CurDAG->getTargetConstant(8, MVT::i8)), + 0); + ReplaceUses(SDValue(Node, 1), + CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result)); + } // Copy the division (low) result, if it is needed. if (!SDValue(Node, 0).use_empty()) { SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, @@ -1822,25 +1826,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { } // Copy the remainder (high) result, if it is needed. if (!SDValue(Node, 1).use_empty()) { - SDValue Result; - if (HiReg == X86::AH && Subtarget->is64Bit()) { - // Prevent use of AH in a REX instruction by referencing AX instead. - // Shift it down 8 bits. - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - X86::AX, MVT::i16, InFlag); - InFlag = Result.getValue(2); - Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16, - Result, - CurDAG->getTargetConstant(8, MVT::i8)), - 0); - // Then truncate it down to i8. 
- Result = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, - MVT::i8, Result); - } else { - Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - HiReg, NVT, InFlag); - InFlag = Result.getValue(2); - } + SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + HiReg, NVT, InFlag); + InFlag = Result.getValue(2); ReplaceUses(SDValue(Node, 1), Result); DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n'); } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b02c33d..1a63474 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -62,21 +62,19 @@ static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { - switch (TM.getSubtarget().TargetType) { - default: llvm_unreachable("unknown subtarget type"); - case X86Subtarget::isDarwin: - if (TM.getSubtarget().is64Bit()) - return new X8664_MachoTargetObjectFile(); + + bool is64Bit = TM.getSubtarget().is64Bit(); + + if (TM.getSubtarget().isTargetDarwin()) { + if (is64Bit) return new X8664_MachoTargetObjectFile(); return new TargetLoweringObjectFileMachO(); - case X86Subtarget::isELF: - if (TM.getSubtarget().is64Bit()) - return new X8664_ELFTargetObjectFile(TM); + } else if (TM.getSubtarget().isTargetELF() ){ + if (is64Bit) return new X8664_ELFTargetObjectFile(TM); return new X8632_ELFTargetObjectFile(TM); - case X86Subtarget::isMingw: - case X86Subtarget::isCygwin: - case X86Subtarget::isWindows: + } else if (TM.getSubtarget().isTargetCOFF()) { return new TargetLoweringObjectFileCOFF(); - } + } + llvm_unreachable("unknown subtarget type"); } X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) @@ -347,6 +345,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) if (!Subtarget->hasSSE2()) setOperationAction(ISD::MEMBARRIER , MVT::Other, Expand); + // On X86 and X86-64, atomic operations are lowered to locked instructions. + // Locked instructions, in turn, have implicit fence semantics (all memory + // operations are flushed before issuing the locked instruction, and they + // are not buffered), so we can fold away the common pattern of + // fence-atomic-fence. 
+ setShouldFoldAtomicFences(true); // Expand certain atomics setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom); @@ -611,7 +615,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) addRegisterClass(MVT::v8i8, X86::VR64RegisterClass, false); addRegisterClass(MVT::v4i16, X86::VR64RegisterClass, false); addRegisterClass(MVT::v2i32, X86::VR64RegisterClass, false); - addRegisterClass(MVT::v2f32, X86::VR64RegisterClass, false); + addRegisterClass(MVT::v1i64, X86::VR64RegisterClass, false); setOperationAction(ISD::ADD, MVT::v8i8, Legal); @@ -657,14 +661,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) AddPromotedToType (ISD::LOAD, MVT::v4i16, MVT::v1i64); setOperationAction(ISD::LOAD, MVT::v2i32, Promote); AddPromotedToType (ISD::LOAD, MVT::v2i32, MVT::v1i64); - setOperationAction(ISD::LOAD, MVT::v2f32, Promote); - AddPromotedToType (ISD::LOAD, MVT::v2f32, MVT::v1i64); setOperationAction(ISD::LOAD, MVT::v1i64, Legal); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); - setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); @@ -672,7 +673,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); - setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f32, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i8, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i16, Custom); setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v1i64, Custom); @@ -691,7 +691,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::BIT_CONVERT, MVT::v8i8, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v4i16, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v2i32, Custom); - setOperationAction(ISD::BIT_CONVERT, MVT::v2f32, Custom); setOperationAction(ISD::BIT_CONVERT, MVT::v1i64, Custom); } } @@ -792,9 +791,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) EVT VT = SVT; // Do not attempt to promote non-128-bit vectors - if (!VT.is128BitVector()) { + if (!VT.is128BitVector()) continue; - } setOperationAction(ISD::AND, SVT, Promote); AddPromotedToType (ISD::AND, SVT, MVT::v2i64); @@ -825,6 +823,17 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } if (Subtarget->hasSSE41()) { + setOperationAction(ISD::FFLOOR, MVT::f32, Legal); + setOperationAction(ISD::FCEIL, MVT::f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::f32, Legal); + setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::f64, Legal); + setOperationAction(ISD::FCEIL, MVT::f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::f64, Legal); + setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal); + // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); @@ -965,15 +974,24 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Add/Sub/Mul with overflow operations are custom lowered. 
setOperationAction(ISD::SADDO, MVT::i32, Custom); - setOperationAction(ISD::SADDO, MVT::i64, Custom); setOperationAction(ISD::UADDO, MVT::i32, Custom); - setOperationAction(ISD::UADDO, MVT::i64, Custom); setOperationAction(ISD::SSUBO, MVT::i32, Custom); - setOperationAction(ISD::SSUBO, MVT::i64, Custom); setOperationAction(ISD::USUBO, MVT::i32, Custom); - setOperationAction(ISD::USUBO, MVT::i64, Custom); setOperationAction(ISD::SMULO, MVT::i32, Custom); - setOperationAction(ISD::SMULO, MVT::i64, Custom); + + // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't + // handle type legalization for these operations here. + // + // FIXME: We really should do custom legalization for addition and + // subtraction on x86-32 once PR3203 is fixed. We really can't do much better + // than generic legalization for 64-bit multiplication-with-overflow, though. + if (Subtarget->is64Bit()) { + setOperationAction(ISD::SADDO, MVT::i64, Custom); + setOperationAction(ISD::UADDO, MVT::i64, Custom); + setOperationAction(ISD::SSUBO, MVT::i64, Custom); + setOperationAction(ISD::USUBO, MVT::i64, Custom); + setOperationAction(ISD::SMULO, MVT::i64, Custom); + } if (!Subtarget->is64Bit()) { // These libcalls are not available in 32-bit. @@ -992,7 +1010,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::STORE); - setTargetDAGCombine(ISD::MEMBARRIER); setTargetDAGCombine(ISD::ZERO_EXTEND); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -1172,6 +1189,27 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; } +bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, + unsigned &Offset) const { + if (!Subtarget->isTargetLinux()) + return false; + + if (Subtarget->is64Bit()) { + // %fs:0x28, unless we're using a Kernel code model, in which case it's %gs: + Offset = 0x28; + if (getTargetMachine().getCodeModel() == CodeModel::Kernel) + AddressSpace = 256; + else + AddressSpace = 257; + } else { + // %gs:0x14 on i386 + Offset = 0x14; + AddressSpace = 256; + } + return true; +} + + //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -1180,19 +1218,19 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { bool X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &OutTys, - const SmallVectorImpl &ArgsFlags, - SelectionDAG &DAG) const { + const SmallVectorImpl &Outs, + LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), - RVLocs, *DAG.getContext()); - return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86); + RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_X86); } SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); X86MachineFunctionInfo *FuncInfo = MF.getInfo(); @@ -1220,7 +1258,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, for (unsigned i = 0; i != RVLocs.size(); ++i) { CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue ValToCopy = Outs[i].Val; + SDValue ValToCopy = 
OutVals[i]; // Returns in ST0/ST1 are handled specially: these are pushed as operands to // the RET instruction and handled by the FP Stackifier. @@ -1308,17 +1346,34 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, report_fatal_error("SSE register return with SSE disabled"); } + SDValue Val; + // If this is a call to a function that returns an fp value on the floating - // point stack, but where we prefer to use the value in xmm registers, copy - // it out as F80 and use a truncate to move it from fp stack reg to xmm reg. - if ((VA.getLocReg() == X86::ST0 || - VA.getLocReg() == X86::ST1) && - isScalarFPTypeInSSEReg(VA.getValVT())) { - CopyVT = MVT::f80; - } + // point stack, we must guarantee that the value is popped from the stack, so + // a CopyFromReg is not good enough - the copy instruction may be eliminated + // if the return value is not used. We use the FpGET_ST0 instructions + // instead. + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) { + // If we prefer to use the value in xmm registers, copy it out as f80 and + // use a truncate to move it from fp stack reg to xmm reg. + if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80; + bool isST0 = VA.getLocReg() == X86::ST0; + unsigned Opc = 0; + if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32; + if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64; + if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80; + SDValue Ops[] = { Chain, InFlag }; + Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Flag, + Ops, 2), 1); + Val = Chain.getValue(0); - SDValue Val; - if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) { + // Round the f80 to the right size, which also moves it to the appropriate + // xmm register. + if (CopyVT != VA.getValVT()) + Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, + // This truncation won't change the value. + DAG.getIntPtrConstant(1)); + } else if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) { // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64. if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), @@ -1338,15 +1393,6 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Val = Chain.getValue(0); } InFlag = Chain.getValue(2); - - if (CopyVT != VA.getValVT()) { - // Round the F80 the right size, which also moves to the appropriate xmm - // register. - Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, - // This truncation won't change the value. - DAG.getIntPtrConstant(1)); - } - InVals.push_back(Val); } @@ -1383,29 +1429,6 @@ ArgsAreStructReturn(const SmallVectorImpl &Ins) { return Ins[0].Flags.isSRet(); } -/// IsCalleePop - Determines whether the callee is required to pop its -/// own arguments. Callee pop is necessary to support tail calls. -bool X86TargetLowering::IsCalleePop(bool IsVarArg, - CallingConv::ID CallingConv) const { - if (IsVarArg) - return false; - - switch (CallingConv) { - default: - return false; - case CallingConv::X86_StdCall: - return !Subtarget->is64Bit(); - case CallingConv::X86_FastCall: - return !Subtarget->is64Bit(); - case CallingConv::X86_ThisCall: - return !Subtarget->is64Bit(); - case CallingConv::Fast: - return GuaranteedTailCallOpt; - case CallingConv::GHC: - return GuaranteedTailCallOpt; - } -} - /// CCAssignFnForNode - Selects the correct CCAssignFn for the /// given CallingConvention value.
CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { @@ -1483,11 +1506,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // could be overwritten by lowering of arguments in case of a tail call. if (Flags.isByVal()) { int FI = MFI->CreateFixedObject(Flags.getByValSize(), - VA.getLocMemOffset(), isImmutable, false); + VA.getLocMemOffset(), isImmutable); return DAG.getFrameIndex(FI, getPointerTy()); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable, false); + VA.getLocMemOffset(), isImmutable); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); return DAG.getLoad(ValVT, dl, Chain, FIN, PseudoSourceValue::getFixedStack(FI), 0, @@ -1615,8 +1638,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, if (isVarArg) { if (Is64Bit || (CallConv != CallingConv::X86_FastCall && CallConv != CallingConv::X86_ThisCall)) { - FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, - true, false)); + FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true)); } if (Is64Bit) { unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; @@ -1722,7 +1744,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, } // Some CCs need callee pop. - if (IsCalleePop(isVarArg, CallConv)) { + if (Subtarget->IsCalleePop(isVarArg, CallConv)) { FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything. } else { FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing. @@ -1788,7 +1810,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, @@ -1802,6 +1824,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -1814,7 +1837,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Check if it's really possible to do a tail call. isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), - Outs, Ins, DAG); + Outs, OutVals, Ins, DAG); // Sibcalls are automatically detected tailcalls which do not require // ABI changes. @@ -1874,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; EVT RegVT = VA.getLocVT(); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; bool isByVal = Flags.isByVal(); @@ -2013,12 +2036,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.isRegLoc()) continue; assert(VA.isMemLoc()); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; // Create frame index. 
int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; - FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false); + FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) { @@ -2059,7 +2082,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, FPDiff, dl); } - bool WasGlobalOrExternal = false; if (getTargetMachine().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls @@ -2067,7 +2089,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // pc-relative offset may not be large enough to hold the whole // address. } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - WasGlobalOrExternal = true; // If the callee is a GlobalAddress node (quite common, every direct call // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack // it. @@ -2095,11 +2116,10 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, OpFlags = X86II::MO_DARWIN_STUB; } - Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(), + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), G->getOffset(), OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { - WasGlobalOrExternal = true; unsigned char OpFlags = 0; // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external @@ -2153,17 +2173,12 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Ops.push_back(InFlag); if (isTailCall) { - // If this is the first return lowered for this function, add the regs - // to the liveout set for the function. - if (MF.getRegInfo().liveout_empty()) { - SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, - *DAG.getContext()); - CCInfo.AnalyzeCallResult(Ins, RetCC_X86); - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } + // We used to do: + //// If this is the first return lowered for this function, add the regs + //// to the liveout set for the function. + // This isn't right, although it's probably harmless on x86; liveouts + // should be computed from returns not tail calls. Consider a void + // function making a tail call to a function returning int. return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); } @@ -2173,7 +2188,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create the CALLSEQ_END node. unsigned NumBytesForCalleeToPush; - if (IsCalleePop(isVarArg, CallConv)) + if (Subtarget->IsCalleePop(isVarArg, CallConv)) NumBytesForCalleeToPush = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet) // If this is a call to a struct-return function, the callee @@ -2314,6 +2329,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const { if (!IsTailCallConvention(CalleeCC) && @@ -2332,8 +2348,8 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, return false; } - // Look for obvious safe cases to perform tail call optimization that does not - // requite ABI changes. This is what gcc calls sibcall. + // Look for obvious safe cases to perform tail call optimization that do not + // require ABI changes. 
This is what gcc calls sibcall. // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to // emit a special epilogue. @@ -2427,8 +2443,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; if (VA.getLocInfo() == CCValAssign::Indirect) return false; @@ -2439,26 +2454,32 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, } } } + + // If the tailcall address may be in a register, then make sure it's + // possible to register allocate for it. In 32-bit, the call address can + // only target EAX, EDX, or ECX since the tail call must be scheduled after + // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI, + // RDI, R8, R9, R11. + if (!isa(Callee) && + !isa(Callee)) { + unsigned Limit = Subtarget->is64Bit() ? 8 : 3; + unsigned NumInRegs = 0; + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { + CCValAssign &VA = ArgLocs[i]; + if (VA.isRegLoc()) { + if (++NumInRegs == Limit) + return false; + } + } + } } return true; } FastISel * -X86TargetLowering::createFastISel(MachineFunction &mf, - DenseMap &vm, - DenseMap &bm, - DenseMap &am, - std::vector > &pn -#ifndef NDEBUG - , SmallSet &cil -#endif - ) const { - return X86::createFastISel(mf, vm, bm, am, pn -#ifndef NDEBUG - , cil -#endif - ); +X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { + return X86::createFastISel(funcInfo); } @@ -2476,7 +2497,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, - false, false); + false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -3175,7 +3196,7 @@ unsigned X86::getShufflePALIGNRImmediate(SDNode *N) { /// constant +0.0. bool X86::isZeroNode(SDValue Elt) { return ((isa(Elt) && - cast(Elt)->getZExtValue() == 0) || + cast(Elt)->isNullValue()) || (isa(Elt) && cast(Elt)->getValueAPF().isPosZero())); } @@ -4433,7 +4454,7 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, } /// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide -/// ones, or rewriting v4i32 / v2f32 as 2 wide ones if possible. This can be +/// ones, or rewriting v4i32 / v2i32 as 2 wide ones if possible. This can be /// done when every pair / quad of shuffle mask elements point to elements in /// the right sequence. e.g. /// vector_shuffle <>, <>, < 3, 4, | 10, 11, | 0, 1, | 14, 15> @@ -4447,7 +4468,6 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp, unsigned NumElems = VT.getVectorNumElements(); unsigned NewWidth = (NumElems == 4) ? 
2 : 4; EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth); - EVT MaskEltVT = MaskVT.getVectorElementType(); EVT NewVT = MaskVT; switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unexpected!"); @@ -5059,13 +5079,9 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - if (Op.getValueType() == MVT::v2f32) - return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32, - DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32, - DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, - Op.getOperand(0)))); - - if (Op.getValueType() == MVT::v1i64 && Op.getOperand(0).getValueType() == MVT::i64) + + if (Op.getValueType() == MVT::v1i64 && + Op.getOperand(0).getValueType() == MVT::i64) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0)); SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); @@ -5230,10 +5246,10 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, if (OpFlags == X86II::MO_NO_FLAG && X86::isOffsetSuitableForCodeModel(Offset, M)) { // A direct static reference to a global. - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); Offset = 0; } else { - Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags); + Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); } if (Subtarget->isPICStyleRIPRel() && @@ -5278,7 +5294,7 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); DebugLoc dl = GA->getDebugLoc(); - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), GA->getOffset(), OperandFlags); @@ -5351,7 +5367,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial // exec) - SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0), + SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, + GA->getValueType(0), GA->getOffset(), OperandFlags); SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); @@ -5366,33 +5383,78 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, SDValue X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { - // TODO: implement the "local dynamic" model - // TODO: implement the "initial exec"model for pic executables - assert(Subtarget->isTargetELF() && - "TLS not implemented for non-ELF targets"); + GlobalAddressSDNode *GA = cast(Op); const GlobalValue *GV = GA->getGlobal(); - // If GV is an alias then use the aliasee for determining - // thread-localness. 
- if (const GlobalAlias *GA = dyn_cast(GV)) - GV = GA->resolveAliasedGlobal(false); - - TLSModel::Model model = getTLSModel(GV, - getTargetMachine().getRelocationModel()); - - switch (model) { - case TLSModel::GeneralDynamic: - case TLSModel::LocalDynamic: // not implemented - if (Subtarget->is64Bit()) - return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); - return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); + if (Subtarget->isTargetELF()) { + // TODO: implement the "local dynamic" model + // TODO: implement the "initial exec"model for pic executables + + // If GV is an alias then use the aliasee for determining + // thread-localness. + if (const GlobalAlias *GA = dyn_cast(GV)) + GV = GA->resolveAliasedGlobal(false); + + TLSModel::Model model + = getTLSModel(GV, getTargetMachine().getRelocationModel()); + + switch (model) { + case TLSModel::GeneralDynamic: + case TLSModel::LocalDynamic: // not implemented + if (Subtarget->is64Bit()) + return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy()); + return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy()); + + case TLSModel::InitialExec: + case TLSModel::LocalExec: + return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, + Subtarget->is64Bit()); + } + } else if (Subtarget->isTargetDarwin()) { + // Darwin only has one model of TLS. Lower to that. + unsigned char OpFlag = 0; + unsigned WrapperKind = Subtarget->isPICStyleRIPRel() ? + X86ISD::WrapperRIP : X86ISD::Wrapper; + + // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the + // global base reg. + bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) && + !Subtarget->is64Bit(); + if (PIC32) + OpFlag = X86II::MO_TLVP_PIC_BASE; + else + OpFlag = X86II::MO_TLVP; + DebugLoc DL = Op.getDebugLoc(); + SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, + getPointerTy(), + GA->getOffset(), OpFlag); + SDValue Offset = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + + // With PIC32, the address is actually $g + Offset. + if (PIC32) + Offset = DAG.getNode(ISD::ADD, DL, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, + DebugLoc(), getPointerTy()), + Offset); + + // Lowering the machine isd will make sure everything is in the right + // location. + SDValue Args[] = { Offset }; + SDValue Chain = DAG.getNode(X86ISD::TLSCALL, DL, MVT::Other, Args, 1); + + // TLSCALL will be codegen'ed as call. Inform MFI that function has calls. + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setAdjustsStack(true); - case TLSModel::InitialExec: - case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, - Subtarget->is64Bit()); + // And our return value (tls address) is in the standard call return value + // location. + unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; + return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); } + + assert(false && + "TLS not implemented for this target."); llvm_unreachable("Unreachable"); return SDValue(); @@ -5715,7 +5777,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, // Load the value out, extending it from f32 to f80. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::f80, DAG.getEntryNode(), + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, MVT::f80, dl, DAG.getEntryNode(), FudgePtr, PseudoSourceValue::getConstantPool(), 0, MVT::f32, false, false, 4); // Extend everything to 80 bits to force it to be done on x87. 
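A side note on the overflow hunk earlier in this file (the SADDO/UADDO block): as a rough illustration, not taken from this patch and assuming 32-bit int and 64-bit long long, this is the check that ISD::SADDO encodes; the custom lowering can select it as a single addl followed by a branch on the OF flag instead of the widening arithmetic written out below (names are mine):

  // Illustrative only: the semantics behind llvm.sadd.with.overflow / ISD::SADDO.
  static bool sadd_overflows(int a, int b, int *sum) {
    long long wide = (long long)a + (long long)b; // cannot overflow in 64 bits
    *sum = (int)wide;                             // the 32-bit result
    return wide != (long long)*sum;               // true iff the i32 add wrapped
  }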
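For the getStackCookieLocation hook added above, a hedged sketch (GNU inline asm, x86-64 Linux, glibc's TCB layout assumed; the function name is mine) of the fixed slot it reports, where LLVM address space 257 is the encoding of the %fs segment:

  // Reads the stack-protector guard value from %fs:0x28.
  static unsigned long read_stack_cookie(void) {
    unsigned long cookie;
    __asm__("movq %%fs:0x28, %0" : "=r"(cookie));
    return cookie;
  }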
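And for the new Darwin TLS path just above: the source-level pattern it serves, assuming a compiler with the GNU __thread extension (identifiers are illustrative). Each access loads the variable's TLV descriptor through an MO_TLVP relocation and makes the indirect TLSCALL, whose result comes back in EAX/RAX like any ordinary call:

  static __thread int tls_counter;   // one instance per thread
  int bump_tls_counter(void) {
    return ++tls_counter;            // TLV load + indirect call on Darwin
  }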
@@ -5964,6 +6026,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, bool NeedCF = false; bool NeedOF = false; switch (X86CC) { + default: break; case X86::COND_A: case X86::COND_AE: case X86::COND_B: case X86::COND_BE: NeedCF = true; @@ -5973,120 +6036,129 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, case X86::COND_O: case X86::COND_NO: NeedOF = true; break; - default: break; } // See if we can use the EFLAGS value from the operand instead of // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. - if (Op.getResNo() == 0 && !NeedOF && !NeedCF) { - unsigned Opcode = 0; - unsigned NumOperands = 0; - switch (Op.getNode()->getOpcode()) { - case ISD::ADD: - // Due to an isel shortcoming, be conservative if this add is - // likely to be selected as part of a load-modify-store - // instruction. When the root node in a match is a store, isel - // doesn't know how to remap non-chain non-flag uses of other - // nodes in the match, such as the ADD in this case. This leads - // to the ADD being left around and reselected, with the result - // being two adds in the output. Alas, even if none our users - // are stores, that doesn't prove we're O.K. Ergo, if we have - // any parents that aren't CopyToReg or SETCC, eschew INC/DEC. - // A better fix seems to require climbing the DAG back to the - // root, and it doesn't seem to be worth the effort. - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) - goto default_case; - if (ConstantSDNode *C = - dyn_cast(Op.getNode()->getOperand(1))) { - // An add of one will be selected as an INC. - if (C->getAPIntValue() == 1) { - Opcode = X86ISD::INC; - NumOperands = 1; - break; - } - // An add of negative one (subtract of one) will be selected as a DEC. - if (C->getAPIntValue().isAllOnesValue()) { - Opcode = X86ISD::DEC; - NumOperands = 1; - break; - } + if (Op.getResNo() != 0 || NeedOF || NeedCF) + // Emit a CMP with 0, which is the TEST pattern. + return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, + DAG.getConstant(0, Op.getValueType())); + + unsigned Opcode = 0; + unsigned NumOperands = 0; + switch (Op.getNode()->getOpcode()) { + case ISD::ADD: + // Due to an isel shortcoming, be conservative if this add is likely to be + // selected as part of a load-modify-store instruction. When the root node + // in a match is a store, isel doesn't know how to remap non-chain non-flag + // uses of other nodes in the match, such as the ADD in this case. This + // leads to the ADD being left around and reselected, with the result being + // two adds in the output. Alas, even if none our users are stores, that + // doesn't prove we're O.K. Ergo, if we have any parents that aren't + // CopyToReg or SETCC, eschew INC/DEC. A better fix seems to require + // climbing the DAG back to the root, and it doesn't seem to be worth the + // effort. + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) + if (UI->getOpcode() != ISD::CopyToReg && UI->getOpcode() != ISD::SETCC) + goto default_case; + + if (ConstantSDNode *C = + dyn_cast(Op.getNode()->getOperand(1))) { + // An add of one will be selected as an INC. + if (C->getAPIntValue() == 1) { + Opcode = X86ISD::INC; + NumOperands = 1; + break; } - // Otherwise use a regular EFLAGS-setting add. 
- Opcode = X86ISD::ADD; - NumOperands = 2; - break; - case ISD::AND: { - // If the primary and result isn't used, don't bother using X86ISD::AND, - // because a TEST instruction will be better. - bool NonFlagUse = false; - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), - UE = Op.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - unsigned UOpNo = UI.getOperandNo(); - if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { - // Look pass truncate. - UOpNo = User->use_begin().getOperandNo(); - User = *User->use_begin(); - } - if (User->getOpcode() != ISD::BRCOND && - User->getOpcode() != ISD::SETCC && - (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { - NonFlagUse = true; - break; - } + + // An add of negative one (subtract of one) will be selected as a DEC. + if (C->getAPIntValue().isAllOnesValue()) { + Opcode = X86ISD::DEC; + NumOperands = 1; + break; } - if (!NonFlagUse) + } + + // Otherwise use a regular EFLAGS-setting add. + Opcode = X86ISD::ADD; + NumOperands = 2; + break; + case ISD::AND: { + // If the primary and result isn't used, don't bother using X86ISD::AND, + // because a TEST instruction will be better. + bool NonFlagUse = false; + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + UE = Op.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + unsigned UOpNo = UI.getOperandNo(); + if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { + // Look past truncate. + UOpNo = User->use_begin().getOperandNo(); + User = *User->use_begin(); + } + + if (User->getOpcode() != ISD::BRCOND && + User->getOpcode() != ISD::SETCC && + (User->getOpcode() != ISD::SELECT || UOpNo != 0)) { + NonFlagUse = true; + break; + } } + + if (!NonFlagUse) + break; + } // FALL THROUGH - case ISD::SUB: - case ISD::OR: - case ISD::XOR: - // Due to the ISEL shortcoming noted above, be conservative if this op is - // likely to be selected as part of a load-modify-store instruction. - for (SDNode::use_iterator UI = Op.getNode()->use_begin(), + case ISD::SUB: + case ISD::OR: + case ISD::XOR: + // Due to the ISEL shortcoming noted above, be conservative if this op is + // likely to be selected as part of a load-modify-store instruction. + for (SDNode::use_iterator UI = Op.getNode()->use_begin(), UE = Op.getNode()->use_end(); UI != UE; ++UI) - if (UI->getOpcode() == ISD::STORE) - goto default_case; - // Otherwise use a regular EFLAGS-setting instruction. - switch (Op.getNode()->getOpcode()) { - case ISD::SUB: Opcode = X86ISD::SUB; break; - case ISD::OR: Opcode = X86ISD::OR; break; - case ISD::XOR: Opcode = X86ISD::XOR; break; - case ISD::AND: Opcode = X86ISD::AND; break; - default: llvm_unreachable("unexpected operator!"); - } - NumOperands = 2; - break; - case X86ISD::ADD: - case X86ISD::SUB: - case X86ISD::INC: - case X86ISD::DEC: - case X86ISD::OR: - case X86ISD::XOR: - case X86ISD::AND: - return SDValue(Op.getNode(), 1); - default: - default_case: - break; - } - if (Opcode != 0) { - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); - SmallVector Ops; - for (unsigned i = 0; i != NumOperands; ++i) - Ops.push_back(Op.getOperand(i)); - SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands); - DAG.ReplaceAllUsesWith(Op, New); - return SDValue(New.getNode(), 1); + if (UI->getOpcode() == ISD::STORE) + goto default_case; + + // Otherwise use a regular EFLAGS-setting instruction.
+ switch (Op.getNode()->getOpcode()) { + default: llvm_unreachable("unexpected operator!"); + case ISD::SUB: Opcode = X86ISD::SUB; break; + case ISD::OR: Opcode = X86ISD::OR; break; + case ISD::XOR: Opcode = X86ISD::XOR; break; + case ISD::AND: Opcode = X86ISD::AND; break; } + + NumOperands = 2; + break; + case X86ISD::ADD: + case X86ISD::SUB: + case X86ISD::INC: + case X86ISD::DEC: + case X86ISD::OR: + case X86ISD::XOR: + case X86ISD::AND: + return SDValue(Op.getNode(), 1); + default: + default_case: + break; } - // Otherwise just emit a CMP with 0, which is the TEST pattern. - return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, - DAG.getConstant(0, Op.getValueType())); + if (Opcode == 0) + // Emit a CMP with 0, which is the TEST pattern. + return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, + DAG.getConstant(0, Op.getValueType())); + + SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); + SmallVector Ops; + for (unsigned i = 0; i != NumOperands; ++i) + Ops.push_back(Op.getOperand(i)); + + SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands); + DAG.ReplaceAllUsesWith(Op, New); + return SDValue(New.getNode(), 1); } /// Emit nodes that will be selected as "cmp Op0,Op1", or something @@ -6113,15 +6185,21 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC, Op1 = Op1.getOperand(0); SDValue LHS, RHS; - if (Op1.getOpcode() == ISD::SHL) { - if (ConstantSDNode *And10C = dyn_cast(Op1.getOperand(0))) - if (And10C->getZExtValue() == 1) { - LHS = Op0; - RHS = Op1.getOperand(1); - } - } else if (Op0.getOpcode() == ISD::SHL) { + if (Op1.getOpcode() == ISD::SHL) + std::swap(Op0, Op1); + if (Op0.getOpcode() == ISD::SHL) { if (ConstantSDNode *And00C = dyn_cast(Op0.getOperand(0))) if (And00C->getZExtValue() == 1) { + // If we looked past a truncate, check that it's only truncating away + // known zeros. + unsigned BitWidth = Op0.getValueSizeInBits(); + unsigned AndBitWidth = And.getValueSizeInBits(); + if (BitWidth > AndBitWidth) { + APInt Mask = APInt::getAllOnesValue(BitWidth), Zeros, Ones; + DAG.ComputeMaskedBits(Op0, Mask, Zeros, Ones); + if (Zeros.countLeadingOnes() < BitWidth - AndBitWidth) + return SDValue(); + } LHS = Op1; RHS = Op0.getOperand(1); } @@ -6172,7 +6250,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant && - cast(Op1)->getZExtValue() == 0 && + cast(Op1)->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); if (NewSetCC.getNode()) @@ -6552,15 +6630,16 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0); CCode = X86::GetOppositeBranchCondition(CCode); CC = DAG.getConstant(CCode, MVT::i8); - SDValue User = SDValue(*Op.getNode()->use_begin(), 0); + SDNode *User = *Op.getNode()->use_begin(); // Look for an unconditional branch following this conditional branch. // We need this because we need to reverse the successors in order // to implement FCMP_OEQ. 
- if (User.getOpcode() == ISD::BR) { - SDValue FalseBB = User.getOperand(1); - SDValue NewBR = - DAG.UpdateNodeOperands(User, User.getOperand(0), Dest); + if (User->getOpcode() == ISD::BR) { + SDValue FalseBB = User->getOperand(1); + SDNode *NewBR = + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); assert(NewBR == User); + (void)NewBR; Dest = FalseBB; Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), @@ -6632,7 +6711,6 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Flag; - EVT IntPtr = getPointerTy(); EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); @@ -6685,7 +6763,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { Store = DAG.getStore(Op.getOperand(0), dl, DAG.getConstant(FuncInfo->getVarArgsFPOffset(), MVT::i32), - FIN, SV, 0, false, false, 0); + FIN, SV, 4, false, false, 0); MemOps.push_back(Store); // Store ptr to overflow_arg_area @@ -6693,7 +6771,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { FIN, DAG.getIntPtrConstant(4)); SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0, + Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 8, false, false, 0); MemOps.push_back(Store); @@ -6702,7 +6780,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { FIN, DAG.getIntPtrConstant(8)); SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), getPointerTy()); - Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0, + Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 16, false, false, 0); MemOps.push_back(Store); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -6712,9 +6790,6 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { // X86-64 va_list is a struct { i32, i32, i8*, i8* }. assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!"); - SDValue Chain = Op.getOperand(0); - SDValue SrcPtr = Op.getOperand(1); - SDValue SrcSV = Op.getOperand(2); report_fatal_error("VAArgInst is not yet implemented for x86-64!"); return SDValue(); @@ -7733,6 +7808,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; + case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; @@ -7944,8 +8020,11 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors to thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. 
+ nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(bInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -7955,17 +8034,17 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, newMBB->addSuccessor(newMBB); // Insert instructions into newMBB based on incoming instruction - assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 && + assert(bInstr->getNumOperands() < X86::AddrNumOperands + 4 && "unexpected number of operands"); DebugLoc dl = bInstr->getDebugLoc(); MachineOperand& destOper = bInstr->getOperand(0); - MachineOperand* argOpers[2 + X86AddrNumOperands]; + MachineOperand* argOpers[2 + X86::AddrNumOperands]; int numArgs = bInstr->getNumOperands() - 1; for (int i=0; i < numArgs; ++i) argOpers[i] = &bInstr->getOperand(i+1); // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] int valArgIndx = lastAddrIndx + 1; unsigned t1 = F->getRegInfo().createVirtualRegister(RC); @@ -8008,7 +8087,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr, // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + bInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8053,8 +8132,11 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors to thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. + nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(bInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -8066,12 +8148,12 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, DebugLoc dl = bInstr->getDebugLoc(); // Insert instructions into newMBB based on incoming instruction // There are 8 "real" operands plus 9 implicit def/uses, ignored here. - assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 && + assert(bInstr->getNumOperands() < X86::AddrNumOperands + 14 && "unexpected number of operands"); MachineOperand& dest1Oper = bInstr->getOperand(0); MachineOperand& dest2Oper = bInstr->getOperand(1); - MachineOperand* argOpers[2 + X86AddrNumOperands]; - for (int i=0; i < 2 + X86AddrNumOperands; ++i) { + MachineOperand* argOpers[2 + X86::AddrNumOperands]; + for (int i=0; i < 2 + X86::AddrNumOperands; ++i) { argOpers[i] = &bInstr->getOperand(i+2); // We use some of the operands multiple times, so conservatively just @@ -8081,7 +8163,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, } // x86 address has 5 operands: base, index, scale, displacement, and segment. 
- int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] unsigned t1 = F->getRegInfo().createVirtualRegister(RC); MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1); @@ -8171,7 +8253,7 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr, // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(bInstr); // The pseudo instruction is gone now. + bInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8205,8 +8287,11 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, F->insert(MBBIter, newMBB); F->insert(MBBIter, nextMBB); - // Move all successors of thisMBB to nextMBB - nextMBB->transferSuccessors(thisMBB); + // Transfer the remainder of thisMBB and its successor edges to nextMBB. + nextMBB->splice(nextMBB->begin(), thisMBB, + llvm::next(MachineBasicBlock::iterator(mInstr)), + thisMBB->end()); + nextMBB->transferSuccessorsAndUpdatePHIs(thisMBB); // Update thisMBB to fall through to newMBB thisMBB->addSuccessor(newMBB); @@ -8217,16 +8302,16 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, DebugLoc dl = mInstr->getDebugLoc(); // Insert instructions into newMBB based on incoming instruction - assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 && + assert(mInstr->getNumOperands() < X86::AddrNumOperands + 4 && "unexpected number of operands"); MachineOperand& destOper = mInstr->getOperand(0); - MachineOperand* argOpers[2 + X86AddrNumOperands]; + MachineOperand* argOpers[2 + X86::AddrNumOperands]; int numArgs = mInstr->getNumOperands() - 1; for (int i=0; i < numArgs; ++i) argOpers[i] = &mInstr->getOperand(i+1); // x86 address has 4 operands: base, index, scale, and displacement - int lastAddrIndx = X86AddrNumOperands - 1; // [0,3] + int lastAddrIndx = X86::AddrNumOperands - 1; // [0,3] int valArgIndx = lastAddrIndx + 1; unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass); @@ -8274,7 +8359,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr, // insert branch BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB); - F->DeleteMachineInstr(mInstr); // The pseudo instruction is gone now. + mInstr->eraseFromParent(); // The pseudo instruction is gone now. return nextMBB; } @@ -8284,7 +8369,6 @@ MachineBasicBlock * X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, unsigned numArgs, bool memArg) const { - MachineFunction *F = BB->getParent(); DebugLoc dl = MI->getDebugLoc(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -8306,7 +8390,7 @@ X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB, BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg()) .addReg(X86::XMM0); - F->DeleteMachineInstr(MI); + MI->eraseFromParent(); return BB; } @@ -8335,9 +8419,12 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( F->insert(MBBIter, XMMSaveMBB); F->insert(MBBIter, EndMBB); - // Set up the CFG. - // Move any original successors of MBB to the end block. - EndMBB->transferSuccessors(MBB); + // Transfer the remainder of MBB and its successor edges to EndMBB. + EndMBB->splice(EndMBB->begin(), MBB, + llvm::next(MachineBasicBlock::iterator(MI)), + MBB->end()); + EndMBB->transferSuccessorsAndUpdatePHIs(MBB); + // The original block will now fall through to the XMM save block. MBB->addSuccessor(XMMSaveMBB); // The XMMSaveMBB will fall through to the end block. 
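Stepping back to the LowerToBT change a few hunks up: a sketch of the source pattern it matches (assuming n < 32, otherwise the shift is undefined; names are mine). With a variable bit index either commuted form can select a single bt instruction, and the new ComputeMaskedBits check keeps that safe when a truncate was looked through:

  // (and (shl 1, n), x) != 0, the shape LowerToBT can turn into "bt %n, %x".
  bool bit_is_set(unsigned x, unsigned n) {
    return (x & (1u << n)) != 0;
  }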
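The atomic custom inserters above all build the same retry loop; here is a rough C++ rendering (using the GCC/Clang __sync builtin; the function name is mine) of what newMBB does for a bitwise pseudo such as AND:

  unsigned atomic_fetch_and(volatile unsigned *addr, unsigned mask) {
    unsigned seen, want;
    do {
      seen = *addr;         // t1 = load [addr]
      want = seen & mask;   // t2 = t1 & mask
    } while (!__sync_bool_compare_and_swap(addr, seen, want)); // lock cmpxchg; jne newMBB
    return seen;            // destOper receives the old value
  }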
@@ -8376,7 +8463,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( .addMemOperand(MMO); } - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return EndMBB; } @@ -8405,24 +8492,39 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - unsigned Opc = - X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); - BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // If the EFLAGS register isn't dead in the terminator, then claim that it's + // live into the sink and copy blocks. + const MachineFunction *MF = BB->getParent(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + BitVector ReservedRegs = TRI->getReservedRegs(*MF); + + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (!MO.isReg() || !MO.isUse() || MO.isKill()) continue; + unsigned Reg = MO.getReg(); + if (Reg != X86::EFLAGS) continue; + copy0MBB->addLiveIn(Reg); + sinkMBB->addLiveIn(Reg); + } + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + // Create the conditional branch instruction. + unsigned Opc = + X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm()); + BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -8431,11 +8533,12 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... - BuildMI(sinkMBB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg()) + BuildMI(*sinkMBB, sinkMBB->begin(), DL, + TII->get(X86::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return sinkMBB; } @@ -8444,21 +8547,70 @@ X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); - MachineFunction *F = BB->getParent(); // The lowering is pretty easy: we're just emitting the call to _alloca. The // non-trivial part is impdef of ESP. // FIXME: The code should be tweaked as soon as we'll try to do codegen for // mingw-w64. 
- BuildMI(BB, DL, TII->get(X86::CALLpcrel32)) + BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) .addExternalSymbol("_alloca") .addReg(X86::EAX, RegState::Implicit) .addReg(X86::ESP, RegState::Implicit) .addReg(X86::EAX, RegState::Define | RegState::Implicit) .addReg(X86::ESP, RegState::Define | RegState::Implicit); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. + return BB; +} + +MachineBasicBlock * +X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, + MachineBasicBlock *BB) const { + // This is pretty easy. We're taking the value that we received from + // our load from the relocation, sticking it in either RDI (x86-64) + // or EAX and doing an indirect call. The return value will then + // be in the normal return register. + const X86InstrInfo *TII + = static_cast(getTargetMachine().getInstrInfo()); + DebugLoc DL = MI->getDebugLoc(); + MachineFunction *F = BB->getParent(); + + assert(MI->getOperand(3).isGlobal() && "This should be a global"); + + if (Subtarget->is64Bit()) { + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV64rm), X86::RDI) + .addReg(X86::RIP) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); + addDirectMem(MIB, X86::RDI); + } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV32rm), X86::EAX) + .addReg(0) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); + addDirectMem(MIB, X86::EAX); + } else { + MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, + TII->get(X86::MOV32rm), X86::EAX) + .addReg(TII->getGlobalBaseReg(F)) + .addImm(0).addReg(0) + .addGlobalAddress(MI->getOperand(3).getGlobal(), 0, + MI->getOperand(3).getTargetFlags()) + .addReg(0); + MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); + addDirectMem(MIB, X86::EAX); + } + + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -8469,6 +8621,9 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, default: assert(false && "Unexpected instr type to insert"); case X86::MINGW_ALLOCA: return EmitLoweredMingwAlloca(MI, BB); + case X86::TLSCall_32: + case X86::TLSCall_64: + return EmitLoweredTLSCall(MI, BB); case X86::CMOV_GR8: case X86::CMOV_V1I64: case X86::CMOV_FR32: @@ -8499,23 +8654,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false); - addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... unsigned OldCW = F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass); - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW), + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW), CWFrameIdx); // Set the high part to be round to zero... - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx) + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mi)), CWFrameIdx) .addImm(0xC7F); // Reload the modified control word now... 
- addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FLDCW16m)), CWFrameIdx); // Restore the memory image of control word to original value - addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx) + addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), CWFrameIdx) .addReg(OldCW); // Get the X86 opcode to use. @@ -8554,13 +8711,14 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, } else { AM.Disp = Op.getImm(); } - addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM) - .addReg(MI->getOperand(X86AddrNumOperands).getReg()); + addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM) + .addReg(MI->getOperand(X86::AddrNumOperands).getReg()); // Reload the original control word now. - addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx); + addFrameReference(BuildMI(*BB, MI, DL, + TII->get(X86::FLDCW16m)), CWFrameIdx); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } // String/text processing lowering. @@ -9513,8 +9671,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, if (ShAmt1.getOpcode() == ISD::SUB) { SDValue Sum = ShAmt1.getOperand(0); if (ConstantSDNode *SumC = dyn_cast(Sum)) { - if (SumC->getSExtValue() == Bits && - ShAmt1.getOperand(1) == ShAmt0) + SDValue ShAmt1Op1 = ShAmt1.getOperand(1); + if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE) + ShAmt1Op1 = ShAmt1Op1.getOperand(0); + if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0) return DAG.getNode(Opc, DL, VT, Op0, Op1, DAG.getNode(ISD::TRUNCATE, DL, @@ -9710,58 +9870,6 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -// On X86 and X86-64, atomic operations are lowered to locked instructions. -// Locked instructions, in turn, have implicit fence semantics (all memory -// operations are flushed before issuing the locked instruction, and the -// are not buffered), so we can fold away the common pattern of -// fence-atomic-fence. 
-static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { - SDValue atomic = N->getOperand(0); - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - break; - default: - return SDValue(); - } - - SDValue fence = atomic.getOperand(0); - if (fence.getOpcode() != ISD::MEMBARRIER) - return SDValue(); - - switch (atomic.getOpcode()) { - case ISD::ATOMIC_CMP_SWAP: - return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2), - atomic.getOperand(3)); - case ISD::ATOMIC_SWAP: - case ISD::ATOMIC_LOAD_ADD: - case ISD::ATOMIC_LOAD_SUB: - case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_NAND: - case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_MAX: - case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_LOAD_UMAX: - return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), - atomic.getOperand(1), atomic.getOperand(2)); - default: - return SDValue(); - } -} - static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) { // (i32 zext (and (i8 x86isd::setcc_carry), 1)) -> // (and (i32 x86isd::setcc_carry), 1) @@ -9809,7 +9917,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); - case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG); case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG); } @@ -9932,8 +10039,8 @@ static bool LowerToBSwap(CallInst *CI) { // so don't worry about this. // Verify this is a simple bswap. - if (CI->getNumOperands() != 2 || - CI->getType() != CI->getOperand(1)->getType() || + if (CI->getNumArgOperands() != 1 || + CI->getType() != CI->getArgOperand(0)->getType() || !CI->getType()->isIntegerTy()) return false; @@ -9946,7 +10053,7 @@ static bool LowerToBSwap(CallInst *CI) { Module *M = CI->getParent()->getParent()->getParent(); Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1); - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); Op = CallInst::Create(Int, Op, CI->getName(), CI); CI->replaceAllUsesWith(Op); @@ -10079,7 +10186,6 @@ LowerXConstraint(EVT ConstraintVT) const { /// vector. If it is invalid, don't add anything to Ops. void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Constraint, - bool hasMemory, std::vector&Ops, SelectionDAG &DAG) const { SDValue Result(0, 0); @@ -10121,9 +10227,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'e': { // 32-bit signed value if (ConstantSDNode *C = dyn_cast(Op)) { - const ConstantInt *CI = C->getConstantIntValue(); - if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()), - C->getSExtValue())) { + if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), + C->getSExtValue())) { // Widen to 64 bits here to get it sign extended. 
Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64); break; @@ -10136,9 +10241,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'Z': { // 32-bit unsigned value if (ConstantSDNode *C = dyn_cast(Op)) { - const ConstantInt *CI = C->getConstantIntValue(); - if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()), - C->getZExtValue())) { + if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), + C->getZExtValue())) { Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType()); break; } @@ -10155,6 +10259,12 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, break; } + // In any sort of PIC mode addresses need to be computed at runtime by + // adding in a register or some sort of table lookup. These can't + // be used as immediates. + if (Subtarget->isPICStyleGOT() || Subtarget->isPICStyleStubPIC()) + return; + // If we are in non-pic codegen mode, we allow the address of a global (with // an optional displacement) to be used with 'i'. GlobalAddressSDNode *GA = 0; @@ -10190,11 +10300,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, getTargetMachine()))) return; - if (hasMemory) - Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG); - else - Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset); - Result = Op; + Result = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), + GA->getValueType(0), Offset); break; } } @@ -10203,8 +10310,7 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, Ops.push_back(Result); return; } - return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory, - Ops, DAG); + return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } std::vector X86TargetLowering:: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 1ef1a7b..2d28e5c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -196,6 +196,10 @@ namespace llvm { // TLSADDR - Thread Local Storage. TLSADDR, + + // TLSCALL - Thread Local Storage. When calling to an OS provided + // thunk at the address from an earlier relocation. + TLSCALL, // SegmentBaseAddress - The address segment:0 SegmentBaseAddress, @@ -496,7 +500,6 @@ namespace llvm { /// being processed is 'm'. virtual void LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector &Ops, SelectionDAG &DAG) const; @@ -576,20 +579,17 @@ namespace llvm { /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. - virtual FastISel * - createFastISel(MachineFunction &mf, - DenseMap &, - DenseMap &, - DenseMap &, - std::vector > & -#ifndef NDEBUG - , SmallSet & -#endif - ) const; + virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo) const; /// getFunctionAlignment - Return the Log2 alignment of this function. virtual unsigned getFunctionAlignment(const Function *F) const; + /// getStackCookieLocation - Return true if the target stores stack + /// protector cookies at a fixed offset in some non-standard address + /// space, and populates the address space and offset as + /// appropriate. + virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const; + private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. 
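On the FNSTCW/FLDCW sequence a few hunks back: C++ requires the cast below to truncate toward zero, while x87 FIST rounds according to the control word, which is why the emitted code saves the word, stores 0xC7F (round-to-zero, exceptions masked), converts, and restores it. A one-line illustration, nothing here is from the patch:

  int truncate_to_int(double d) {
    return (int)d;   // on plain x87: fnstcw; store 0xC7F; fldcw; fistp; fldcw
  }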
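The PerformOrCombine tweak above now looks through a truncate on the complementary shift amount; the pattern in question is the double shift below (a sketch; assumes 0 < n < 32 so neither shift is undefined), selectable as one shld:

  unsigned shift_in_from(unsigned hi, unsigned lo, unsigned n) {
    return (hi << n) | (lo >> (32 - n));   // the double-shift shape the combine matches
  }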
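As for the deleted PerformMEMBARRIERCombine: the pattern it used to fold looks like the sketch below (GCC/Clang builtins; names mine). On x86 the locked RMW is itself a full barrier, so the surrounding fences add nothing; with this patch the folding is instead requested through the setShouldFoldAtomicFences(true) call at the top of this section:

  void locked_increment(volatile int *p) {
    __sync_synchronize();         // fence (ISD::MEMBARRIER)
    __sync_fetch_and_add(p, 1);   // lock addl: already a full barrier on x86
    __sync_synchronize();         // fence (ISD::MEMBARRIER)
  }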
@@ -643,6 +643,7 @@ namespace llvm { bool isCalleeStructRet, bool isCallerStructRet, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, SelectionDAG& DAG) const; bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; @@ -725,6 +726,7 @@ namespace llvm { LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -733,13 +735,13 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &OutTys, - const SmallVectorImpl &ArgsFlags, - SelectionDAG &DAG) const; + const SmallVectorImpl &Outs, + LLVMContext &Context) const; void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl &Results, SelectionDAG &DAG, unsigned NewOp) const; @@ -794,6 +796,9 @@ namespace llvm { MachineBasicBlock *EmitLoweredMingwAlloca(MachineInstr *MI, MachineBasicBlock *BB) const; + + MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI, + MachineBasicBlock *BB) const; /// Emit nodes that will be selected as "test Op0,Op0", or something /// equivalent, for use with the given x86 condition code. @@ -806,15 +811,7 @@ namespace llvm { }; namespace X86 { - FastISel *createFastISel(MachineFunction &mf, - DenseMap &, - DenseMap &, - DenseMap &, - std::vector > & -#ifndef NDEBUG - , SmallSet & -#endif - ); + FastISel *createFastISel(FunctionLoweringInfo &funcInfo); } } diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 97eb17c..42d0e7f 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -35,6 +35,14 @@ def i64i8imm : Operand { let ParserMatchClass = ImmSExti64i8AsmOperand; } +def lea64_32mem : Operand { + let PrintMethod = "printi32mem"; + let AsmOperandLowerMethod = "lower_lea64_32mem"; + let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + + // Special i64mem for addresses of load folding tail calls. These are not // allowed to use callee-saved registers since they must be scheduled // after callee-saved register are popped. @@ -44,29 +52,16 @@ def i64mem_TC : Operand { let ParserMatchClass = X86MemAsmOperand; } -def lea64mem : Operand { - let PrintMethod = "printlea64mem"; - let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm); - let ParserMatchClass = X86NoSegMemAsmOperand; -} - -def lea64_32mem : Operand { - let PrintMethod = "printlea64_32mem"; - let AsmOperandLowerMethod = "lower_lea64_32mem"; - let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); - let ParserMatchClass = X86NoSegMemAsmOperand; -} - //===----------------------------------------------------------------------===// // Complex Pattern Definitions. // -def lea64addr : ComplexPattern; -def tls64addr : ComplexPattern; - + //===----------------------------------------------------------------------===// // Pattern fragments. 
// @@ -289,11 +284,11 @@ def LEA64_32r : I<0x8D, MRMSrcMem, [(set GR32:$dst, lea32addr:$src)]>, Requires<[In64BitMode]>; let isReMaterializable = 1 in -def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins lea64mem:$src), +def LEA64r : RI<0x8D, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "lea{q}\t{$src|$dst}, {$dst|$src}", [(set GR64:$dst, lea64addr:$src)]>; -let isTwoAddress = 1 in +let Constraints = "$src = $dst" in def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src), "bswap{q}\t$dst", [(set GR64:$dst, (bswap GR64:$src))]>, TB; @@ -521,7 +516,7 @@ let Defs = [EFLAGS] in { def ADD64i32 : RIi32<0x05, RawFrm, (outs), (ins i64i32imm:$src), "add{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isConvertibleToThreeAddress = 1 in { let isCommutable = 1 in // Register-Register Addition @@ -559,7 +554,7 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), [(set GR64:$dst, EFLAGS, (X86add_flag GR64:$src1, (load addr:$src2)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" // Memory-Register Addition def ADD64mr : RI<0x01, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), @@ -580,7 +575,7 @@ let Uses = [EFLAGS] in { def ADC64i32 : RIi32<0x15, RawFrm, (outs), (ins i64i32imm:$src), "adc{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def ADC64rr : RI<0x11, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -606,7 +601,7 @@ def ADC64ri32 : RIi32<0x81, MRM2r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (adde GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def ADC64mr : RI<0x11, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), "adc{q}\t{$src2, $dst|$dst, $src2}", @@ -621,7 +616,7 @@ def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2), addr:$dst)]>; } // Uses = [EFLAGS] -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { // Register-Register Subtraction def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -653,7 +648,7 @@ def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst), "sub{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i64i32imm:$src), "sub{q}\t{$src, %rax|%rax, $src}", []>; @@ -677,7 +672,7 @@ def SUB64mi32 : RIi32<0x81, MRM5m, (outs), (ins i64mem:$dst, i64i32imm:$src2), (implicit EFLAGS)]>; let Uses = [EFLAGS] in { -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { def SBB64rr : RI<0x19, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", @@ -702,7 +697,7 @@ def SBB64ri32 : RIi32<0x81, MRM3r, (outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2), "sbb{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, (sube GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def SBB64i32 : RIi32<0x1D, RawFrm, (outs), (ins i64i32imm:$src), "sbb{q}\t{$src, %rax|%rax, $src}", []>; @@ -736,7 +731,7 @@ def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), } let Defs = [EFLAGS] in { -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in // Register-Register Signed Integer Multiplication def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst), @@ -751,7 +746,7 @@ def IMUL64rm : 
 RI<0xAF, MRMSrcMem, (outs GR64:$dst),
                  "imul{q}\t{$src2, $dst|$dst, $src2}",
                  [(set GR64:$dst, EFLAGS,
                        (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
 
 // Surprisingly enough, these are not two address instructions!
 
@@ -803,7 +798,7 @@ def IDIV64m: RI<0xF7, MRM7m, (outs), (ins i64mem:$src),
 
 // Unary instructions
 let Defs = [EFLAGS], CodeSize = 2 in {
-let isTwoAddress = 1 in
+let Constraints = "$src = $dst" in
 def NEG64r : RI<0xF7, MRM3r, (outs GR64:$dst), (ins GR64:$src), "neg{q}\t$dst",
                 [(set GR64:$dst, (ineg GR64:$src)),
                  (implicit EFLAGS)]>;
@@ -811,14 +806,14 @@ def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
                 [(store (ineg (loadi64 addr:$dst)), addr:$dst),
                  (implicit EFLAGS)]>;
 
-let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
 def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
                 [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>;
 def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
                 [(store (add (loadi64 addr:$dst), 1), addr:$dst),
                  (implicit EFLAGS)]>;
 
-let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
+let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in
 def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
                 [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>;
 def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
@@ -826,7 +821,7 @@ def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
                  (implicit EFLAGS)]>;
 
 // In 64-bit mode, single byte INC and DEC cannot be encoded.
-let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in {
+let Constraints = "$src = $dst", isConvertibleToThreeAddress = 1 in {
 // Can transform into LEA.
 def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src),
                   "inc{w}\t$dst",
@@ -844,38 +839,36 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src),
                   "dec{l}\t$dst",
                   [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
                 Requires<[In64BitMode]>;
-} // isConvertibleToThreeAddress
+} // Constraints = "$src = $dst", isConvertibleToThreeAddress
 
 // These are duplicates of their 32-bit counterparts. Only needed so X86 knows
 // how to unfold them.
-let isTwoAddress = 0, CodeSize = 2 in { - def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", - [(store (add (loadi16 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In64BitMode]>; - def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", - [(store (add (loadi32 addr:$dst), 1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In64BitMode]>; - def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", - [(store (add (loadi16 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - OpSize, Requires<[In64BitMode]>; - def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", - [(store (add (loadi32 addr:$dst), -1), addr:$dst), - (implicit EFLAGS)]>, - Requires<[In64BitMode]>; -} +def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", + [(store (add (loadi16 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", + [(store (add (loadi32 addr:$dst), 1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; +def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", + [(store (add (loadi16 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + OpSize, Requires<[In64BitMode]>; +def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", + [(store (add (loadi32 addr:$dst), -1), addr:$dst), + (implicit EFLAGS)]>, + Requires<[In64BitMode]>; } // Defs = [EFLAGS], CodeSize let Defs = [EFLAGS] in { // Shift instructions -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in -def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src), +def SHL64rCL : RI<0xD3, MRM4r, (outs GR64:$dst), (ins GR64:$src1), "shl{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (shl GR64:$src, CL))]>; + [(set GR64:$dst, (shl GR64:$src1, CL))]>; let isConvertibleToThreeAddress = 1 in // Can transform into LEA. def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), @@ -885,7 +878,7 @@ def SHL64ri : RIi8<0xC1, MRM4r, (outs GR64:$dst), // 'add reg,reg' is cheaper. 
 def SHL64r1 : RI<0xD1, MRM4r, (outs GR64:$dst), (ins GR64:$src1),
                  "shl{q}\t$dst", []>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
 
 let Uses = [CL] in
 def SHL64mCL : RI<0xD3, MRM4m, (outs), (ins i64mem:$dst),
@@ -898,18 +891,18 @@ def SHL64m1 : RI<0xD1, MRM4m, (outs), (ins i64mem:$dst),
                   "shl{q}\t$dst",
                  [(store (shl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let Uses = [CL] in
-def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src),
+def SHR64rCL : RI<0xD3, MRM5r, (outs GR64:$dst), (ins GR64:$src1),
                   "shr{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (srl GR64:$src, CL))]>;
+                  [(set GR64:$dst, (srl GR64:$src1, CL))]>;
 def SHR64ri : RIi8<0xC1, MRM5r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2),
                   "shr{q}\t{$src2, $dst|$dst, $src2}",
                   [(set GR64:$dst, (srl GR64:$src1, (i8 imm:$src2)))]>;
 def SHR64r1  : RI<0xD1, MRM5r, (outs GR64:$dst), (ins GR64:$src1), "shr{q}\t$dst",
                  [(set GR64:$dst, (srl GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
 
 let Uses = [CL] in
 def SHR64mCL : RI<0xD3, MRM5m, (outs), (ins i64mem:$dst),
@@ -922,11 +915,11 @@ def SHR64m1 : RI<0xD1, MRM5m, (outs), (ins i64mem:$dst), "shr{q}\t$dst",
                  [(store (srl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>;
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let Uses = [CL] in
-def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src),
+def SAR64rCL : RI<0xD3, MRM7r, (outs GR64:$dst), (ins GR64:$src1),
                  "sar{q}\t{%cl, $dst|$dst, %CL}",
-                 [(set GR64:$dst, (sra GR64:$src, CL))]>;
+                 [(set GR64:$dst, (sra GR64:$src1, CL))]>;
 def SAR64ri  : RIi8<0xC1, MRM7r, (outs GR64:$dst),
                     (ins GR64:$src1, i8imm:$src2),
                     "sar{q}\t{$src2, $dst|$dst, $src2}",
@@ -934,7 +927,7 @@ def SAR64ri : RIi8<0xC1, MRM7r, (outs GR64:$dst),
 def SAR64r1  : RI<0xD1, MRM7r, (outs GR64:$dst), (ins GR64:$src1), "sar{q}\t$dst",
                  [(set GR64:$dst, (sra GR64:$src1, (i8 1)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
 
 let Uses = [CL] in
 def SAR64mCL : RI<0xD3, MRM7m, (outs), (ins i64mem:$dst),
@@ -949,7 +942,7 @@ def SAR64m1 : RI<0xD1, MRM7m, (outs), (ins i64mem:$dst),
 
 // Rotate instructions
 
-let isTwoAddress = 1 in {
+let Constraints = "$src = $dst" in {
 def RCL64r1 : RI<0xD1, MRM2r, (outs GR64:$dst), (ins GR64:$src),
                  "rcl{q}\t{1, $dst|$dst, 1}", []>;
 def RCL64ri : RIi8<0xC1, MRM2r, (outs GR64:$dst), (ins GR64:$src, i8imm:$cnt),
@@ -966,9 +959,8 @@ def RCL64rCL : RI<0xD3, MRM2r, (outs GR64:$dst), (ins GR64:$src),
 def RCR64rCL : RI<0xD3, MRM3r, (outs GR64:$dst), (ins GR64:$src),
                   "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
 }
-}
+} // Constraints = "$src = $dst"
 
-let isTwoAddress = 0 in {
 def RCL64m1 : RI<0xD1, MRM2m, (outs), (ins i64mem:$dst),
                  "rcl{q}\t{1, $dst|$dst, 1}", []>;
 def RCL64mi : RIi8<0xC1, MRM2m, (outs), (ins i64mem:$dst, i8imm:$cnt),
@@ -984,13 +976,12 @@ def RCL64mCL : RI<0xD3, MRM2m, (outs), (ins i64mem:$dst),
 def RCR64mCL : RI<0xD3, MRM3m, (outs), (ins i64mem:$dst),
                  "rcr{q}\t{%cl, $dst|$dst, CL}", []>;
 }
-}
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 let Uses = [CL] in
-def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src),
+def ROL64rCL : RI<0xD3, MRM0r, (outs GR64:$dst), (ins GR64:$src1),
                   "rol{q}\t{%cl, $dst|$dst, %CL}",
-                  [(set GR64:$dst, (rotl GR64:$src, CL))]>;
+                  [(set GR64:$dst, (rotl GR64:$src1, CL))]>;
 def ROL64ri  : RIi8<0xC1, MRM0r, (outs GR64:$dst),
                     (ins GR64:$src1, i8imm:$src2),
                     "rol{q}\t{$src2, $dst|$dst, $src2}",
@@ -998,7 +989,7 @@ def
ROL64r1 : RI<0xD1, MRM0r, (outs GR64:$dst), (ins GR64:$src1), "rol{q}\t$dst", [(set GR64:$dst, (rotl GR64:$src1, (i8 1)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in def ROL64mCL : RI<0xD3, MRM0m, (outs), (ins i64mem:$dst), @@ -1011,11 +1002,11 @@ def ROL64m1 : RI<0xD1, MRM0m, (outs), (ins i64mem:$dst), "rol{q}\t$dst", [(store (rotl (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in -def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src), +def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t{%cl, $dst|$dst, %CL}", - [(set GR64:$dst, (rotr GR64:$src, CL))]>; + [(set GR64:$dst, (rotr GR64:$src1, CL))]>; def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), (ins GR64:$src1, i8imm:$src2), "ror{q}\t{$src2, $dst|$dst, $src2}", @@ -1023,7 +1014,7 @@ def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst), def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1), "ror{q}\t$dst", [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in def ROR64mCL : RI<0xD3, MRM1m, (outs), (ins i64mem:$dst), @@ -1037,7 +1028,7 @@ def ROR64m1 : RI<0xD1, MRM1m, (outs), (ins i64mem:$dst), [(store (rotr (loadi64 addr:$dst), (i8 1)), addr:$dst)]>; // Double shift instructions (generalizations of rotate) -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let Uses = [CL] in { def SHLD64rrCL : RI<0xA5, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1067,7 +1058,7 @@ def SHRD64rri8 : RIi8<0xAC, MRMDestReg, (i8 imm:$src3)))]>, TB; } // isCommutable -} // isTwoAddress +} // Constraints = "$src1 = $dst" let Uses = [CL] in { def SHLD64mrCL : RI<0xA5, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2), @@ -1097,7 +1088,7 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, // Logical Instructions... 
// -let isTwoAddress = 1 , AddedComplexity = 15 in +let Constraints = "$src = $dst" , AddedComplexity = 15 in def NOT64r : RI<0xF7, MRM2r, (outs GR64:$dst), (ins GR64:$src), "not{q}\t$dst", [(set GR64:$dst, (not GR64:$src))]>; def NOT64m : RI<0xF7, MRM2m, (outs), (ins i64mem:$dst), "not{q}\t$dst", @@ -1107,7 +1098,7 @@ let Defs = [EFLAGS] in { def AND64i32 : RIi32<0x25, RawFrm, (outs), (ins i64i32imm:$src), "and{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def AND64rr : RI<0x21, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1134,7 +1125,7 @@ def AND64ri32 : RIi32<0x81, MRM4r, "and{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86and_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def AND64mr : RI<0x21, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), @@ -1152,7 +1143,7 @@ def AND64mi32 : RIi32<0x81, MRM4m, [(store (and (loadi64 addr:$dst), i64immSExt32:$src), addr:$dst), (implicit EFLAGS)]>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1179,7 +1170,7 @@ def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst), "or{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86or_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "or{q}\t{$src, $dst|$dst, $src}", @@ -1197,7 +1188,7 @@ def OR64mi32 : RIi32<0x81, MRM1m, (outs), (ins i64mem:$dst, i64i32imm:$src), def OR64i32 : RIi32<0x0D, RawFrm, (outs), (ins i64i32imm:$src), "or{q}\t{$src, %rax|%rax, $src}", []>; -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let isCommutable = 1 in def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), @@ -1224,7 +1215,7 @@ def XOR64ri32 : RIi32<0x81, MRM6r, "xor{q}\t{$src2, $dst|$dst, $src2}", [(set GR64:$dst, EFLAGS, (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>; -} // isTwoAddress +} // Constraints = "$src1 = $dst" def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), "xor{q}\t{$src, $dst|$dst, $src}", @@ -1366,7 +1357,7 @@ def BTS64mi8 : RIi8<0xBA, MRM5m, (outs), (ins i64mem:$src1, i64i8imm:$src2), } // Defs = [EFLAGS] // Conditional moves -let Uses = [EFLAGS], isTwoAddress = 1 in { +let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { let isCommutable = 1 in { def CMOVB64rr : RI<0x42, MRMSrcReg, // if , TB; -} // isTwoAddress +} // Constraints = "$src1 = $dst" // Use sbb to materialize carry flag into a GPR. // FIXME: This are pseudo ops that should be replaced with Pat<> patterns. 
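The Constraints string carries the same fact the removed isTwoAddress flag implied, but per operand pair; after TableGen processing it surfaces in C++ as a TIED_TO operand constraint. A sketch of how a pass can query it (API names as of this LLVM revision; treat the exact calls as assumptions):

const TargetInstrDesc &TID = MI->getDesc();
// For "$src1 = $dst", operand 1 ($src1) is tied to operand 0 ($dst);
// getOperandConstraint returns -1 when an operand is not tied.
int TiedTo = TID.getOperandConstraint(1, TOI::TIED_TO);
bool IsTwoAddress = (TiedTo == 0);
// TwoAddressInstructionPass later rewrites the MI so that both operands
// end up in the same register, exactly as the old flag required.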
@@ -1588,7 +1579,7 @@ def CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
                       "cvtsi2sd{q}\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 def Int_CVTSI2SD64rr: RSDI<0x2A, MRMSrcReg,
                            (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
                            "cvtsi2sd{q}\t{$src2, $dst|$dst, $src2}",
@@ -1601,7 +1592,7 @@ def Int_CVTSI2SD64rm: RSDI<0x2A, MRMSrcMem,
                            [(set VR128:$dst,
                              (int_x86_sse2_cvtsi642sd VR128:$src1,
                               (loadi64 addr:$src2)))]>;
-} // isTwoAddress
+} // Constraints = "$src1 = $dst"
 
 // Signed i64 -> f32
 def CVTSI2SS64rr: RSSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR64:$src),
@@ -1611,7 +1602,7 @@ def CVTSI2SS64rm: RSSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i64mem:$src),
                       "cvtsi2ss{q}\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (sint_to_fp (loadi64 addr:$src)))]>;
 
-let isTwoAddress = 1 in {
+let Constraints = "$src1 = $dst" in {
 def Int_CVTSI2SS64rr : RSSI<0x2A, MRMSrcReg,
                             (outs VR128:$dst), (ins VR128:$src1, GR64:$src2),
                             "cvtsi2ss{q}\t{$src2, $dst|$dst, $src2}",
@@ -1625,7 +1616,7 @@ let isTwoAddress = 1 in {
                             [(set VR128:$dst,
                               (int_x86_sse_cvtsi642ss VR128:$src1,
                                (loadi64 addr:$src2)))]>;
-}
+} // Constraints = "$src1 = $dst"
 
 // f32 -> signed i64
 def CVTSS2SI64rr: RSSI<0x2D, MRMSrcReg, (outs GR64:$dst), (ins FR32:$src),
@@ -1691,6 +1682,7 @@ def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
 // Thread Local Storage Instructions
 //===----------------------------------------------------------------------===//
 
+// ELF TLS Support
 // All calls clobber the non-callee saved registers. RSP is marked as
 // a use to prevent stack-pointer assignments that appear immediately
 // before calls from potentially appearing dead.
@@ -1700,7 +1692,7 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
             XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9,
             XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
     Uses = [RSP] in
-def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
+def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
                    ".byte\t0x66; "
                    "leaq\t$sym(%rip), %rdi; "
                    ".word\t0x6666; "
@@ -1709,6 +1701,17 @@ def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
                   [(X86tlsaddr tls64addr:$sym)]>,
                   Requires<[In64BitMode]>;
 
+// Darwin TLS Support
+// For x86_64, the address of the thunk is passed in %rdi; on return,
+// the address of the variable is in %rax. All other registers are preserved.
+let Defs = [RAX],
+    Uses = [RDI],
+    usesCustomInserter = 1 in
+def TLSCall_64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
+                   "# TLSCall_64",
+                   [(X86TLSCall addr:$sym)]>,
+                   Requires<[In64BitMode]>;
+
 let AddedComplexity = 5, isCodeGenOnly = 1 in
 def MOV64GSrm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                    "movq\t%gs:$src, $dst",
@@ -1964,6 +1967,17 @@ def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
           (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
           Requires<[In64BitMode]>;
 
+// TLS has some funny stuff here...
+// This corresponds to movabs $foo@tpoff, %rax
+def : Pat<(i64 (X86Wrapper tglobaltlsaddr :$dst)),
+          (MOV64ri tglobaltlsaddr :$dst)>;
+// This corresponds to add $foo@tpoff, %rax
+def : Pat<(add GR64:$src1, (X86Wrapper tglobaltlsaddr :$dst)),
+          (ADD64ri32 GR64:$src1, tglobaltlsaddr :$dst)>;
+// This corresponds to mov foo@tpoff(%rbx), %eax
+def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
+          (MOV64rm tglobaltlsaddr :$dst)>;
+
 // Comparisons.
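Because TLSCall_64 sets usesCustomInserter, the pseudo is expanded through the target's custom-inserter hook rather than by a fixed encoding. A simplified sketch of that dispatch; the real EmitInstrWithCustomInserter handles many other pseudos, so treat this reduction as illustrative:

MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  switch (MI->getOpcode()) {
  case X86::TLSCall_64:
    // Materialize the thunk address named by $sym and call through it:
    // %rdi carries the argument, %rax comes back holding the variable's
    // address, and all other registers are preserved.
    return EmitLoweredTLSCall(MI, BB);
  default:
    llvm_unreachable("Unexpected instr type to insert");
  }
}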
// TEST R,R is smaller than CMP R,0 @@ -2332,45 +2346,3 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; -//===----------------------------------------------------------------------===// -// X86-64 SSE4.1 Instructions -//===----------------------------------------------------------------------===// - -/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination -multiclass SS41I_extract64 opc, string OpcodeStr> { - def rr : SS4AIi8, OpSize, REX_W; - def mr : SS4AIi8, OpSize, REX_W; -} - -defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; - -let isTwoAddress = 1 in { - multiclass SS41I_insert64 opc, string OpcodeStr> { - def rr : SS4AIi8, - OpSize, REX_W; - def rm : SS4AIi8, OpSize, REX_W; - } -} - -defm PINSRQ : SS41I_insert64<0x22, "pinsrq">; diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index 5a82a7b..2a6a71d 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -64,19 +64,15 @@ struct X86AddressMode { /// static inline const MachineInstrBuilder & addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) { - // Because memory references are always represented with four - // values, this adds: Reg, [1, NoReg, 0] to the instruction. - return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0); + // Because memory references are always represented with five + // values, this adds: Reg, 1, NoReg, 0, NoReg to the instruction. + return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0); } -static inline const MachineInstrBuilder & -addLeaOffset(const MachineInstrBuilder &MIB, int Offset) { - return MIB.addImm(1).addReg(0).addImm(Offset); -} static inline const MachineInstrBuilder & addOffset(const MachineInstrBuilder &MIB, int Offset) { - return addLeaOffset(MIB, Offset).addReg(0); + return MIB.addImm(1).addReg(0).addImm(Offset).addReg(0); } /// addRegOffset - This function is used to add a memory reference of the form @@ -89,25 +85,20 @@ addRegOffset(const MachineInstrBuilder &MIB, return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); } -static inline const MachineInstrBuilder & -addLeaRegOffset(const MachineInstrBuilder &MIB, - unsigned Reg, bool isKill, int Offset) { - return addLeaOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); -} - /// addRegReg - This function is used to add a memory reference of the form: /// [Reg + Reg]. 
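With the segment register made an explicit fifth operand, every x86 memory reference is now base, scale, index, displacement, segment, and the rewritten X86InstrBuilder helpers above append all five. A usage sketch; the register choices and insertion point are illustrative only:

// Builds "leaq 16(%rax,%rbx,4), %rcx"; addFullAddress appends base, scale,
// index, displacement, and finally the segment register (0 = none).
X86AddressMode AM;
AM.BaseType = X86AddressMode::RegBase;
AM.Base.Reg = X86::RAX;
AM.Scale    = 4;
AM.IndexReg = X86::RBX;
AM.Disp     = 16;
addFullAddress(BuildMI(MBB, MI, DL, TII->get(X86::LEA64r), X86::RCX), AM);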
static inline const MachineInstrBuilder &addRegReg(const MachineInstrBuilder &MIB, unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) { return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(1) - .addReg(Reg2, getKillRegState(isKill2)).addImm(0); + .addReg(Reg2, getKillRegState(isKill2)).addImm(0).addReg(0); } static inline const MachineInstrBuilder & -addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { - assert (AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); - +addFullAddress(const MachineInstrBuilder &MIB, + const X86AddressMode &AM) { + assert(AM.Scale == 1 || AM.Scale == 2 || AM.Scale == 4 || AM.Scale == 8); + if (AM.BaseType == X86AddressMode::RegBase) MIB.addReg(AM.Base.Reg); else if (AM.BaseType == X86AddressMode::FrameIndexBase) @@ -116,15 +107,11 @@ addLeaAddress(const MachineInstrBuilder &MIB, const X86AddressMode &AM) { assert (0); MIB.addImm(AM.Scale).addReg(AM.IndexReg); if (AM.GV) - return MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); + MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); else - return MIB.addImm(AM.Disp); -} - -static inline const MachineInstrBuilder & -addFullAddress(const MachineInstrBuilder &MIB, - const X86AddressMode &AM) { - return addLeaAddress(MIB, AM).addReg(0); + MIB.addImm(AM.Disp); + + return MIB.addReg(0); } /// addFrameReference - This function is used to add a reference to the base of diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index 0aae4a8..da93de9 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -371,7 +371,7 @@ multiclass FPCMov { Requires<[HasCMov]>; } -let Uses = [EFLAGS], isTwoAddress = 1 in { +let Uses = [EFLAGS], Constraints = "$src1 = $dst" in { defm CMOVB : FPCMov; defm CMOVBE : FPCMov; defm CMOVE : FPCMov; @@ -380,7 +380,7 @@ defm CMOVNB : FPCMov; defm CMOVNBE: FPCMov; defm CMOVNE : FPCMov; defm CMOVNP : FPCMov; -} +} // Uses = [EFLAGS], Constraints = "$src1 = $dst" let Predicates = [HasCMov] in { // These are not factored because there's no clean way to pass DA/DB. @@ -680,19 +680,19 @@ def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>; // FP extensions map onto simple pseudo-value conversions if they are to/from // the FP stack. -def : Pat<(f64 (fextend RFP32:$src)), (MOV_Fp3264 RFP32:$src)>, +def : Pat<(f64 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>, Requires<[FPStackf32]>; -def : Pat<(f80 (fextend RFP32:$src)), (MOV_Fp3280 RFP32:$src)>, +def : Pat<(f80 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>, Requires<[FPStackf32]>; -def : Pat<(f80 (fextend RFP64:$src)), (MOV_Fp6480 RFP64:$src)>, +def : Pat<(f80 (fextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>, Requires<[FPStackf64]>; // FP truncations map onto simple pseudo-value conversions if they are to/from // the FP stack. We have validated that only value-preserving truncations make // it through isel. 
-def : Pat<(f32 (fround RFP64:$src)), (MOV_Fp6432 RFP64:$src)>, +def : Pat<(f32 (fround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>, Requires<[FPStackf32]>; -def : Pat<(f32 (fround RFP80:$src)), (MOV_Fp8032 RFP80:$src)>, +def : Pat<(f32 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>, Requires<[FPStackf32]>; -def : Pat<(f64 (fround RFP80:$src)), (MOV_Fp8064 RFP80:$src)>, +def : Pat<(f64 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>, Requires<[FPStackf64]>; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index c4522f3..97578af 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -50,9 +50,10 @@ def NoImm : ImmType<0>; def Imm8 : ImmType<1>; def Imm8PCRel : ImmType<2>; def Imm16 : ImmType<3>; -def Imm32 : ImmType<4>; -def Imm32PCRel : ImmType<5>; -def Imm64 : ImmType<6>; +def Imm16PCRel : ImmType<4>; +def Imm32 : ImmType<5>; +def Imm32PCRel : ImmType<6>; +def Imm64 : ImmType<7>; // FPFormat - This specifies what form this FP instruction has. This is used by // the Floating-Point stackifier pass. @@ -101,6 +102,10 @@ class XS { bits<4> Prefix = 12; } class T8 { bits<4> Prefix = 13; } class TA { bits<4> Prefix = 14; } class TF { bits<4> Prefix = 15; } +class VEX { bit hasVEXPrefix = 1; } +class VEX_W { bit hasVEX_WPrefix = 1; } +class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } +class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; } class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, string AsmStr, Domain d = GenericDomain> @@ -128,6 +133,11 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix? bits<2> SegOvrBits = 0; // Segment override prefix. Domain ExeDomain = d; + bit hasVEXPrefix = 0; // Does this inst requires a VEX prefix? + bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field? + bit hasVEX_4VPrefix = 0; // Does this inst requires the VEX.VVVV field? + bit hasVEX_i8ImmReg = 0; // Does this inst requires the last source register + // to be encoded in a immediate field? // TSFlags layout should be kept in sync with X86InstrInfo.h. 
let TSFlags{5-0} = FormBits; @@ -141,6 +151,10 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{21-20} = SegOvrBits; let TSFlags{23-22} = ExeDomain.Value; let TSFlags{31-24} = Opcode; + let TSFlags{32} = hasVEXPrefix; + let TSFlags{33} = hasVEX_WPrefix; + let TSFlags{34} = hasVEX_4VPrefix; + let TSFlags{35} = hasVEX_i8ImmReg; } class I o, Format f, dag outs, dag ins, string asm, @@ -174,6 +188,13 @@ class Ii32 o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +class Ii16PCRel o, Format f, dag outs, dag ins, string asm, + list pattern> + : X86Inst { + let Pattern = pattern; + let CodeSize = 3; +} + class Ii32PCRel o, Format f, dag outs, dag ins, string asm, list pattern> : X86Inst { @@ -211,11 +232,56 @@ class Iseg32 o, Format f, dag outs, dag ins, string asm, let CodeSize = 3; } +// SI - SSE 1 & 2 scalar instructions +class SI o, Format F, dag outs, dag ins, string asm, list pattern> + : I { + let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); +} + +// SIi8 - SSE 1 & 2 scalar instructions +class SIi8 o, Format F, dag outs, dag ins, string asm, + list pattern> + : Ii8 { + let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(!eq(Prefix, 12 /* XS */), [HasSSE1], [HasSSE2])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); +} + +// PI - SSE 1 & 2 packed instructions +class PI o, Format F, dag outs, dag ins, string asm, list pattern, + Domain d> + : I { + let Predicates = !if(hasVEXPrefix /* VEX */, [HasAVX], + !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEXPrefix, !strconcat("v", asm), asm); +} + +// PIi8 - SSE 1 & 2 packed instructions with immediate +class PIi8 o, Format F, dag outs, dag ins, string asm, + list pattern, Domain d> + : Ii8 { + let Predicates = !if(hasVEX_4VPrefix /* VEX */, [HasAVX], + !if(hasOpSizePrefix /* OpSize */, [HasSSE2], [HasSSE1])); + + // AVX instructions have a 'v' prefix in the mnemonic + let AsmString = !if(hasVEX_4VPrefix, !strconcat("v", asm), asm); +} + // SSE1 Instruction Templates: // // SSI - SSE1 instructions with XS prefix. // PSI - SSE1 instructions with TB prefix. // PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix. +// VSSI - SSE1 instructions with XS prefix in AVX form. +// VPSI - SSE1 instructions with TB prefix in AVX form. class SSI o, Format F, dag outs, dag ins, string asm, list pattern> : I, XS, Requires<[HasSSE1]>; @@ -229,6 +295,14 @@ class PSIi8 o, Format F, dag outs, dag ins, string asm, list pattern> : Ii8, TB, Requires<[HasSSE1]>; +class VSSI o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, XS, + Requires<[HasAVX]>; +class VPSI o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, + Requires<[HasAVX]>; // SSE2 Instruction Templates: // @@ -237,6 +311,8 @@ class PSIi8 o, Format F, dag outs, dag ins, string asm, // SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix. // PDI - SSE2 instructions with TB and OpSize prefixes. // PDIi8 - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes. +// VSDI - SSE2 instructions with XD prefix in AVX form. +// VPDI - SSE2 instructions with TB and OpSize prefixes in AVX form. 
class SDI o, Format F, dag outs, dag ins, string asm, list pattern> : I, XD, Requires<[HasSSE2]>; @@ -253,6 +329,14 @@ class PDIi8 o, Format F, dag outs, dag ins, string asm, list pattern> : Ii8, TB, OpSize, Requires<[HasSSE2]>; +class VSDI o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, XD, + Requires<[HasAVX]>; +class VPDI o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, + OpSize, Requires<[HasAVX]>; // SSE3 Instruction Templates: // diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 6b9478d..71c4e8b 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -60,3 +60,339 @@ def mmx_pshufw : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return X86::isPSHUFDMask(cast(N)); }], MMX_SHUFFLE_get_shuf_imm>; + +//===----------------------------------------------------------------------===// +// SSE specific DAG Nodes. +//===----------------------------------------------------------------------===// + +def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, + SDTCisFP<0>, SDTCisInt<2> ]>; +def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, + SDTCisFP<1>, SDTCisVT<3, i8>]>; + +def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; +def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; +def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, + [SDNPCommutative, SDNPAssociative]>; +def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; +def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; +def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; +def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; +def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; +def X86pshufb : SDNode<"X86ISD::PSHUFB", + SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; +def X86pextrb : SDNode<"X86ISD::PEXTRB", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; +def X86pextrw : SDNode<"X86ISD::PEXTRW", + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; +def X86pinsrb : SDNode<"X86ISD::PINSRB", + SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; +def X86pinsrw : SDNode<"X86ISD::PINSRW", + SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, + SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; +def X86insrtps : SDNode<"X86ISD::INSERTPS", + SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, + SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>; +def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", + SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; +def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, + [SDNPHasChain, SDNPMayLoad]>; +def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>; +def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>; +def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>; +def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>; +def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>; +def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>; +def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>; +def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", 
SDTIntBinOp>; +def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>; + +def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, v4f32>, + SDTCisVT<2, v4f32>]>; +def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; + +//===----------------------------------------------------------------------===// +// SSE Complex Patterns +//===----------------------------------------------------------------------===// + +// These are 'extloads' from a scalar to the low element of a vector, zeroing +// the top elements. These are used for the SSE 'ss' and 'sd' instruction +// forms. +def sse_load_f32 : ComplexPattern; +def sse_load_f64 : ComplexPattern; + +def ssmem : Operand { + let PrintMethod = "printf32mem"; + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} +def sdmem : Operand { + let PrintMethod = "printf64mem"; + let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm); + let ParserMatchClass = X86MemAsmOperand; +} + +//===----------------------------------------------------------------------===// +// SSE pattern fragments +//===----------------------------------------------------------------------===// + +def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>; +def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>; +def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>; +def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>; + +// FIXME: move this to a more appropriate place after all AVX is done. +def loadv8f32 : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>; +def loadv4f64 : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>; +def loadv8i32 : PatFrag<(ops node:$ptr), (v8i32 (load node:$ptr))>; +def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>; + +// Like 'store', but always requires vector alignment. +def alignedstore : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return cast(N)->getAlignment() >= 16; +}]>; + +// Like 'load', but always requires vector alignment. +def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() >= 16; +}]>; + +def alignedloadfsf32 : PatFrag<(ops node:$ptr), + (f32 (alignedload node:$ptr))>; +def alignedloadfsf64 : PatFrag<(ops node:$ptr), + (f64 (alignedload node:$ptr))>; +def alignedloadv4f32 : PatFrag<(ops node:$ptr), + (v4f32 (alignedload node:$ptr))>; +def alignedloadv2f64 : PatFrag<(ops node:$ptr), + (v2f64 (alignedload node:$ptr))>; +def alignedloadv4i32 : PatFrag<(ops node:$ptr), + (v4i32 (alignedload node:$ptr))>; +def alignedloadv2i64 : PatFrag<(ops node:$ptr), + (v2i64 (alignedload node:$ptr))>; + +// FIXME: move this to a more appropriate place after all AVX is done. +def alignedloadv8f32 : PatFrag<(ops node:$ptr), + (v8f32 (alignedload node:$ptr))>; +def alignedloadv4f64 : PatFrag<(ops node:$ptr), + (v4f64 (alignedload node:$ptr))>; +def alignedloadv8i32 : PatFrag<(ops node:$ptr), + (v8i32 (alignedload node:$ptr))>; +def alignedloadv4i64 : PatFrag<(ops node:$ptr), + (v4i64 (alignedload node:$ptr))>; + +// Like 'load', but uses special alignment checks suitable for use in +// memory operands in most SSE instructions, which are required to +// be naturally aligned on some targets but not on others. If the subtarget +// allows unaligned accesses, match any load, though this may require +// setting a feature bit in the processor (on startup, for example). +// Opteron 10h and later implement such a feature. 
+def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return Subtarget->hasVectorUAMem() + || cast(N)->getAlignment() >= 16; +}]>; + +def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>; +def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>; +def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>; +def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>; +def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>; +def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>; +def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; + +// FIXME: move this to a more appropriate place after all AVX is done. +def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; +def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; + +// SSSE3 uses MMX registers for some instructions. They aren't aligned on a +// 16-byte boundary. +// FIXME: 8 byte alignment for mmx reads is not required +def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return cast(N)->getAlignment() >= 8; +}]>; + +def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>; +def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>; +def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>; +def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>; + +// MOVNT Support +// Like 'store', but requires the non-temporal bit to be set +def nontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast(N)) + return ST->isNonTemporal(); + return false; +}]>; + +def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast(N)) + return ST->isNonTemporal() && !ST->isTruncatingStore() && + ST->getAddressingMode() == ISD::UNINDEXED && + ST->getAlignment() >= 16; + return false; +}]>; + +def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr), + (st node:$val, node:$ptr), [{ + if (StoreSDNode *ST = dyn_cast(N)) + return ST->isNonTemporal() && + ST->getAlignment() < 16; + return false; +}]>; + +def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>; +def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>; +def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>; +def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>; +def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; +def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; + +def vzmovl_v2i64 : PatFrag<(ops node:$src), + (bitconvert (v2i64 (X86vzmovl + (v2i64 (scalar_to_vector (loadi64 node:$src))))))>; +def vzmovl_v4i32 : PatFrag<(ops node:$src), + (bitconvert (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector (loadi32 node:$src))))))>; + +def vzload_v2i64 : PatFrag<(ops node:$src), + (bitconvert (v2i64 (X86vzload node:$src)))>; + + +def fp32imm0 : PatLeaf<(f32 fpimm), [{ + return N->isExactlyValue(+0.0); +}]>; + +// BYTE_imm - Transform bit immediates into byte immediates. +def BYTE_imm : SDNodeXForm> 3 + return getI32Imm(N->getZExtValue() >> 3); +}]>; + +// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*, +// SHUFP* etc. imm. +def SHUFFLE_get_shuf_imm : SDNodeXForm; + +// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to +// PSHUFHW imm. 
+def SHUFFLE_get_pshufhw_imm : SDNodeXForm; + +// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to +// PSHUFLW imm. +def SHUFFLE_get_pshuflw_imm : SDNodeXForm; + +// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to +// a PALIGNR imm. +def SHUFFLE_get_palign_imm : SDNodeXForm; + +def splat_lo : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + ShuffleVectorSDNode *SVOp = cast(N); + return SVOp->isSplat() && SVOp->getSplatIndex() == 0; +}]>; + +def movddup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVDDUPMask(cast(N)); +}]>; + +def movhlps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHLPSMask(cast(N)); +}]>; + +def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVHLPS_v_undef_Mask(cast(N)); +}]>; + +def movlhps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLHPSMask(cast(N)); +}]>; + +def movlp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLPMask(cast(N)); +}]>; + +def movl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLMask(cast(N)); +}]>; + +def movshdup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVSHDUPMask(cast(N)); +}]>; + +def movsldup : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVSLDUPMask(cast(N)); +}]>; + +def unpckl : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKLMask(cast(N)); +}]>; + +def unpckh : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKHMask(cast(N)); +}]>; + +def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKL_v_undef_Mask(cast(N)); +}]>; + +def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isUNPCKH_v_undef_Mask(cast(N)); +}]>; + +def pshufd : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFDMask(cast(N)); +}], SHUFFLE_get_shuf_imm>; + +def shufp : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isSHUFPMask(cast(N)); +}], SHUFFLE_get_shuf_imm>; + +def pshufhw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFHWMask(cast(N)); +}], SHUFFLE_get_pshufhw_imm>; + +def pshuflw : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPSHUFLWMask(cast(N)); +}], SHUFFLE_get_pshuflw_imm>; + +def palign : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isPALIGNRMask(cast(N)); +}], SHUFFLE_get_palign_imm>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 34e12ca..ce471ea 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -784,7 +784,9 @@ static bool isFrameLoadOpcode(int Opcode) { case X86::MOV8rm: case X86::MOV16rm: case X86::MOV32rm: + case X86::MOV32rm_TC: case X86::MOV64rm: + case X86::MOV64rm_TC: case X86::LD_Fp64m: case X86::MOVSSrm: case X86::MOVSDrm: @@ -805,7 +807,9 @@ static bool isFrameStoreOpcode(int Opcode) { 
case X86::MOV8mr: case X86::MOV16mr: case X86::MOV32mr: + case X86::MOV32mr_TC: case X86::MOV64mr: + case X86::MOV64mr_TC: case X86::ST_FpP64m: case X86::MOVSSmr: case X86::MOVSDmr: @@ -863,7 +867,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if (isFrameStoreOpcode(MI->getOpcode())) if (isFrameOperand(MI, 0, FrameIndex)) - return MI->getOperand(X86AddrNumOperands).getReg(); + return MI->getOperand(X86::AddrNumOperands).getReg(); return 0; } @@ -1064,14 +1068,9 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo &TRI) const { DebugLoc DL = Orig->getDebugLoc(); - if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { - DestReg = TRI->getSubReg(DestReg, SubIdx); - SubIdx = 0; - } - // MOV32r0 etc. are implemented with xor which clobbers condition code. // Re-materialize them as movri instructions to avoid side effects. bool Clone = true; @@ -1098,14 +1097,13 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, if (Clone) { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); MBB.insert(I, MI); } else { - BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0); + BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0); } MachineInstr *NewMI = prior(I); - NewMI->getOperand(0).setSubReg(SubIdx); + NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI); } /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that @@ -1151,10 +1149,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, // least on modern x86 machines). BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg); MachineInstr *InsMI = - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg) - .addReg(leaInReg) - .addReg(Src, getKillRegState(isKill)) - .addImm(X86::sub_16bit); + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY)) + .addReg(leaInReg, RegState::Define, X86::sub_16bit) + .addReg(Src, getKillRegState(isKill)); MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg); @@ -1165,20 +1162,20 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, case X86::SHL16ri: { unsigned ShAmt = MI->getOperand(2).getImm(); MIB.addReg(0).addImm(1 << ShAmt) - .addReg(leaInReg, RegState::Kill).addImm(0); + .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0); break; } case X86::INC16r: case X86::INC64_16r: - addLeaRegOffset(MIB, leaInReg, true, 1); + addRegOffset(MIB, leaInReg, true, 1); break; case X86::DEC16r: case X86::DEC64_16r: - addLeaRegOffset(MIB, leaInReg, true, -1); + addRegOffset(MIB, leaInReg, true, -1); break; case X86::ADD16ri: case X86::ADD16ri8: - addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); + addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm()); break; case X86::ADD16rr: { unsigned Src2 = MI->getOperand(2).getReg(); @@ -1195,10 +1192,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, // well be shifting and then extracting the lower 16-bits. 
BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2); InsMI2 = - BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2) - .addReg(leaInReg2) - .addReg(Src2, getKillRegState(isKill2)) - .addImm(X86::sub_16bit); + BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY)) + .addReg(leaInReg2, RegState::Define, X86::sub_16bit) + .addReg(Src2, getKillRegState(isKill2)); addRegReg(MIB, leaInReg, true, leaInReg2, true); } if (LV && isKill2 && InsMI2) @@ -1209,10 +1205,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineInstr *NewMI = MIB; MachineInstr *ExtMI = - BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG)) + BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) - .addReg(leaOutReg, RegState::Kill) - .addImm(X86::sub_16bit); + .addReg(leaOutReg, RegState::Kill, X86::sub_16bit); if (LV) { // Update live variables @@ -1283,7 +1278,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) .addReg(Src, getKillRegState(isKill)) - .addImm(0); + .addImm(0).addReg(0); break; } case X86::SHL32ri: { @@ -1297,7 +1292,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) - .addReg(Src, getKillRegState(isKill)).addImm(0); + .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0); break; } case X86::SHL16ri: { @@ -1313,7 +1308,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, .addReg(Dest, RegState::Define | getDeadRegState(isDead)) .addReg(0).addImm(1 << ShAmt) .addReg(Src, getKillRegState(isKill)) - .addImm(0); + .addImm(0).addReg(0); break; } default: { @@ -1331,7 +1326,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!"); unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, 1); @@ -1353,7 +1348,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!"); unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, -1); @@ -1401,7 +1396,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD64ri32: case X86::ADD64ri8: assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); @@ -1410,7 +1405,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::ADD32ri8: { assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); unsigned Opc = is64Bit ? 
X86::LEA64_32r : X86::LEA32r; - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); @@ -1421,7 +1416,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, if (DisableLEA16) return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0; assert(MI->getNumOperands() >= 3 && "Unknown add instruction!"); - NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) + NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r)) .addReg(Dest, RegState::Define | getDeadRegState(isDead)), Src, isKill, MI->getOperand(2).getImm()); @@ -1845,9 +1840,8 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { unsigned X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const { - // FIXME this should probably have a DebugLoc operand - DebugLoc dl; + const SmallVectorImpl &Cond, + DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 1 || Cond.size() == 0) && @@ -1856,7 +1850,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, if (Cond.empty()) { // Unconditional branch? assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB); return 1; } @@ -1866,27 +1860,27 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, switch (CC) { case X86::COND_NP_OR_E: // Synthesize NP_OR_E with two branches. - BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB); ++Count; break; case X86::COND_NE_OR_P: // Synthesize NE_OR_P with two branches. - BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB); ++Count; - BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB); + BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB); ++Count; break; default: { unsigned Opc = GetCondBranchFromCond(CC); - BuildMI(&MBB, dl, get(Opc)).addMBB(TBB); + BuildMI(&MBB, DL, get(Opc)).addMBB(TBB); ++Count; } } if (FBB) { // Two-way Conditional branch. Insert the second branch. - BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB); + BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB); ++Count; } return Count; @@ -1897,237 +1891,153 @@ static bool isHReg(unsigned Reg) { return X86::GR8_ABCD_HRegClass.contains(Reg); } -bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - // Determine if DstRC and SrcRC have a common superclass in common. - const TargetRegisterClass *CommonRC = DestRC; - if (DestRC == SrcRC) - /* Source and destination have the same register class. */; - else if (CommonRC->hasSuperClass(SrcRC)) - CommonRC = SrcRC; - else if (!DestRC->hasSubClass(SrcRC)) { - // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other, - // but we want to copy them as GR64. Similarly, for GR32_NOREX and - // GR32_NOSP, copy as GR32. 
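The InsertBranch change above resolves the old FIXME by making callers supply the DebugLoc instead of having the hook fabricate an empty one. A sketch of the usual analyze/remove/re-insert pattern under the new signature; using findDebugLoc as the source of the location is an assumption, not something this patch mandates:

SmallVector<MachineOperand, 4> Cond;
MachineBasicBlock *TBB = 0, *FBB = 0;
if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond)) {
  DebugLoc DL = MBB.findDebugLoc(MBB.getFirstTerminator());
  TII->RemoveBranch(MBB);                      // drop the old terminators
  TII->InsertBranch(MBB, TBB, FBB, Cond, DL);  // re-emit with a real location
}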
- if (SrcRC->hasSuperClass(&X86::GR64RegClass) && - DestRC->hasSuperClass(&X86::GR64RegClass)) - CommonRC = &X86::GR64RegClass; - else if (SrcRC->hasSuperClass(&X86::GR32RegClass) && - DestRC->hasSuperClass(&X86::GR32RegClass)) - CommonRC = &X86::GR32RegClass; +void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + // First deal with the normal symmetric copies. + unsigned Opc = 0; + if (X86::GR64RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOV64rr; + else if (X86::GR32RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOV32rr; + else if (X86::GR16RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOV16rr; + else if (X86::GR8RegClass.contains(DestReg, SrcReg)) { + // Copying to or from a physical H register on x86-64 requires a NOREX + // move. Otherwise use a normal move. + if ((isHReg(DestReg) || isHReg(SrcReg)) && + TM.getSubtarget().is64Bit()) + Opc = X86::MOV8rr_NOREX; else - CommonRC = 0; - } - - if (CommonRC) { - unsigned Opc; - if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) { - Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32RegClass || - CommonRC == &X86::GR32_NOSPRegClass) { - Opc = X86::MOV32rr; - } else if (CommonRC == &X86::GR16RegClass) { - Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8RegClass) { - // Copying to or from a physical H register on x86-64 requires a NOREX - // move. Otherwise use a normal move. - if ((isHReg(DestReg) || isHReg(SrcReg)) && - TM.getSubtarget().is64Bit()) - Opc = X86::MOV8rr_NOREX; - else - Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_ABCDRegClass) { - Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32_ABCDRegClass) { - Opc = X86::MOV32rr; - } else if (CommonRC == &X86::GR16_ABCDRegClass) { - Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { - if (TM.getSubtarget().is64Bit()) - Opc = X86::MOV8rr_NOREX; - else - Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_NOREXRegClass || - CommonRC == &X86::GR64_NOREX_NOSPRegClass) { - Opc = X86::MOV64rr; - } else if (CommonRC == &X86::GR32_NOREXRegClass) { - Opc = X86::MOV32rr; - } else if (CommonRC == &X86::GR16_NOREXRegClass) { - Opc = X86::MOV16rr; - } else if (CommonRC == &X86::GR8_NOREXRegClass) { - Opc = X86::MOV8rr; - } else if (CommonRC == &X86::GR64_TCRegClass) { - Opc = X86::MOV64rr_TC; - } else if (CommonRC == &X86::GR32_TCRegClass) { - Opc = X86::MOV32rr_TC; - } else if (CommonRC == &X86::RFP32RegClass) { - Opc = X86::MOV_Fp3232; - } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) { - Opc = X86::MOV_Fp6464; - } else if (CommonRC == &X86::RFP80RegClass) { - Opc = X86::MOV_Fp8080; - } else if (CommonRC == &X86::FR32RegClass) { - Opc = X86::FsMOVAPSrr; - } else if (CommonRC == &X86::FR64RegClass) { - Opc = X86::FsMOVAPDrr; - } else if (CommonRC == &X86::VR128RegClass) { - Opc = X86::MOVAPSrr; - } else if (CommonRC == &X86::VR64RegClass) { - Opc = X86::MMX_MOVQ64rr; - } else { - return false; - } - BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); - return true; + } else if (X86::VR128RegClass.contains(DestReg, SrcReg)) + Opc = X86::MOVAPSrr; + else if (X86::VR64RegClass.contains(DestReg, SrcReg)) + Opc = X86::MMX_MOVQ64rr; + + if (Opc) { + BuildMI(MBB, MI, DL, get(Opc), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } // Moving EFLAGS to / from another register requires a push and a pop. 
- if (SrcRC == &X86::CCRRegClass) { - if (SrcReg != X86::EFLAGS) - return false; - if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { + if (SrcReg == X86::EFLAGS) { + if (X86::GR64RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF64)); BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); - return true; - } else if (DestRC == &X86::GR32RegClass || - DestRC == &X86::GR32_NOSPRegClass) { + return; + } else if (X86::GR32RegClass.contains(DestReg)) { BuildMI(MBB, MI, DL, get(X86::PUSHF32)); BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); - return true; + return; } - } else if (DestRC == &X86::CCRRegClass) { - if (DestReg != X86::EFLAGS) - return false; - if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); + } + if (DestReg == X86::EFLAGS) { + if (X86::GR64RegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(X86::PUSH64r)) + .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF64)); - return true; - } else if (SrcRC == &X86::GR32RegClass || - DestRC == &X86::GR32_NOSPRegClass) { - BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); + return; + } else if (X86::GR32RegClass.contains(SrcReg)) { + BuildMI(MBB, MI, DL, get(X86::PUSH32r)) + .addReg(SrcReg, getKillRegState(KillSrc)); BuildMI(MBB, MI, DL, get(X86::POPF32)); - return true; - } - } - - // Moving from ST(0) turns into FpGET_ST0_32 etc. - if (SrcRC == &X86::RSTRegClass) { - // Copying from ST(0)/ST(1). - if (SrcReg != X86::ST0 && SrcReg != X86::ST1) - // Can only copy from ST(0)/ST(1) right now - return false; - bool isST0 = SrcReg == X86::ST0; - unsigned Opc; - if (DestRC == &X86::RFP32RegClass) - Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32; - else if (DestRC == &X86::RFP64RegClass) - Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64; - else { - if (DestRC != &X86::RFP80RegClass) - return false; - Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80; + return; } - BuildMI(MBB, MI, DL, get(Opc), DestReg); - return true; } - // Moving to ST(0) turns into FpSET_ST0_32 etc. - if (DestRC == &X86::RSTRegClass) { - // Copying to ST(0) / ST(1). - if (DestReg != X86::ST0 && DestReg != X86::ST1) - // Can only copy to TOS right now - return false; - bool isST0 = DestReg == X86::ST0; - unsigned Opc; - if (SrcRC == &X86::RFP32RegClass) - Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32; - else if (SrcRC == &X86::RFP64RegClass) - Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64; - else { - if (SrcRC != &X86::RFP80RegClass) - return false; - Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80; - } - BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg); - return true; - } - - // Not yet supported! 
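The new copyPhysReg above keys on physical-register membership (GR64RegClass.contains(DestReg, SrcReg)) rather than comparing register-class pointers, which is what lets the long cross-class chain collapse; only EFLAGS keeps a special push/pop path. A standalone sketch of the same dispatch idiom, with hypothetical register and opcode enums standing in for the TableGen-generated classes:

    #include <algorithm>

    // Hypothetical stand-ins for the generated register classes; a copy is
    // "symmetric" for a class when both endpoints are members of it.
    struct MiniRegClass {
      const unsigned *B, *E;
      bool contains(unsigned R) const { return std::find(B, E, R) != E; }
      bool contains(unsigned Dst, unsigned Src) const {
        return contains(Dst) && contains(Src);
      }
    };

    enum { NoCopy = 0, EAX, EBX, RAX, RBX, MOV32rr = 100, MOV64rr };
    const unsigned GR32Regs[] = { EAX, EBX };
    const unsigned GR64Regs[] = { RAX, RBX };
    const MiniRegClass GR32 = { GR32Regs, GR32Regs + 2 };
    const MiniRegClass GR64 = { GR64Regs, GR64Regs + 2 };

    // First matching class wins; 0 falls through to the special cases and
    // finally to the "cannot copy" diagnostic, as in the hook above.
    unsigned selectCopyOpcode(unsigned Dst, unsigned Src) {
      if (GR64.contains(Dst, Src)) return MOV64rr;
      if (GR32.contains(Dst, Src)) return MOV32rr;
      return NoCopy;
    }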
- return false; + DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg) + << " to " << RI.getName(DestReg) << '\n'); + llvm_unreachable("Cannot emit physreg copy instruction"); } -static unsigned getStoreRegOpcode(unsigned SrcReg, - const TargetRegisterClass *RC, - bool isStackAligned, - TargetMachine &TM) { - unsigned Opc = 0; - if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { - Opc = X86::MOV64mr; - } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { - Opc = X86::MOV32mr; - } else if (RC == &X86::GR16RegClass) { - Opc = X86::MOV16mr; - } else if (RC == &X86::GR8RegClass) { +static unsigned getLoadStoreRegOpcode(unsigned Reg, + const TargetRegisterClass *RC, + bool isStackAligned, + const TargetMachine &TM, + bool load) { + switch (RC->getID()) { + default: + llvm_unreachable("Unknown regclass"); + case X86::GR64RegClassID: + case X86::GR64_NOSPRegClassID: + return load ? X86::MOV64rm : X86::MOV64mr; + case X86::GR32RegClassID: + case X86::GR32_NOSPRegClassID: + case X86::GR32_ADRegClassID: + return load ? X86::MOV32rm : X86::MOV32mr; + case X86::GR16RegClassID: + return load ? X86::MOV16rm : X86::MOV16mr; + case X86::GR8RegClassID: // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. - if (isHReg(SrcReg) && + if (isHReg(Reg) && TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8mr_NOREX; + return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; else - Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_ABCDRegClass) { - Opc = X86::MOV64mr; - } else if (RC == &X86::GR32_ABCDRegClass) { - Opc = X86::MOV32mr; - } else if (RC == &X86::GR16_ABCDRegClass) { - Opc = X86::MOV16mr; - } else if (RC == &X86::GR8_ABCD_LRegClass) { - Opc = X86::MOV8mr; - } else if (RC == &X86::GR8_ABCD_HRegClass) { + return load ? X86::MOV8rm : X86::MOV8mr; + case X86::GR64_ABCDRegClassID: + return load ? X86::MOV64rm : X86::MOV64mr; + case X86::GR32_ABCDRegClassID: + return load ? X86::MOV32rm : X86::MOV32mr; + case X86::GR16_ABCDRegClassID: + return load ? X86::MOV16rm : X86::MOV16mr; + case X86::GR8_ABCD_LRegClassID: + return load ? X86::MOV8rm :X86::MOV8mr; + case X86::GR8_ABCD_HRegClassID: if (TM.getSubtarget<X86Subtarget>().is64Bit()) - Opc = X86::MOV8mr_NOREX; + return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX; else - Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_NOREXRegClass || - RC == &X86::GR64_NOREX_NOSPRegClass) { - Opc = X86::MOV64mr; - } else if (RC == &X86::GR32_NOREXRegClass) { - Opc = X86::MOV32mr; - } else if (RC == &X86::GR16_NOREXRegClass) { - Opc = X86::MOV16mr; - } else if (RC == &X86::GR8_NOREXRegClass) { - Opc = X86::MOV8mr; - } else if (RC == &X86::GR64_TCRegClass) { - Opc = X86::MOV64mr_TC; - } else if (RC == &X86::GR32_TCRegClass) { - Opc = X86::MOV32mr_TC; - } else if (RC == &X86::RFP80RegClass) { - Opc = X86::ST_FpP80m; // pops - } else if (RC == &X86::RFP64RegClass) { - Opc = X86::ST_Fp64m; - } else if (RC == &X86::RFP32RegClass) { - Opc = X86::ST_Fp32m; - } else if (RC == &X86::FR32RegClass) { - Opc = X86::MOVSSmr; - } else if (RC == &X86::FR64RegClass) { - Opc = X86::MOVSDmr; - } else if (RC == &X86::VR128RegClass) { + return load ? X86::MOV8rm : X86::MOV8mr; + case X86::GR64_NOREXRegClassID: + case X86::GR64_NOREX_NOSPRegClassID: + return load ? X86::MOV64rm : X86::MOV64mr; + case X86::GR32_NOREXRegClassID: + return load ? X86::MOV32rm : X86::MOV32mr; + case X86::GR16_NOREXRegClassID: + return load ? X86::MOV16rm : X86::MOV16mr; + case X86::GR8_NOREXRegClassID: + return load ?
X86::MOV8rm : X86::MOV8mr; + case X86::GR64_TCRegClassID: + return load ? X86::MOV64rm_TC : X86::MOV64mr_TC; + case X86::GR32_TCRegClassID: + return load ? X86::MOV32rm_TC : X86::MOV32mr_TC; + case X86::RFP80RegClassID: + return load ? X86::LD_Fp80m : X86::ST_FpP80m; + case X86::RFP64RegClassID: + return load ? X86::LD_Fp64m : X86::ST_Fp64m; + case X86::RFP32RegClassID: + return load ? X86::LD_Fp32m : X86::ST_Fp32m; + case X86::FR32RegClassID: + return load ? X86::MOVSSrm : X86::MOVSSmr; + case X86::FR64RegClassID: + return load ? X86::MOVSDrm : X86::MOVSDmr; + case X86::VR128RegClassID: // If stack is realigned we can use aligned stores. - Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr; - } else if (RC == &X86::VR64RegClass) { - Opc = X86::MMX_MOVQ64mr; - } else { - llvm_unreachable("Unknown regclass"); + if (isStackAligned) + return load ? X86::MOVAPSrm : X86::MOVAPSmr; + else + return load ? X86::MOVUPSrm : X86::MOVUPSmr; + case X86::VR64RegClassID: + return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr; } +} + +static unsigned getStoreRegOpcode(unsigned SrcReg, + const TargetRegisterClass *RC, + bool isStackAligned, + TargetMachine &TM) { + return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false); +} - return Opc; + +static unsigned getLoadRegOpcode(unsigned DestReg, + const TargetRegisterClass *RC, + bool isStackAligned, + const TargetMachine &TM) { + return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true); } void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -2150,7 +2060,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - bool isAligned = (*MMOBegin)->getAlignment() >= 16; + bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); @@ -2161,72 +2071,6 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, NewMIs.push_back(MIB); } -static unsigned getLoadRegOpcode(unsigned DestReg, - const TargetRegisterClass *RC, - bool isStackAligned, - const TargetMachine &TM) { - unsigned Opc = 0; - if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { - Opc = X86::MOV64rm; - } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { - Opc = X86::MOV32rm; - } else if (RC == &X86::GR16RegClass) { - Opc = X86::MOV16rm; - } else if (RC == &X86::GR8RegClass) { - // Copying to or from a physical H register on x86-64 requires a NOREX - // move. Otherwise use a normal move. 
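getLoadStoreRegOpcode folds what used to be two hand-maintained tables (getStoreRegOpcode here and the getLoadRegOpcode body removed further down) into one switch parameterized by direction, so the load and store opcode mappings can no longer drift apart. A minimal sketch of the pattern, with an invented opcode enum rather than the real X86 one:

    // Illustrative opcodes only; the real mapping is the switch above.
    enum MiniOpc { MOV32rm, MOV32mr, MOVAPSrm, MOVAPSmr, MOVUPSrm, MOVUPSmr };
    enum MiniRC  { GR32ID, VR128ID };

    MiniOpc getLoadStoreOpc(MiniRC RC, bool isStackAligned, bool load) {
      switch (RC) {
      case GR32ID:
        return load ? MOV32rm : MOV32mr;
      case VR128ID:
        // A realigned stack lets spill code use the faster aligned forms.
        if (isStackAligned)
          return load ? MOVAPSrm : MOVAPSmr;
        return load ? MOVUPSrm : MOVUPSmr;
      }
      return MOV32rm; // not reached
    }

    // Direction-fixing wrappers, mirroring getLoadRegOpcode/getStoreRegOpcode.
    MiniOpc getLoadOpc(MiniRC RC, bool A)  { return getLoadStoreOpc(RC, A, true); }
    MiniOpc getStoreOpc(MiniRC RC, bool A) { return getLoadStoreOpc(RC, A, false); }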
- if (isHReg(DestReg) && - TM.getSubtarget().is64Bit()) - Opc = X86::MOV8rm_NOREX; - else - Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_ABCDRegClass) { - Opc = X86::MOV64rm; - } else if (RC == &X86::GR32_ABCDRegClass) { - Opc = X86::MOV32rm; - } else if (RC == &X86::GR16_ABCDRegClass) { - Opc = X86::MOV16rm; - } else if (RC == &X86::GR8_ABCD_LRegClass) { - Opc = X86::MOV8rm; - } else if (RC == &X86::GR8_ABCD_HRegClass) { - if (TM.getSubtarget().is64Bit()) - Opc = X86::MOV8rm_NOREX; - else - Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_NOREXRegClass || - RC == &X86::GR64_NOREX_NOSPRegClass) { - Opc = X86::MOV64rm; - } else if (RC == &X86::GR32_NOREXRegClass) { - Opc = X86::MOV32rm; - } else if (RC == &X86::GR16_NOREXRegClass) { - Opc = X86::MOV16rm; - } else if (RC == &X86::GR8_NOREXRegClass) { - Opc = X86::MOV8rm; - } else if (RC == &X86::GR64_TCRegClass) { - Opc = X86::MOV64rm_TC; - } else if (RC == &X86::GR32_TCRegClass) { - Opc = X86::MOV32rm_TC; - } else if (RC == &X86::RFP80RegClass) { - Opc = X86::LD_Fp80m; - } else if (RC == &X86::RFP64RegClass) { - Opc = X86::LD_Fp64m; - } else if (RC == &X86::RFP32RegClass) { - Opc = X86::LD_Fp32m; - } else if (RC == &X86::FR32RegClass) { - Opc = X86::MOVSSrm; - } else if (RC == &X86::FR64RegClass) { - Opc = X86::MOVSDrm; - } else if (RC == &X86::VR128RegClass) { - // If stack is realigned we can use aligned loads. - Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm; - } else if (RC == &X86::VR64RegClass) { - Opc = X86::MMX_MOVQ64rm; - } else { - llvm_unreachable("Unknown regclass"); - } - - return Opc; -} void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, @@ -2246,7 +2090,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl &NewMIs) const { - bool isAligned = (*MMOBegin)->getAlignment() >= 16; + bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); @@ -2277,18 +2121,17 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); if (Reg == FPReg) // X86RegisterInfo::emitPrologue will handle spilling of frame register. continue; - if (RegClass != &X86::VR128RegClass && !isWin64) { + if (!X86::VR128RegClass.contains(Reg) && !isWin64) { CalleeFrameSize += SlotSize; BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); } else { - storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass, - &RI); + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), + &X86::VR128RegClass, &RI); } } @@ -2315,11 +2158,11 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (Reg == FPReg) // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 
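Both storeRegToAddr and loadRegFromAddr now guard the alignment probe with MMOBegin != MMOEnd: an unfolded instruction may carry no memory operands at all, and the old code dereferenced *MMOBegin unconditionally. The shape of the fix, as a self-contained sketch:

    #include <vector>

    struct MiniMemOperand { unsigned Alignment; };
    typedef std::vector<MiniMemOperand>::const_iterator mmo_iterator;

    // With no memory operands we must conservatively report "unaligned"
    // instead of reading through a past-the-end iterator.
    bool isAligned16(mmo_iterator MMOBegin, mmo_iterator MMOEnd) {
      return MMOBegin != MMOEnd && MMOBegin->Alignment >= 16;
    }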
continue; - const TargetRegisterClass *RegClass = CSI[i].getRegClass(); - if (RegClass != &X86::VR128RegClass && !isWin64) { + if (!X86::VR128RegClass.contains(Reg) && !isWin64) { BuildMI(MBB, MI, DL, get(Opc), Reg); } else { - loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI); + loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + &X86::VR128RegClass, &RI); } } return true; @@ -2492,7 +2335,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } // No fusion - if (PrintFailedFusing) + if (PrintFailedFusing && !MI->isCopy()) dbgs() << "We failed to fuse operand " << i << " in " << *MI; return NULL; } @@ -2610,7 +2453,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, } else if (Ops.size() != 1) return NULL; - SmallVector<MachineOperand,X86AddrNumOperands> MOs; + SmallVector<MachineOperand,X86::AddrNumOperands> MOs; switch (LoadMI->getOpcode()) { case X86::V_SET0PS: case X86::V_SET0PD: @@ -2632,7 +2475,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, if (TM.getSubtarget<X86Subtarget>().is64Bit()) PICBase = X86::RIP; else - // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); + // FIXME: PICBase = getGlobalBaseReg(&MF); // This doesn't work for several reasons. // 1. GlobalBaseReg may have been spilled. // 2. It may not be live at MI. @@ -2664,7 +2507,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, default: { // Folding a normal load. Just copy the load's address operands. unsigned NumOps = LoadMI->getDesc().getNumOperands(); - for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i) + for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i) MOs.push_back(LoadMI->getOperand(i)); break; } @@ -2727,7 +2570,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, if (I != OpcodeTablePtr->end()) return true; } - return false; + return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops); } bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, @@ -2751,13 +2594,20 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, const TargetInstrDesc &TID = get(Opc); const TargetOperandInfo &TOI = TID.OpInfo[Index]; const TargetRegisterClass *RC = TOI.getRegClass(&RI); - SmallVector<MachineOperand,X86AddrNumOperands> AddrOps; + if (!MI->hasOneMemOperand() && + RC == &X86::VR128RegClass && + !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + // Without memoperands, loadRegFromAddr and storeRegToStackSlot will + // conservatively assume the address is unaligned. That's bad for + // performance. + return false; + SmallVector<MachineOperand,X86::AddrNumOperands> AddrOps; SmallVector<MachineOperand,2> BeforeOps; SmallVector<MachineOperand,2> AfterOps; SmallVector<MachineOperand,4> ImpOps; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &Op = MI->getOperand(i); - if (i >= Index && i < Index + X86AddrNumOperands) + if (i >= Index && i < Index + X86::AddrNumOperands) AddrOps.push_back(Op); else if (Op.isReg() && Op.isImplicit()) ImpOps.push_back(Op); @@ -2776,7 +2626,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs); if (UnfoldStore) { // Address operands cannot be marked isKill.
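The folding and unfolding paths above now count address operands with the namespaced X86::AddrNumOperands constant, defined together with its sibling enums in X86InstrInfo.h later in this patch. A sketch of the five-slot [BaseReg, ScaleAmt, IndexReg, Disp, Segment] layout those loops copy around (enum values as in the header; the flat operand array is hypothetical):

    #include <cassert>
    #include <vector>

    enum {
      AddrBaseReg = 0, AddrScaleAmt = 1, AddrIndexReg = 2,
      AddrDisp = 3, AddrSegmentReg = 4, AddrNumOperands = 5
    };

    // Copy one full memory reference out of an operand list, the way the
    // unfold loop above gathers AddrOps before rebuilding the instruction.
    std::vector<int> extractMemRef(const std::vector<int> &Ops, unsigned Index) {
      assert(Index + AddrNumOperands <= Ops.size() && "truncated memory operand");
      return std::vector<int>(Ops.begin() + Index,
                              Ops.begin() + Index + AddrNumOperands);
    }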
- for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) { + for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) { MachineOperand &MO = NewMIs[0]->getOperand(i); if (MO.isReg()) MO.setIsKill(false); @@ -2873,7 +2723,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned NumOps = N->getNumOperands(); for (unsigned i = 0; i != NumOps-1; ++i) { SDValue Op = N->getOperand(i); - if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands) + if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands) AddrOps.push_back(Op); else if (i < Index-NumDefs) BeforeOps.push_back(Op); @@ -2892,7 +2742,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs = MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), cast<MachineSDNode>(N)->memoperands_end()); - bool isAligned = (*MMOs.first)->getAlignment() >= 16; + if (!(*MMOs.first) && + RC == &X86::VR128RegClass && + !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + // Do not introduce a slow unaligned load. + return false; + bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); @@ -2929,7 +2784,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> MMOs = MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), cast<MachineSDNode>(N)->memoperands_end()); - bool isAligned = (*MMOs.first)->getAlignment() >= 16; + if (!(*MMOs.first) && + RC == &X86::VR128RegClass && + !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + // Do not introduce a slow unaligned store. + return false; + bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), dl, MVT::Other, @@ -3065,16 +2925,16 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, EVT VT = Load1->getValueType(0); switch (VT.getSimpleVT().SimpleTy) { - default: { + default: // XMM registers. In 64-bit mode we can be a bit more aggressive since we // have 16 of them to play with. if (TM.getSubtargetImpl()->is64Bit()) { if (NumLoads >= 3) return false; - } else if (NumLoads) + } else if (NumLoads) { return false; + } break; - } case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: case MVT::f32: case MVT::f64: if (NumLoads) return false; @@ -3083,6 +2943,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, case MVT::f64: if (NumLoads) return false; + break; } return true; @@ -3123,6 +2984,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) { case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: + case X86::YMM8: case X86::YMM9: case X86::YMM10: case X86::YMM11: + case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15: return true; } return false; @@ -3194,7 +3057,7 @@ unsigned X86InstrInfo::determineREX(const MachineInstr &MI) { case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: case X86II::MRMDestMem: { - unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); + unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); i = isTwoAddr ?
1 : 0; if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) REX |= 1 << 2; @@ -3546,7 +3409,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, case X86II::MRMDestMem: { ++FinalSize; FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); - CurOp += X86AddrNumOperands + 1; + CurOp += X86::AddrNumOperands + 1; if (CurOp != NumOps) { ++CurOp; FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); @@ -3565,16 +3428,9 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, break; case X86II::MRMSrcMem: { - int AddrOperands; - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - AddrOperands = X86AddrNumOperands - 1; // No segment register - else - AddrOperands = X86AddrNumOperands; - ++FinalSize; FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode); - CurOp += AddrOperands + 1; + CurOp += X86::AddrNumOperands + 1; if (CurOp != NumOps) { ++CurOp; FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags)); @@ -3628,7 +3484,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, ++FinalSize; FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); - CurOp += X86AddrNumOperands; + CurOp += X86::AddrNumOperands; if (CurOp != NumOps) { const MachineOperand &MO = MI.getOperand(CurOp++); @@ -3694,6 +3550,8 @@ unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { /// the global base register value. Output instructions required to /// initialize the register in the function entry block, if necessary. /// +/// TODO: Eliminate this and move the code to X86MachineFunctionInfo. +/// unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { assert(!TM.getSubtarget().is64Bit() && "X86-64 PIC uses RIP relative addressing"); @@ -3703,30 +3561,10 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { if (GlobalBaseReg != 0) return GlobalBaseReg; - // Insert the set of GlobalBaseReg into the first MBB of the function - MachineBasicBlock &FirstMBB = MF->front(); - MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - DebugLoc DL = FirstMBB.findDebugLoc(MBBI); + // Create the register. The code to initialize it is inserted + // later, by the CGBR pass (below). MachineRegisterInfo &RegInfo = MF->getRegInfo(); - unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); - - const TargetInstrInfo *TII = TM.getInstrInfo(); - // Operand of MovePCtoStack is completely ignored by asm printer. It's - // only used in JIT code emission as displacement to pc. - BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); - - // If we're using vanilla 'GOT' PIC style, we should use relative addressing - // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. - if (TM.getSubtarget().isPICStyleGOT()) { - GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); - // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register - BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) - .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", - X86II::MO_GOT_ABSOLUTE_ADDRESS); - } else { - GlobalBaseReg = PC; - } - + GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); X86FI->setGlobalBaseReg(GlobalBaseReg); return GlobalBaseReg; } @@ -3784,3 +3622,65 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } +namespace { + /// CGBR - Create Global Base Reg pass. This initializes the PIC + /// global base register for x86-32. 
+ struct CGBR : public MachineFunctionPass { + static char ID; + CGBR() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + const X86TargetMachine *TM = + static_cast<const X86TargetMachine *>(&MF.getTarget()); + + assert(!TM->getSubtarget<X86Subtarget>().is64Bit() && + "X86-64 PIC uses RIP relative addressing"); + + // Only emit a global base reg in PIC mode. + if (TM->getRelocationModel() != Reloc::PIC_) + return false; + + // Insert the set of GlobalBaseReg into the first MBB of the function + MachineBasicBlock &FirstMBB = MF.front(); + MachineBasicBlock::iterator MBBI = FirstMBB.begin(); + DebugLoc DL = FirstMBB.findDebugLoc(MBBI); + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + const X86InstrInfo *TII = TM->getInstrInfo(); + + unsigned PC; + if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) + PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); + else + PC = TII->getGlobalBaseReg(&MF); + + // Operand of MovePCtoStack is completely ignored by asm printer. It's + // only used in JIT code emission as displacement to pc. + BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); + + // If we're using vanilla 'GOT' PIC style, we should use relative addressing + // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. + if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) { + unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF); + // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register + BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) + .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", + X86II::MO_GOT_ABSOLUTE_ADDRESS); + } + + return true; + } + + virtual const char *getPassName() const { + return "X86 PIC Global Base Reg Initialization"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char CGBR::ID = 0; +FunctionPass* +llvm::createGlobalBaseRegPass() { return new CGBR(); } diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 62d7c74..f762b58 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -24,6 +24,24 @@ namespace llvm { class X86TargetMachine; namespace X86 { + // Enums for memory operand decoding. Each memory operand is represented with + // a 5 operand sequence in the form: + // [BaseReg, ScaleAmt, IndexReg, Disp, Segment] + // These enums help decode this. + enum { + AddrBaseReg = 0, + AddrScaleAmt = 1, + AddrIndexReg = 2, + AddrDisp = 3, + + /// AddrSegmentReg - The operand # of the segment in the memory operand. + AddrSegmentReg = 4, + + /// AddrNumOperands - Total number of operands in a memory reference. + AddrNumOperands = 5 + }; + + // X86 specific condition code. These correspond to X86_*_COND in // X86InstrInfo.td. They must be kept in synch. enum CondCode { @@ -173,7 +191,19 @@ namespace X86II { /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", /// which is a PIC-base-relative reference to a hidden dyld lazy pointer /// stub. - MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE + MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, + + /// MO_TLVP - On a symbol operand this indicates that the immediate is + /// some TLS offset. + /// + /// This is the TLS offset for the Darwin TLS mechanism. + MO_TLVP, + + /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate + /// is some TLS offset from the picbase. + /// + /// This is the 32-bit TLS offset for Darwin TLS in PIC mode.
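With the CGBR pass in place, getGlobalBaseReg (rewritten earlier in this hunk) only reserves the virtual register, and the pass materializes the MOVPC32r/ADD32ri sequence once, at the top of the entry block of PIC functions. A toy sketch of that two-phase scheme, with invented names, outside of LLVM:

    // Phase 1: hand out a register number on demand, emitting nothing.
    struct MiniFunctionInfo {
      unsigned GlobalBaseReg;
      unsigned NextVReg;
      MiniFunctionInfo() : GlobalBaseReg(0), NextVReg(1) {}
      unsigned getGlobalBaseReg() {
        if (GlobalBaseReg == 0)
          GlobalBaseReg = NextVReg++; // reserve only; no code emitted here
        return GlobalBaseReg;
      }
    };

    // Phase 2: a late pass plants the one-time initialization at function
    // entry, the way CGBR::runOnMachineFunction does for PIC code.
    bool runGlobalBaseRegInit(MiniFunctionInfo &MFI, bool IsPIC) {
      if (!IsPIC)
        return false;                 // no base register in non-PIC code
      unsigned PC = MFI.getGlobalBaseReg();
      (void)PC;                       // real code emits "PC = current pc" here
      return true;                    // function was modified
    }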
+ MO_TLVP_PIC_BASE }; } @@ -203,6 +233,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) { case X86II::MO_PIC_BASE_OFFSET: // Darwin local global. case X86II::MO_DARWIN_NONLAZY_PIC_BASE: // Darwin/32 external global. case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: // Darwin/32 hidden global. + case X86II::MO_TLVP: // ??? Pretty sure.. return true; default: return false; @@ -347,9 +378,10 @@ namespace X86II { Imm8 = 1 << ImmShift, Imm8PCRel = 2 << ImmShift, Imm16 = 3 << ImmShift, - Imm32 = 4 << ImmShift, - Imm32PCRel = 5 << ImmShift, - Imm64 = 6 << ImmShift, + Imm16PCRel = 4 << ImmShift, + Imm32 = 5 << ImmShift, + Imm32PCRel = 6 << ImmShift, + Imm64 = 7 << ImmShift, //===------------------------------------------------------------------===// // FP Instruction Classification... Zero is non-fp instruction. @@ -403,28 +435,47 @@ namespace X86II { SSEDomainShift = 22, OpcodeShift = 24, - OpcodeMask = 0xFF << OpcodeShift + OpcodeMask = 0xFF << OpcodeShift, + + //===------------------------------------------------------------------===// + // VEX - The opcode prefix used by AVX instructions + VEX = 1ULL << 32, + + // VEX_W - Has a opcode specific functionality, but is used in the same + // way as REX_W is for regular SSE instructions. + VEX_W = 1ULL << 33, + + // VEX_4V - Used to specify an additional AVX/SSE register. Several 2 + // address instructions in SSE are represented as 3 address ones in AVX + // and the additional register is encoded in VEX_VVVV prefix. + VEX_4V = 1ULL << 34, + + // VEX_I8IMM - Specifies that the last register used in a AVX instruction, + // must be encoded in the i8 immediate field. This usually happens in + // instructions with 4 operands. + VEX_I8IMM = 1ULL << 35 }; // getBaseOpcodeFor - This function returns the "base" X86 opcode for the // specified machine instruction. // - static inline unsigned char getBaseOpcodeFor(unsigned TSFlags) { + static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { return TSFlags >> X86II::OpcodeShift; } - static inline bool hasImm(unsigned TSFlags) { + static inline bool hasImm(uint64_t TSFlags) { return (TSFlags & X86II::ImmMask) != 0; } /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. - static inline unsigned getSizeOfImm(unsigned TSFlags) { + static inline unsigned getSizeOfImm(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { default: assert(0 && "Unknown immediate size"); case X86II::Imm8: case X86II::Imm8PCRel: return 1; - case X86II::Imm16: return 2; + case X86II::Imm16: + case X86II::Imm16PCRel: return 2; case X86II::Imm32: case X86II::Imm32PCRel: return 4; case X86II::Imm64: return 8; @@ -433,23 +484,77 @@ namespace X86II { /// isImmPCRel - Return true if the immediate of the specified instruction's /// TSFlags indicates that it is pc relative. 
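The accessors above switch their TSFlags parameter from unsigned to uint64_t because the new VEX bits sit at positions 32-35; with a 32-bit flag word, a test like (TSFlags & X86II::VEX_4V) would always come out false. A reduced sketch, also showing the getMemoryOperandNo-style form dispatch that follows (mask and form values invented for the example):

    #include <cstdint>

    const uint64_t FormMask   = 0x3F;       // low bits: instruction form
    const uint64_t MRMDestMem = 4;          // illustrative encodings only
    const uint64_t MRMSrcMem  = 6;
    const uint64_t VEX_4V     = 1ULL << 34; // above bit 31: needs uint64_t

    // Memory reference index: operand 0 for MRMDestMem; for MRMSrcMem it
    // follows the destination register, plus one more when VEX_VVVV
    // encodes an extra source.
    int memoryOperandNo(uint64_t TSFlags) {
      switch (TSFlags & FormMask) {
      case MRMDestMem: return 0;
      case MRMSrcMem:  return (TSFlags & VEX_4V) ? 2 : 1;
      default:         return -1;           // no memory operand
      }
    }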
- static inline unsigned isImmPCRel(unsigned TSFlags) { + static inline unsigned isImmPCRel(uint64_t TSFlags) { switch (TSFlags & X86II::ImmMask) { - default: assert(0 && "Unknown immediate size"); - case X86II::Imm8PCRel: - case X86II::Imm32PCRel: - return true; - case X86II::Imm8: - case X86II::Imm16: - case X86II::Imm32: - case X86II::Imm64: - return false; + default: assert(0 && "Unknown immediate size"); + case X86II::Imm8PCRel: + case X86II::Imm16PCRel: + case X86II::Imm32PCRel: + return true; + case X86II::Imm8: + case X86II::Imm16: + case X86II::Imm32: + case X86II::Imm64: + return false; + } + } + + /// getMemoryOperandNo - The function returns the MCInst operand # for the + /// first field of the memory operand. If the instruction doesn't have a + /// memory operand, this returns -1. + /// + /// Note that this ignores tied operands. If there is a tied register which + /// is duplicated in the MCInst (e.g. "EAX = addl EAX, [mem]") it is only + /// counted as one operand. + /// + static inline int getMemoryOperandNo(uint64_t TSFlags) { + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form"); + default: assert(0 && "Unknown FormMask value in getMemoryOperandNo!"); + case X86II::Pseudo: + case X86II::RawFrm: + case X86II::AddRegFrm: + case X86II::MRMDestReg: + case X86II::MRMSrcReg: + return -1; + case X86II::MRMDestMem: + return 0; + case X86II::MRMSrcMem: { + bool HasVEX_4V = TSFlags & X86II::VEX_4V; + unsigned FirstMemOp = 1; + if (HasVEX_4V) + ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). + + // FIXME: Maybe lea should have its own form? This is a horrible hack. + //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || + // Opcode == X86::LEA16r || Opcode == X86::LEA32r) + return FirstMemOp; } - } + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: + case X86II::MRM4r: case X86II::MRM5r: + case X86II::MRM6r: case X86II::MRM7r: + return -1; + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + return 0; + case X86II::MRM_C1: + case X86II::MRM_C2: + case X86II::MRM_C3: + case X86II::MRM_C4: + case X86II::MRM_C8: + case X86II::MRM_C9: + case X86II::MRM_E8: + case X86II::MRM_F0: + case X86II::MRM_F8: + case X86II::MRM_F9: + return -1; + } + } } -const int X86AddrNumOperands = 5; - inline static bool isScale(const MachineOperand &MO) { return MO.isImm() && (MO.getImm() == 1 || MO.getImm() == 2 || @@ -555,7 +660,7 @@ public: void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const; + const TargetRegisterInfo &TRI) const; /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. 
When this flag is set, the target @@ -585,13 +690,12 @@ public: virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + const SmallVectorImpl &Cond, + DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0d59c42..1efef5a 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -72,6 +72,8 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>; +def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; + def SDT_X86SegmentBaseAddress : SDTypeProfile<1, 1, [SDTCisPtrTy<0>]>; def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>; @@ -182,6 +184,9 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; def X86MingwAlloca : SDNode<"X86ISD::MINGW_ALLOCA", SDTX86Void, [SDNPHasChain, SDNPInFlag, SDNPOutFlag]>; + +def X86TLSCall : SDNode<"X86ISD::TLSCALL", SDT_X86TLSCALL, + []>; //===----------------------------------------------------------------------===// // X86 Operand Definitions. @@ -197,13 +202,9 @@ def X86MemAsmOperand : AsmOperandClass { let Name = "Mem"; let SuperClasses = []; } -def X86NoSegMemAsmOperand : AsmOperandClass { - let Name = "NoSegMem"; - let SuperClasses = [X86MemAsmOperand]; -} def X86AbsMemAsmOperand : AsmOperandClass { let Name = "AbsMem"; - let SuperClasses = [X86NoSegMemAsmOperand]; + let SuperClasses = [X86MemAsmOperand]; } class X86MemOperand : Operand { let PrintMethod = printMethod; @@ -226,7 +227,7 @@ def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; -//def f256mem : X86MemOperand<"printf256mem">; +def f256mem : X86MemOperand<"printf256mem">; // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. @@ -245,15 +246,11 @@ def i32mem_TC : Operand { let ParserMatchClass = X86MemAsmOperand; } -def lea32mem : Operand { - let PrintMethod = "printlea32mem"; - let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm); - let ParserMatchClass = X86NoSegMemAsmOperand; -} let ParserMatchClass = X86AbsMemAsmOperand, PrintMethod = "print_pcrel_imm" in { def i32imm_pcrel : Operand; +def i16imm_pcrel : Operand; def offset8 : Operand; def offset16 : Operand; @@ -283,26 +280,31 @@ class ImmSExtAsmOperandClass : AsmOperandClass { // 64-bit immediates, but for a 16-bit target value we want to accept both "-1" // (which will be a -1ULL), and "0xFF" (-1 in 16-bits). 
-// [0, 0x7FFFFFFF] | [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF] +// [0, 0x7FFFFFFF] | +// [0xFFFFFFFF80000000, 0xFFFFFFFFFFFFFFFF] def ImmSExti64i32AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti64i32"; } -// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +// [0, 0x0000007F] | [0x000000000000FF80, 0x000000000000FFFF] | +// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti16i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti16i8"; let SuperClasses = [ImmSExti64i32AsmOperand]; } -// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +// [0, 0x0000007F] | [0x00000000FFFFFF80, 0x00000000FFFFFFFF] | +// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti32i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti32i8"; } -// [0, 0x0000007F] | [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] +// [0, 0x0000007F] | +// [0xFFFFFFFFFFFFFF80, 0xFFFFFFFFFFFFFFFF] def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass { let Name = "ImmSExti64i8"; - let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, ImmSExti64i32AsmOperand]; + let SuperClasses = [ImmSExti16i8AsmOperand, ImmSExti32i8AsmOperand, + ImmSExti64i32AsmOperand]; } // A couple of more descriptive operand definitions. @@ -321,10 +323,10 @@ def i32i8imm : Operand { // Define X86 specific addressing mode. def addr : ComplexPattern; -def lea32addr : ComplexPattern; -def tls32addr : ComplexPattern; //===----------------------------------------------------------------------===// @@ -704,6 +706,12 @@ let isCall = 1 in "lcall{w}\t{*}$dst", []>, OpSize; def FARCALL32m : I<0xFF, MRM3m, (outs), (ins opaque48mem:$dst), "lcall{l}\t{*}$dst", []>; + + // callw for 16 bit code for the assembler. + let isAsmParserOnly = 1 in + def CALLpcrel16 : Ii16PCRel<0xE8, RawFrm, + (outs), (ins i16imm_pcrel:$dst, variable_ops), + "callw\t$dst", []>, OpSize; } // Constructing a stack frame. @@ -737,18 +745,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in "jmp\t$dst # TAILCALL", []>; def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops), - "jmp{l}\t{*}$dst # TAILCALL", - []>; + "", []>; // FIXME: Remove encoding when JIT is dead. let mayLoad = 1 in def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops), "jmp{l}\t{*}$dst # TAILCALL", []>; - - // FIXME: This is a hack so that MCInst lowering can preserve the TAILCALL - // marker on instructions, while still being able to relax. 
- let isCodeGenOnly = 1 in { - def TAILJMP_1 : Ii8PCRel<0xEB, RawFrm, (outs), (ins brtarget8:$dst), - "jmp\t$dst # TAILCALL", []>; - } } //===----------------------------------------------------------------------===// @@ -815,7 +815,18 @@ def PUSHF32 : I<0x9C, RawFrm, (outs), (ins), "pushf{l|d}", []>, Requires<[In32BitMode]>; } -let isTwoAddress = 1 in // GR32 = bswap GR32 +let Defs = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], Uses = [ESP], + mayLoad=1, neverHasSideEffects=1 in { +def POPA32 : I<0x61, RawFrm, (outs), (ins), "popa{l}", []>, + Requires<[In32BitMode]>; +} +let Defs = [ESP], Uses = [EDI, ESI, EBP, EBX, EDX, ECX, EAX, ESP], + mayStore=1, neverHasSideEffects=1 in { +def PUSHA32 : I<0x60, RawFrm, (outs), (ins), "pusha{l}", []>, + Requires<[In32BitMode]>; +} + +let Uses = [EFLAGS], Constraints = "$src = $dst" in // GR32 = bswap GR32 def BSWAP32r : I<0xC8, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), "bswap{l}\t$dst", @@ -855,11 +866,11 @@ def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), let neverHasSideEffects = 1 in def LEA16r : I<0x8D, MRMSrcMem, - (outs GR16:$dst), (ins lea32mem:$src), + (outs GR16:$dst), (ins i32mem:$src), "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize; let isReMaterializable = 1 in def LEA32r : I<0x8D, MRMSrcMem, - (outs GR32:$dst), (ins lea32mem:$src), + (outs GR32:$dst), (ins i32mem:$src), "lea{l}\t{$src|$dst}, {$dst|$src}", [(set GR32:$dst, lea32addr:$src)]>, Requires<[In32BitMode]>; @@ -1239,7 +1250,7 @@ def IDIV32m: I<0xF7, MRM7m, (outs), (ins i32mem:$src), //===----------------------------------------------------------------------===// // Two address Instructions. // -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { // Conditional moves let Uses = [EFLAGS] in { @@ -1640,7 +1651,7 @@ def CMOVNO32rm : I<0x41, MRMSrcMem, // if !overflow, GR32 = [mem32] // i8 register pressure. Note that CMOV_GR8 is conservatively considered to // clobber EFLAGS, because if one of the operands is zero, the expansion // could involve an xor. 
-let usesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] in { +let usesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] in { def CMOV_GR8 : I<0, Pseudo, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2, i8imm:$cond), "#CMOV_GR8 PSEUDO!", @@ -1659,86 +1670,106 @@ def CMOV_GR16 : I<0, Pseudo, [(set GR16:$dst, (X86cmov GR16:$src1, GR16:$src2, imm:$cond, EFLAGS))]>; def CMOV_RFP32 : I<0, Pseudo, - (outs RFP32:$dst), (ins RFP32:$src1, RFP32:$src2, i8imm:$cond), + (outs RFP32:$dst), + (ins RFP32:$src1, RFP32:$src2, i8imm:$cond), "#CMOV_RFP32 PSEUDO!", - [(set RFP32:$dst, (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond, + [(set RFP32:$dst, + (X86cmov RFP32:$src1, RFP32:$src2, imm:$cond, EFLAGS))]>; def CMOV_RFP64 : I<0, Pseudo, - (outs RFP64:$dst), (ins RFP64:$src1, RFP64:$src2, i8imm:$cond), + (outs RFP64:$dst), + (ins RFP64:$src1, RFP64:$src2, i8imm:$cond), "#CMOV_RFP64 PSEUDO!", - [(set RFP64:$dst, (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond, + [(set RFP64:$dst, + (X86cmov RFP64:$src1, RFP64:$src2, imm:$cond, EFLAGS))]>; def CMOV_RFP80 : I<0, Pseudo, - (outs RFP80:$dst), (ins RFP80:$src1, RFP80:$src2, i8imm:$cond), + (outs RFP80:$dst), + (ins RFP80:$src1, RFP80:$src2, i8imm:$cond), "#CMOV_RFP80 PSEUDO!", - [(set RFP80:$dst, (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond, + [(set RFP80:$dst, + (X86cmov RFP80:$src1, RFP80:$src2, imm:$cond, EFLAGS))]>; } // Predicates = [NoCMov] -} // UsesCustomInserter = 1, isTwoAddress = 0, Defs = [EFLAGS] +} // UsesCustomInserter = 1, Constraints = "", Defs = [EFLAGS] } // Uses = [EFLAGS] // unary instructions let CodeSize = 2 in { let Defs = [EFLAGS] in { -def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src), "neg{b}\t$dst", - [(set GR8:$dst, (ineg GR8:$src)), +def NEG8r : I<0xF6, MRM3r, (outs GR8 :$dst), (ins GR8 :$src1), + "neg{b}\t$dst", + [(set GR8:$dst, (ineg GR8:$src1)), (implicit EFLAGS)]>; -def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src), "neg{w}\t$dst", - [(set GR16:$dst, (ineg GR16:$src)), +def NEG16r : I<0xF7, MRM3r, (outs GR16:$dst), (ins GR16:$src1), + "neg{w}\t$dst", + [(set GR16:$dst, (ineg GR16:$src1)), (implicit EFLAGS)]>, OpSize; -def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src), "neg{l}\t$dst", - [(set GR32:$dst, (ineg GR32:$src)), +def NEG32r : I<0xF7, MRM3r, (outs GR32:$dst), (ins GR32:$src1), + "neg{l}\t$dst", + [(set GR32:$dst, (ineg GR32:$src1)), (implicit EFLAGS)]>; -let isTwoAddress = 0 in { - def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), "neg{b}\t$dst", + +let Constraints = "" in { + def NEG8m : I<0xF6, MRM3m, (outs), (ins i8mem :$dst), + "neg{b}\t$dst", [(store (ineg (loadi8 addr:$dst)), addr:$dst), (implicit EFLAGS)]>; - def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), "neg{w}\t$dst", + def NEG16m : I<0xF7, MRM3m, (outs), (ins i16mem:$dst), + "neg{w}\t$dst", [(store (ineg (loadi16 addr:$dst)), addr:$dst), (implicit EFLAGS)]>, OpSize; - def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), "neg{l}\t$dst", + def NEG32m : I<0xF7, MRM3m, (outs), (ins i32mem:$dst), + "neg{l}\t$dst", [(store (ineg (loadi32 addr:$dst)), addr:$dst), (implicit EFLAGS)]>; -} +} // Constraints = "" } // Defs = [EFLAGS] // Match xor -1 to not. Favors these over a move imm + xor to save code size. 
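Throughout this file the blanket `let isTwoAddress = 1` regions are being replaced by explicit `Constraints = "$src1 = $dst"` strings (and `isTwoAddress = 0` by an empty constraint), which name exactly which input is tied to the output instead of implying "operand 1 ties to operand 0". Operationally the constraint means what this standalone check sketches (toy record, not the TableGen machinery):

    #include <string>

    // Toy instruction record: the constraint ties the named input to the
    // output, so a register assignment is only valid when they coincide.
    struct MiniInst {
      unsigned DstReg, Src1Reg;
      std::string Constraints; // e.g. "$src1 = $dst", or "" for untied
    };

    bool satisfiesTie(const MiniInst &MI) {
      if (MI.Constraints == "$src1 = $dst")
        return MI.DstReg == MI.Src1Reg; // allocator must reuse the register
      return true;                      // no tie to honor
    }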
let AddedComplexity = 15 in { -def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src), "not{b}\t$dst", - [(set GR8:$dst, (not GR8:$src))]>; -def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src), "not{w}\t$dst", - [(set GR16:$dst, (not GR16:$src))]>, OpSize; -def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src), "not{l}\t$dst", - [(set GR32:$dst, (not GR32:$src))]>; +def NOT8r : I<0xF6, MRM2r, (outs GR8 :$dst), (ins GR8 :$src1), + "not{b}\t$dst", + [(set GR8:$dst, (not GR8:$src1))]>; +def NOT16r : I<0xF7, MRM2r, (outs GR16:$dst), (ins GR16:$src1), + "not{w}\t$dst", + [(set GR16:$dst, (not GR16:$src1))]>, OpSize; +def NOT32r : I<0xF7, MRM2r, (outs GR32:$dst), (ins GR32:$src1), + "not{l}\t$dst", + [(set GR32:$dst, (not GR32:$src1))]>; } -let isTwoAddress = 0 in { - def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), "not{b}\t$dst", +let Constraints = "" in { + def NOT8m : I<0xF6, MRM2m, (outs), (ins i8mem :$dst), + "not{b}\t$dst", [(store (not (loadi8 addr:$dst)), addr:$dst)]>; - def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), "not{w}\t$dst", + def NOT16m : I<0xF7, MRM2m, (outs), (ins i16mem:$dst), + "not{w}\t$dst", [(store (not (loadi16 addr:$dst)), addr:$dst)]>, OpSize; - def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), "not{l}\t$dst", + def NOT32m : I<0xF7, MRM2m, (outs), (ins i32mem:$dst), + "not{l}\t$dst", [(store (not (loadi32 addr:$dst)), addr:$dst)]>; -} +} // Constraints = "" } // CodeSize // TODO: inc/dec is slow for P4, but fast for Pentium-M. let Defs = [EFLAGS] in { let CodeSize = 2 in -def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src))]>; +def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), + "inc{b}\t$dst", + [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src1))]>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. -def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), +def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "inc{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>, + [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src1))]>, OpSize, Requires<[In32BitMode]>; -def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), +def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "inc{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>, + [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src1))]>, Requires<[In32BitMode]>; } -let isTwoAddress = 0, CodeSize = 2 in { +let Constraints = "", CodeSize = 2 in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst), (implicit EFLAGS)]>; @@ -1750,23 +1781,24 @@ let isTwoAddress = 0, CodeSize = 2 in { [(store (add (loadi32 addr:$dst), 1), addr:$dst), (implicit EFLAGS)]>, Requires<[In32BitMode]>; -} +} // Constraints = "", CodeSize = 2 let CodeSize = 2 in -def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst", - [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src))]>; +def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), + "dec{b}\t$dst", + [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src1))]>; let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA. 
-def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src), +def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src1), "dec{w}\t$dst", - [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>, + [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src1))]>, OpSize, Requires<[In32BitMode]>; -def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src), +def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1), "dec{l}\t$dst", - [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>, + [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src1))]>, Requires<[In32BitMode]>; -} +} // CodeSize = 2 -let isTwoAddress = 0, CodeSize = 2 in { +let Constraints = "", CodeSize = 2 in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst), (implicit EFLAGS)]>; @@ -1778,7 +1810,7 @@ let isTwoAddress = 0, CodeSize = 2 in { [(store (add (loadi32 addr:$dst), -1), addr:$dst), (implicit EFLAGS)]>, Requires<[In32BitMode]>; -} +} // Constraints = "", CodeSize = 2 } // Defs = [EFLAGS] // Logical operators... @@ -1857,7 +1889,7 @@ def AND32ri8 : Ii8<0x83, MRM4r, [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1, i32immSExt8:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def AND8mr : I<0x20, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "and{b}\t{$src, $dst|$dst, $src}", @@ -1909,7 +1941,7 @@ let isTwoAddress = 0 in { def AND32i32 : Ii32<0x25, RawFrm, (outs), (ins i32imm:$src), "and{l}\t{$src, %eax|%eax, $src}", []>; -} +} // Constraints = "" let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y @@ -1983,7 +2015,7 @@ def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst), "or{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1, i32immSExt8:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src), "or{b}\t{$src, $dst|$dst, $src}", [(store (or (load addr:$dst), GR8:$src), addr:$dst), @@ -2025,7 +2057,7 @@ let isTwoAddress = 0 in { "or{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def OR32i32 : Ii32 <0x0D, RawFrm, (outs), (ins i32imm:$src), "or{l}\t{$src, %eax|%eax, $src}", []>; -} // isTwoAddress = 0 +} // Constraints = "" let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y @@ -2102,7 +2134,7 @@ def XOR32ri8 : Ii8<0x83, MRM6r, [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1, i32immSExt8:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def XOR8mr : I<0x30, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src), "xor{b}\t{$src, $dst|$dst, $src}", @@ -2153,26 +2185,27 @@ let isTwoAddress = 0 in { "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src), "xor{l}\t{$src, %eax|%eax, $src}", []>; -} // isTwoAddress = 0 +} // Constraints = "" } // Defs = [EFLAGS] // Shift instructions let Defs = [EFLAGS] in { let Uses = [CL] in { -def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src), +def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1), "shl{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (shl GR8:$src, CL))]>; -def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src), + [(set GR8:$dst, (shl GR8:$src1, CL))]>; +def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src1), "shl{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize; -def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src), + [(set GR16:$dst, (shl GR16:$src1, CL))]>, OpSize; +def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src1), 
"shl{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (shl GR32:$src, CL))]>; + [(set GR32:$dst, (shl GR32:$src1, CL))]>; } // Uses = [CL] def SHL8ri : Ii8<0xC0, MRM4r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), "shl{b}\t{$src2, $dst|$dst, $src2}", [(set GR8:$dst, (shl GR8:$src1, (i8 imm:$src2)))]>; + let isConvertibleToThreeAddress = 1 in { // Can transform into LEA. def SHL16ri : Ii8<0xC1, MRM4r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$src2), "shl{w}\t{$src2, $dst|$dst, $src2}", @@ -2193,7 +2226,7 @@ def SHL32r1 : I<0xD1, MRM4r, (outs GR32:$dst), (ins GR32:$src1), } // isConvertibleToThreeAddress = 1 -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst), "shl{b}\t{%cl, $dst|$dst, CL}", @@ -2227,18 +2260,18 @@ let isTwoAddress = 0 in { def SHL32m1 : I<0xD1, MRM4m, (outs), (ins i32mem:$dst), "shl{l}\t$dst", [(store (shl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} +} // Constraints = "" let Uses = [CL] in { -def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src), +def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src1), "shr{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (srl GR8:$src, CL))]>; -def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src), + [(set GR8:$dst, (srl GR8:$src1, CL))]>; +def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src1), "shr{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize; -def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src), + [(set GR16:$dst, (srl GR16:$src1, CL))]>, OpSize; +def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src1), "shr{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (srl GR32:$src, CL))]>; + [(set GR32:$dst, (srl GR32:$src1, CL))]>; } def SHR8ri : Ii8<0xC0, MRM5r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), @@ -2262,7 +2295,7 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1), "shr{l}\t$dst", [(set GR32:$dst, (srl GR32:$src1, (i8 1)))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst), "shr{b}\t{%cl, $dst|$dst, CL}", @@ -2296,18 +2329,18 @@ let isTwoAddress = 0 in { def SHR32m1 : I<0xD1, MRM5m, (outs), (ins i32mem:$dst), "shr{l}\t$dst", [(store (srl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} +} // Constraints = "" let Uses = [CL] in { -def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src), +def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1), "sar{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (sra GR8:$src, CL))]>; -def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src), + [(set GR8:$dst, (sra GR8:$src1, CL))]>; +def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src1), "sar{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize; -def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src), + [(set GR16:$dst, (sra GR16:$src1, CL))]>, OpSize; +def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src1), "sar{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (sra GR32:$src, CL))]>; + [(set GR32:$dst, (sra GR32:$src1, CL))]>; } def SAR8ri : Ii8<0xC0, MRM7r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), @@ -2332,7 +2365,7 @@ def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1), "sar{l}\t$dst", [(set GR32:$dst, (sra GR32:$src1, (i8 1)))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst), "sar{b}\t{%cl, $dst|$dst, CL}", @@ 
-2366,65 +2399,65 @@ let isTwoAddress = 0 in { def SAR32m1 : I<0xD1, MRM7m, (outs), (ins i32mem:$dst), "sar{l}\t$dst", [(store (sra (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} +} // Constraints = "" // Rotate instructions -def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src), +def RCL8r1 : I<0xD0, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { -def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src), +def RCL8rCL : I<0xD2, MRM2r, (outs GR8:$dst), (ins GR8:$src1), "rcl{b}\t{%cl, $dst|$dst, CL}", []>; } -def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), +def RCL8ri : Ii8<0xC0, MRM2r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), "rcl{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src), +def RCL16r1 : I<0xD1, MRM2r, (outs GR16:$dst), (ins GR16:$src1), "rcl{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { -def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src), +def RCL16rCL : I<0xD3, MRM2r, (outs GR16:$dst), (ins GR16:$src1), "rcl{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } -def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), +def RCL16ri : Ii8<0xC1, MRM2r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), "rcl{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src), +def RCL32r1 : I<0xD1, MRM2r, (outs GR32:$dst), (ins GR32:$src1), "rcl{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { -def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src), +def RCL32rCL : I<0xD3, MRM2r, (outs GR32:$dst), (ins GR32:$src1), "rcl{l}\t{%cl, $dst|$dst, CL}", []>; } -def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), +def RCL32ri : Ii8<0xC1, MRM2r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), "rcl{l}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src), +def RCR8r1 : I<0xD0, MRM3r, (outs GR8:$dst), (ins GR8:$src1), "rcr{b}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { -def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src), +def RCR8rCL : I<0xD2, MRM3r, (outs GR8:$dst), (ins GR8:$src1), "rcr{b}\t{%cl, $dst|$dst, CL}", []>; } -def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src, i8imm:$cnt), +def RCR8ri : Ii8<0xC0, MRM3r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$cnt), "rcr{b}\t{$cnt, $dst|$dst, $cnt}", []>; -def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src), +def RCR16r1 : I<0xD1, MRM3r, (outs GR16:$dst), (ins GR16:$src1), "rcr{w}\t{1, $dst|$dst, 1}", []>, OpSize; let Uses = [CL] in { -def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src), +def RCR16rCL : I<0xD3, MRM3r, (outs GR16:$dst), (ins GR16:$src1), "rcr{w}\t{%cl, $dst|$dst, CL}", []>, OpSize; } -def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src, i8imm:$cnt), +def RCR16ri : Ii8<0xC1, MRM3r, (outs GR16:$dst), (ins GR16:$src1, i8imm:$cnt), "rcr{w}\t{$cnt, $dst|$dst, $cnt}", []>, OpSize; -def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src), +def RCR32r1 : I<0xD1, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "rcr{l}\t{1, $dst|$dst, 1}", []>; let Uses = [CL] in { -def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src), +def RCR32rCL : I<0xD3, MRM3r, (outs GR32:$dst), (ins GR32:$src1), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } -def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src, i8imm:$cnt), +def RCR32ri : Ii8<0xC1, MRM3r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$cnt), "rcr{l}\t{$cnt, 
$dst|$dst, $cnt}", []>; -let isTwoAddress = 0 in { +let Constraints = "" in { def RCL8m1 : I<0xD0, MRM2m, (outs), (ins i8mem:$dst), "rcl{b}\t{1, $dst|$dst, 1}", []>; def RCL8mi : Ii8<0xC0, MRM2m, (outs), (ins i8mem:$dst, i8imm:$cnt), @@ -2464,19 +2497,19 @@ def RCR16mCL : I<0xD3, MRM3m, (outs), (ins i16mem:$dst), def RCR32mCL : I<0xD3, MRM3m, (outs), (ins i32mem:$dst), "rcr{l}\t{%cl, $dst|$dst, CL}", []>; } -} +} // Constraints = "" // FIXME: provide shorter instructions when imm8 == 1 let Uses = [CL] in { -def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), +def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1), "rol{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (rotl GR8:$src, CL))]>; -def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src), + [(set GR8:$dst, (rotl GR8:$src1, CL))]>; +def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src1), "rol{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize; -def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src), + [(set GR16:$dst, (rotl GR16:$src1, CL))]>, OpSize; +def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "rol{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (rotl GR32:$src, CL))]>; + [(set GR32:$dst, (rotl GR32:$src1, CL))]>; } def ROL8ri : Ii8<0xC0, MRM0r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), @@ -2501,7 +2534,7 @@ def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1), "rol{l}\t$dst", [(set GR32:$dst, (rotl GR32:$src1, (i8 1)))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst), "rol{b}\t{%cl, $dst|$dst, CL}", @@ -2535,18 +2568,18 @@ let isTwoAddress = 0 in { def ROL32m1 : I<0xD1, MRM0m, (outs), (ins i32mem:$dst), "rol{l}\t$dst", [(store (rotl (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} +} // Constraints = "" let Uses = [CL] in { -def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), +def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1), "ror{b}\t{%cl, $dst|$dst, CL}", - [(set GR8:$dst, (rotr GR8:$src, CL))]>; -def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src), + [(set GR8:$dst, (rotr GR8:$src1, CL))]>; +def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src1), "ror{w}\t{%cl, $dst|$dst, CL}", - [(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize; -def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src), + [(set GR16:$dst, (rotr GR16:$src1, CL))]>, OpSize; +def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "ror{l}\t{%cl, $dst|$dst, CL}", - [(set GR32:$dst, (rotr GR32:$src, CL))]>; + [(set GR32:$dst, (rotr GR32:$src1, CL))]>; } def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, i8imm:$src2), @@ -2571,7 +2604,7 @@ def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1), "ror{l}\t$dst", [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst), "ror{b}\t{%cl, $dst|$dst, CL}", @@ -2605,8 +2638,7 @@ let isTwoAddress = 0 in { def ROR32m1 : I<0xD1, MRM1m, (outs), (ins i32mem:$dst), "ror{l}\t$dst", [(store (rotr (loadi32 addr:$dst), (i8 1)), addr:$dst)]>; -} - +} // Constraints = "" // Double shift instructions (generalizations of rotate) @@ -2662,7 +2694,7 @@ def SHRD16rri8 : Ii8<0xAC, MRMDestReg, TB, OpSize; } -let isTwoAddress = 0 in { +let Constraints = "" in { let Uses = [CL] in { def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, 
GR32:$src2), "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}", @@ -2708,7 +2740,7 @@ let isTwoAddress = 0 in { [(store (X86shrd (loadi16 addr:$dst), GR16:$src2, (i8 imm:$src3)), addr:$dst)]>, TB, OpSize; -} +} // Constraints = "" } // Defs = [EFLAGS] @@ -2794,7 +2826,7 @@ def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst), (X86add_flag GR32:$src1, i32immSExt8:$src2))]>; } -let isTwoAddress = 0 in { +let Constraints = "" in { // Memory-Register Addition def ADD8mr : I<0x00, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "add{b}\t{$src2, $dst|$dst, $src2}", @@ -2838,7 +2870,7 @@ let isTwoAddress = 0 in { "add{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def ADD32i32 : Ii32<0x05, RawFrm, (outs), (ins i32imm:$src), "add{l}\t{$src, %eax|%eax, $src}", []>; -} +} // Constraints = "" let Uses = [EFLAGS] in { let isCommutable = 1 in { // X = ADC Y, Z --> X = ADC Z, Y @@ -2900,7 +2932,7 @@ def ADC32ri8 : Ii8<0x83, MRM2r, (outs GR32:$dst), "adc{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (adde GR32:$src1, i32immSExt8:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def ADC8mr : I<0x10, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "adc{b}\t{$src2, $dst|$dst, $src2}", [(store (adde (load addr:$dst), GR8:$src2), addr:$dst)]>; @@ -2935,7 +2967,7 @@ let isTwoAddress = 0 in { "adc{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def ADC32i32 : Ii32<0x15, RawFrm, (outs), (ins i32imm:$src), "adc{l}\t{$src, %eax|%eax, $src}", []>; -} +} // Constraints = "" } // Uses = [EFLAGS] // Register-Register Subtraction @@ -3007,7 +3039,7 @@ def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst), [(set GR32:$dst, EFLAGS, (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { // Memory-Register Subtraction def SUB8mr : I<0x28, MRMDestMem, (outs), (ins i8mem :$dst, GR8 :$src2), "sub{b}\t{$src2, $dst|$dst, $src2}", @@ -3052,7 +3084,7 @@ let isTwoAddress = 0 in { "sub{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def SUB32i32 : Ii32<0x2D, RawFrm, (outs), (ins i32imm:$src), "sub{l}\t{$src, %eax|%eax, $src}", []>; -} +} // Constraints = "" let Uses = [EFLAGS] in { def SBB8rr : I<0x18, MRMDestReg, (outs GR8:$dst), @@ -3068,7 +3100,7 @@ def SBB32rr : I<0x19, MRMDestReg, (outs GR32:$dst), "sbb{l}\t{$src2, $dst|$dst, $src2}", [(set GR32:$dst, (sube GR32:$src1, GR32:$src2))]>; -let isTwoAddress = 0 in { +let Constraints = "" in { def SBB8mr : I<0x18, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2), "sbb{b}\t{$src2, $dst|$dst, $src2}", [(store (sube (load addr:$dst), GR8:$src2), addr:$dst)]>; @@ -3103,7 +3135,7 @@ let isTwoAddress = 0 in { "sbb{w}\t{$src, %ax|%ax, $src}", []>, OpSize; def SBB32i32 : Ii32<0x1D, RawFrm, (outs), (ins i32imm:$src), "sbb{l}\t{$src, %eax|%eax, $src}", []>; -} +} // Constraints = "" let isCodeGenOnly = 1 in { def SBB8rr_REV : I<0x1A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2), @@ -3811,6 +3843,7 @@ def MOV32r0 : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "", // Thread Local Storage Instructions // +// ELF TLS Support // All calls clobber the non-callee saved registers. ESP is marked as // a use to prevent stack-pointer assignments that appear immediately // before calls from potentially appearing dead. 
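The memory-form hunks above all make one mechanical change: instead of opting out of two-address operand tying with the old isTwoAddress = 0 bit, they clear the Constraints string, and each closing brace gains a trailing // Constraints = "" comment recording which let block ends. A minimal standalone TableGen sketch of the idiom (SketchInst and both defs are hypothetical, purely illustrative, and parse with llvm-tblgen):

class SketchInst {
  // Register-to-register forms tie the first source to the destination.
  string Constraints = "$src1 = $dst";
}
def SketchADDrr : SketchInst;   // inherits the two-address tie
let Constraints = "" in         // memory-destination form: clearing the
def SketchADDmr : SketchInst;   // string replaces isTwoAddress = 0

Expressing the tie as a constraint string keeps the register and memory forms under a single mechanism, so a memory-destination variant only needs to clear the string.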
@@ -3819,12 +3852,24 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS], Uses = [ESP] in -def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym), +def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym), "leal\t$sym, %eax; " "call\t___tls_get_addr@PLT", [(X86tlsaddr tls32addr:$sym)]>, Requires<[In32BitMode]>; +// Darwin TLS Support +// For i386, the address of the thunk is passed on the stack, on return the +// address of the variable is in %eax. %ecx is trashed during the function +// call. All other registers are preserved. +let Defs = [EAX, ECX], + Uses = [ESP], + usesCustomInserter = 1 in +def TLSCall_32 : I<0, Pseudo, (outs), (ins i32mem:$sym), + "# TLSCall_32", + [(X86TLSCall addr:$sym)]>, + Requires<[In32BitMode]>; + let AddedComplexity = 5, isCodeGenOnly = 1 in def GS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movl\t%gs:$src, $dst", @@ -4783,14 +4828,14 @@ def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>; // Patterns for nodes that do not produce flags, for instructions that do. // Increment reg. -def : Pat<(add GR8:$src , 1), (INC8r GR8:$src)>; -def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR8:$src1 , 1), (INC8r GR8:$src1)>; +def : Pat<(add GR16:$src1, 1), (INC16r GR16:$src1)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src1, 1), (INC32r GR32:$src1)>, Requires<[In32BitMode]>; // Decrement reg. -def : Pat<(add GR8:$src , -1), (DEC8r GR8:$src)>; -def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>; +def : Pat<(add GR8:$src1 , -1), (DEC8r GR8:$src1)>; +def : Pat<(add GR16:$src1, -1), (DEC16r GR16:$src1)>, Requires<[In32BitMode]>; +def : Pat<(add GR32:$src1, -1), (DEC32r GR32:$src1)>, Requires<[In32BitMode]>; // or reg/reg. def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 0952fc8..6cf7ac8 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -513,30 +513,20 @@ def : Pat<(store (v4i16 VR64:$src), addr:$dst), (MMX_MOVQ64mr addr:$dst, VR64:$src)>; def : Pat<(store (v2i32 VR64:$src), addr:$dst), (MMX_MOVQ64mr addr:$dst, VR64:$src)>; -def : Pat<(store (v2f32 VR64:$src), addr:$dst), - (MMX_MOVQ64mr addr:$dst, VR64:$src)>; def : Pat<(store (v1i64 VR64:$src), addr:$dst), (MMX_MOVQ64mr addr:$dst, VR64:$src)>; // Bit convert. 
def : Pat<(v8i8 (bitconvert (v1i64 VR64:$src))), (v8i8 VR64:$src)>; def : Pat<(v8i8 (bitconvert (v2i32 VR64:$src))), (v8i8 VR64:$src)>; -def : Pat<(v8i8 (bitconvert (v2f32 VR64:$src))), (v8i8 VR64:$src)>; def : Pat<(v8i8 (bitconvert (v4i16 VR64:$src))), (v8i8 VR64:$src)>; def : Pat<(v4i16 (bitconvert (v1i64 VR64:$src))), (v4i16 VR64:$src)>; def : Pat<(v4i16 (bitconvert (v2i32 VR64:$src))), (v4i16 VR64:$src)>; -def : Pat<(v4i16 (bitconvert (v2f32 VR64:$src))), (v4i16 VR64:$src)>; def : Pat<(v4i16 (bitconvert (v8i8 VR64:$src))), (v4i16 VR64:$src)>; def : Pat<(v2i32 (bitconvert (v1i64 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v2i32 (bitconvert (v2f32 VR64:$src))), (v2i32 VR64:$src)>; def : Pat<(v2i32 (bitconvert (v4i16 VR64:$src))), (v2i32 VR64:$src)>; def : Pat<(v2i32 (bitconvert (v8i8 VR64:$src))), (v2i32 VR64:$src)>; -def : Pat<(v2f32 (bitconvert (v1i64 VR64:$src))), (v2f32 VR64:$src)>; -def : Pat<(v2f32 (bitconvert (v2i32 VR64:$src))), (v2f32 VR64:$src)>; -def : Pat<(v2f32 (bitconvert (v4i16 VR64:$src))), (v2f32 VR64:$src)>; -def : Pat<(v2f32 (bitconvert (v8i8 VR64:$src))), (v2f32 VR64:$src)>; def : Pat<(v1i64 (bitconvert (v2i32 VR64:$src))), (v1i64 VR64:$src)>; -def : Pat<(v1i64 (bitconvert (v2f32 VR64:$src))), (v1i64 VR64:$src)>; def : Pat<(v1i64 (bitconvert (v4i16 VR64:$src))), (v1i64 VR64:$src)>; def : Pat<(v1i64 (bitconvert (v8i8 VR64:$src))), (v1i64 VR64:$src)>; @@ -545,8 +535,6 @@ def : Pat<(v1i64 (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; def : Pat<(v2i32 (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; -def : Pat<(v2f32 (bitconvert (i64 GR64:$src))), - (MMX_MOVD64to64rr GR64:$src)>; def : Pat<(v4i16 (bitconvert (i64 GR64:$src))), (MMX_MOVD64to64rr GR64:$src)>; def : Pat<(v8i8 (bitconvert (i64 GR64:$src))), @@ -555,8 +543,6 @@ def : Pat<(i64 (bitconvert (v1i64 VR64:$src))), (MMX_MOVD64from64rr VR64:$src)>; def : Pat<(i64 (bitconvert (v2i32 VR64:$src))), (MMX_MOVD64from64rr VR64:$src)>; -def : Pat<(i64 (bitconvert (v2f32 VR64:$src))), - (MMX_MOVD64from64rr VR64:$src)>; def : Pat<(i64 (bitconvert (v4i16 VR64:$src))), (MMX_MOVD64from64rr VR64:$src)>; def : Pat<(i64 (bitconvert (v8i8 VR64:$src))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 5580ba7..ab0005b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -15,322 +15,6 @@ //===----------------------------------------------------------------------===// -// SSE specific DAG Nodes. 
-//===----------------------------------------------------------------------===//
-
-def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
-                                            SDTCisFP<0>, SDTCisInt<2> ]>;
-def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
-                                       SDTCisFP<1>, SDTCisVT<3, i8>]>;
-
-def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
-def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
-def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
-                     [SDNPCommutative, SDNPAssociative]>;
-def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
-                    [SDNPCommutative, SDNPAssociative]>;
-def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
-                     [SDNPCommutative, SDNPAssociative]>;
-def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
-def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
-def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>;
-def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
-def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86pshufb : SDNode<"X86ISD::PSHUFB",
-                 SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
-                                      SDTCisSameAs<0,2>]>>;
-def X86pextrb : SDNode<"X86ISD::PEXTRB",
-                 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
-def X86pextrw : SDNode<"X86ISD::PEXTRW",
-                 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>;
-def X86pinsrb : SDNode<"X86ISD::PINSRB",
-                 SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
-                                      SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86pinsrw : SDNode<"X86ISD::PINSRW",
-                 SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>,
-                                      SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
-def X86insrtps : SDNode<"X86ISD::INSERTPS",
-                 SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>,
-                                      SDTCisVT<2, v4f32>, SDTCisPtrTy<3>]>>;
-def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL",
-                 SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>;
-def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
-                       [SDNPHasChain, SDNPMayLoad]>;
-def X86vshl : SDNode<"X86ISD::VSHL", SDTIntShiftOp>;
-def X86vshr : SDNode<"X86ISD::VSRL", SDTIntShiftOp>;
-def X86cmpps : SDNode<"X86ISD::CMPPS", SDTX86VFCMP>;
-def X86cmppd : SDNode<"X86ISD::CMPPD", SDTX86VFCMP>;
-def X86pcmpeqb : SDNode<"X86ISD::PCMPEQB", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqw : SDNode<"X86ISD::PCMPEQW", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqd : SDNode<"X86ISD::PCMPEQD", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpeqq : SDNode<"X86ISD::PCMPEQQ", SDTIntBinOp, [SDNPCommutative]>;
-def X86pcmpgtb : SDNode<"X86ISD::PCMPGTB", SDTIntBinOp>;
-def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
-def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
-def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
-
-def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
-                                          SDTCisVT<1, v4f32>,
-                                          SDTCisVT<2, v4f32>]>;
-def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
-
-//===----------------------------------------------------------------------===//
-// SSE Complex Patterns
-//===----------------------------------------------------------------------===//
-
-// These are 'extloads' from a scalar to the low element of a vector, zeroing
-// the top elements. These are used for the SSE 'ss' and 'sd' instruction
-// forms.
-def sse_load_f32 : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
-                                  [SDNPHasChain, SDNPMayLoad]>;
-def sse_load_f64 : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
-                                  [SDNPHasChain, SDNPMayLoad]>;
-
-def ssmem : Operand<v4f32> {
-  let PrintMethod = "printf32mem";
-  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
-  let ParserMatchClass = X86MemAsmOperand;
-}
-def sdmem : Operand<v2f64> {
-  let PrintMethod = "printf64mem";
-  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
-  let ParserMatchClass = X86MemAsmOperand;
-}
-
-//===----------------------------------------------------------------------===//
-// SSE pattern fragments
-//===----------------------------------------------------------------------===//
-
-def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
-def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
-def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
-def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
-
-// Like 'store', but always requires vector alignment.
-def alignedstore : PatFrag<(ops node:$val, node:$ptr),
-                           (store node:$val, node:$ptr), [{
-  return cast<StoreSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-// Like 'load', but always requires vector alignment.
-def alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def alignedloadfsf32 : PatFrag<(ops node:$ptr),
-                               (f32 (alignedload node:$ptr))>;
-def alignedloadfsf64 : PatFrag<(ops node:$ptr),
-                               (f64 (alignedload node:$ptr))>;
-def alignedloadv4f32 : PatFrag<(ops node:$ptr),
-                               (v4f32 (alignedload node:$ptr))>;
-def alignedloadv2f64 : PatFrag<(ops node:$ptr),
-                               (v2f64 (alignedload node:$ptr))>;
-def alignedloadv4i32 : PatFrag<(ops node:$ptr),
-                               (v4i32 (alignedload node:$ptr))>;
-def alignedloadv2i64 : PatFrag<(ops node:$ptr),
-                               (v2i64 (alignedload node:$ptr))>;
-
-// Like 'load', but uses special alignment checks suitable for use in
-// memory operands in most SSE instructions, which are required to
-// be naturally aligned on some targets but not on others. If the subtarget
-// allows unaligned accesses, match any load, though this may require
-// setting a feature bit in the processor (on startup, for example).
-// Opteron 10h and later implement such a feature.
-def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return Subtarget->hasVectorUAMem()
-    || cast<LoadSDNode>(N)->getAlignment() >= 16;
-}]>;
-
-def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
-def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
-def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
-def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
-def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
-def memopv2i64 : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;
-def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>;
-
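As the comment above spells out, memop is deliberately looser than alignedload: on subtargets reporting hasVectorUAMem() it matches any vector load, so arithmetic instructions can still fold a load whose 16-byte alignment is unknown, while the aligned moves keep the hard requirement. Schematically (both Pats below are illustrative only, not the file's literal patterns, which are written inline on the instruction definitions):

def : Pat<(alignedloadv4f32 addr:$src),        // requires 16-byte alignment
          (MOVAPSrm addr:$src)>;
def : Pat<(v4f32 (fadd VR128:$src1, (memopv4f32 addr:$src2))),
          (ADDPSrm VR128:$src1, addr:$src2)>;  // also matches unaligned loads
                                               // when the subtarget allows them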
-// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
-// 16-byte boundary.
-// FIXME: 8 byte alignment for mmx reads is not required
-def memop64 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
-  return cast<LoadSDNode>(N)->getAlignment() >= 8;
-}]>;
-
-def memopv8i8 : PatFrag<(ops node:$ptr), (v8i8 (memop64 node:$ptr))>;
-def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
-def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
-def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
-
-// MOVNT Support
-// Like 'store', but requires the non-temporal bit to be set
-def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
-                               (st node:$val, node:$ptr), [{
-  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
-    return ST->isNonTemporal();
-  return false;
-}]>;
-
-def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
-                                      (st node:$val, node:$ptr), [{
-  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
-    return ST->isNonTemporal() && !ST->isTruncatingStore() &&
-           ST->getAddressingMode() == ISD::UNINDEXED &&
-           ST->getAlignment() >= 16;
-  return false;
-}]>;
-
-def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
-                                        (st node:$val, node:$ptr), [{
-  if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
-    return ST->isNonTemporal() &&
-           ST->getAlignment() < 16;
-  return false;
-}]>;
-
-def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
-def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
-def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
-def bc_v8i16 : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
-def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
-def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
-
-def vzmovl_v2i64 : PatFrag<(ops node:$src),
-                           (bitconvert (v2i64 (X86vzmovl
-                             (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
-def vzmovl_v4i32 : PatFrag<(ops node:$src),
-                           (bitconvert (v4i32 (X86vzmovl
-                             (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;
-
-def vzload_v2i64 : PatFrag<(ops node:$src),
-                           (bitconvert (v2i64 (X86vzload node:$src)))>;
-
-
-def fp32imm0 : PatLeaf<(f32 fpimm), [{
-  return N->isExactlyValue(+0.0);
-}]>;
-
-// BYTE_imm - Transform bit immediates into byte immediates.
-def BYTE_imm : SDNodeXForm<imm, [{
-  // Transformation function: imm >> 3
-  return getI32Imm(N->getZExtValue() >> 3);
-}]>;
-
-// SHUFFLE_get_shuf_imm xform function: convert vector_shuffle mask to PSHUF*,
-// SHUFP* etc. imm.
-def SHUFFLE_get_shuf_imm : SDNodeXForm<vector_shuffle, [{
-  return getI8Imm(X86::getShuffleSHUFImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshufhw_imm xform function: convert vector_shuffle mask to
-// PSHUFHW imm.
-def SHUFFLE_get_pshufhw_imm : SDNodeXForm<vector_shuffle, [{
-  return getI8Imm(X86::getShufflePSHUFHWImmediate(N));
-}]>;
-
-// SHUFFLE_get_pshuflw_imm xform function: convert vector_shuffle mask to
-// PSHUFLW imm.
-def SHUFFLE_get_pshuflw_imm : SDNodeXForm<vector_shuffle, [{
-  return getI8Imm(X86::getShufflePSHUFLWImmediate(N));
-}]>;
-
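BYTE_imm is the simplest of these match-time transforms: the matched immediate is rewritten with getZExtValue() >> 3, turning a bit count into a byte count (a count of 24 becomes 3). A schematic use in the style of the patterns elsewhere in this file (an assumption for illustration, not quoted from the patch): the psll_dq intrinsic takes its shift amount in bits while the PSLLDQ instruction shifts whole bytes, so the immediate is wrapped in the transform when the instruction is emitted:

// The intrinsic counts bits; PSLLDQri counts bytes, so BYTE_imm divides
// the matched immediate by eight at selection time.
def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
          (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2))>;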
-// SHUFFLE_get_palign_imm xform function: convert vector_shuffle mask to
-// a PALIGNR imm.
-def SHUFFLE_get_palign_imm : SDNodeXForm<vector_shuffle, [{
-  return getI8Imm(X86::getShufflePALIGNRImmediate(N));
-}]>;
-
-def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
-                       (vector_shuffle node:$lhs, node:$rhs), [{
-  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
-  return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
-}]>;
-
-def movddup : PatFrag<(ops node:$lhs, node:$rhs),
-                      (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isMOVDDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movhlps : PatFrag<(ops node:$lhs, node:$rhs),
-                      (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isMOVHLPSMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
-                            (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
-                      (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movlp : PatFrag<(ops node:$lhs, node:$rhs),
-                    (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isMOVLPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movl : PatFrag<(ops node:$lhs, node:$rhs),
-                   (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isMOVLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movshdup : PatFrag<(ops node:$lhs, node:$rhs),
-                       (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isMOVSHDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def movsldup : PatFrag<(ops node:$lhs, node:$rhs),
-                       (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isMOVSLDUPMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckl : PatFrag<(ops node:$lhs, node:$rhs),
-                     (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKLMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckh : PatFrag<(ops node:$lhs, node:$rhs),
-                     (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKHMask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckl_undef : PatFrag<(ops node:$lhs, node:$rhs),
-                           (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKL_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def unpckh_undef : PatFrag<(ops node:$lhs, node:$rhs),
-                           (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isUNPCKH_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
-}]>;
-
-def pshufd : PatFrag<(ops node:$lhs, node:$rhs),
-                     (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isPSHUFDMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
-
-def shufp : PatFrag<(ops node:$lhs, node:$rhs),
-                    (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isSHUFPMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_shuf_imm>;
-
-def pshufhw : PatFrag<(ops node:$lhs, node:$rhs),
-                      (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isPSHUFHWMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_pshufhw_imm>;
-
-def pshuflw : PatFrag<(ops node:$lhs, node:$rhs),
-                      (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isPSHUFLWMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_pshuflw_imm>;
-
-def palign : PatFrag<(ops node:$lhs, node:$rhs),
-                     (vector_shuffle node:$lhs, node:$rhs), [{
-  return X86::isPALIGNRMask(cast<ShuffleVectorSDNode>(N));
-}], SHUFFLE_get_palign_imm>;
-
-//===----------------------------------------------------------------------===//
 // SSE scalar FP Instructions
 //===----------------------------------------------------------------------===//

@@ -368,857 +52,642 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
 }

 //===----------------------------------------------------------------------===//
-// SSE1 Instructions
+// SSE 1 & 2 Instructions Classes
 //===----------------------------------------------------------------------===//

-// Move Instructions. Register-to-register movss is not used for FR32
-// register copies because it's a partial register update; FsMOVAPSrr is
-// used instead.
Register-to-register movss is not modeled as an INSERT_SUBREG -// because INSERT_SUBREG requires that the insert be implementable in terms of -// a copy, and just mentioned, we don't use movss for copies. -let Constraints = "$src1 = $dst" in -def MOVSSrr : SSI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR32:$src2), - "movss\t{$src2, $dst|$dst, $src2}", - [(set (v4f32 VR128:$dst), - (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>; +/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class +multiclass sse12_fp_scalar opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, X86MemOperand x86memop, + bit Is2Addr = 1> { + let isCommutable = 1 in { + def rr : SI; + } + def rm : SI; +} + +/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class +multiclass sse12_fp_scalar_int opc, string OpcodeStr, RegisterClass RC, + string asm, string SSEVer, string FPSizeStr, + Operand memopr, ComplexPattern mem_cpat, + bit Is2Addr = 1> { + def rr_Int : SI("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, RC:$src2))]>; + def rm_Int : SI("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, mem_cpat:$src2))]>; +} + +/// sse12_fp_packed - SSE 1 & 2 packed instructions class +multiclass sse12_fp_packed opc, string OpcodeStr, SDNode OpNode, + RegisterClass RC, ValueType vt, + X86MemOperand x86memop, PatFrag mem_frag, + Domain d, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : PI; + let mayLoad = 1 in + def rm : PI; +} + +/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class +multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, + string OpcodeStr, X86MemOperand x86memop, + list pat_rr, list pat_rm, + bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : PI; + def rm : PI; +} + +/// sse12_fp_packed_int - SSE 1 & 2 packed instructions intrinsics class +multiclass sse12_fp_packed_int opc, string OpcodeStr, RegisterClass RC, + string asm, string SSEVer, string FPSizeStr, + X86MemOperand x86memop, PatFrag mem_frag, + Domain d, bit Is2Addr = 1> { + def rr_Int : PI("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, RC:$src2))], d>; + def rm_Int : PI("int_x86_sse", + !strconcat(SSEVer, !strconcat("_", + !strconcat(OpcodeStr, FPSizeStr)))) + RC:$src1, (mem_frag addr:$src2)))], d>; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Move Instructions +//===----------------------------------------------------------------------===// + +class sse12_move_rr : + SI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, RC:$src2), asm, + [(set (vt VR128:$dst), (movl VR128:$src1, (scalar_to_vector RC:$src2)))]>; + +// Loading from memory automatically zeroing upper bits. +class sse12_move_rm : + SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set RC:$dst, (mem_pat addr:$src))]>; + +// Move Instructions. Register-to-register movss/movsd is not used for FR32/64 +// register copies because it's a partial register update; FsMOVAPSrr/FsMOVAPDrr +// is used instead. Register-to-register movss/movsd is not modeled as an +// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable +// in terms of a copy, and just mentioned, we don't use movss/movsd for copies. 
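From the class bodies above, the parameter lists are approximately sse12_move_rr<RegisterClass RC, ValueType vt, string asm> and sse12_move_rm<RegisterClass RC, X86MemOperand x86memop, PatFrag mem_pat, string OpcodeStr>. A representative instantiation, in the manner of the defs just below (a sketch reconstructed from those signatures; the exact asm string is an assumption, not the patch's literal text):

// One instantiation per scalar type: the rr form inserts the scalar into
// the low vector element, the rm form loads it with the upper bits zeroed.
def MOVSSrr_example : sse12_move_rr<FR32, v4f32,
                        "movss\t{$src2, $dst|$dst, $src2}">, XS;
def MOVSSrm_example : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS;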
+let isAsmParserOnly = 1 in { + def VMOVSSrr : sse12_move_rr, XS, VEX_4V; + def VMOVSDrr : sse12_move_rr, XD, VEX_4V; + + let canFoldAsLoad = 1, isReMaterializable = 1 in { + def VMOVSSrm : sse12_move_rm, XS, VEX; + + let AddedComplexity = 20 in + def VMOVSDrm : sse12_move_rm, XD, VEX; + } +} + +let Constraints = "$src1 = $dst" in { + def MOVSSrr : sse12_move_rr, XS; + def MOVSDrr : sse12_move_rr, XD; +} + +let canFoldAsLoad = 1, isReMaterializable = 1 in { + def MOVSSrm : sse12_move_rm, XS; + + let AddedComplexity = 20 in + def MOVSDrm : sse12_move_rm, XD; +} +let AddedComplexity = 15 in { // Extract the low 32-bit value from one vector and insert it into another. -let AddedComplexity = 15 in def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)), (MOVSSrr (v4f32 VR128:$src1), (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>; +// Extract the low 64-bit value from one vector and insert it into another. +def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), + (MOVSDrr (v2f64 VR128:$src1), + (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; +} // Implicitly promote a 32-bit scalar to a vector. def : Pat<(v4f32 (scalar_to_vector FR32:$src)), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, sub_ss)>; +// Implicitly promote a 64-bit scalar to a vector. +def : Pat<(v2f64 (scalar_to_vector FR64:$src)), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; -// Loading from memory automatically zeroing upper bits. -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src), - "movss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (loadf32 addr:$src))]>; - +let AddedComplexity = 20 in { // MOVSSrm zeros the high parts of the register; represent this // with SUBREG_TO_REG. -let AddedComplexity = 20 in { def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))), (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))), (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))), (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), sub_ss)>; +// MOVSDrm zeros the high parts of the register; represent this +// with SUBREG_TO_REG. +def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +def : Pat<(v2f64 (X86vzload addr:$src)), + (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; } // Store scalar value to memory. def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>; +def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(store FR64:$src, addr:$dst)]>; + +let isAsmParserOnly = 1 in { +def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), + "movss\t{$src, $dst|$dst, $src}", + [(store FR32:$src, addr:$dst)]>, XS, VEX_4V; +def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), + "movsd\t{$src, $dst|$dst, $src}", + [(store FR64:$src, addr:$dst)]>, XD, VEX_4V; +} // Extract and store. 
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), addr:$dst), (MOVSSmr addr:$dst, (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; +def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + addr:$dst), + (MOVSDmr addr:$dst, + (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; -// Conversion instructions -def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (fp_to_sint FR32:$src))]>; -def CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (fp_to_sint (loadf32 addr:$src)))]>; -def CVTSI2SSrr : SSI<0x2A, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), - "cvtsi2ss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp GR32:$src))]>; -def CVTSI2SSrm : SSI<0x2A, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), - "cvtsi2ss\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (sint_to_fp (loadi32 addr:$src)))]>; - -// Match intrinsics which expect XMM operand(s). -def CVTSS2SIrr: SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src), - "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>; -def CVTSS2SIrm: SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvtss2si{l}\t{$src, $dst|$dst, $src}", []>; - -def Int_CVTSS2SIrr : SSI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "cvtss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse_cvtss2si VR128:$src))]>; -def Int_CVTSS2SIrm : SSI<0x2D, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvtss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse_cvtss2si - (load addr:$src)))]>; - -// Match intrinsics which expect MM and XMM operand(s). -def Int_CVTPS2PIrr : PSI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvtps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtps2pi VR128:$src))]>; -def Int_CVTPS2PIrm : PSI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvtps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtps2pi - (load addr:$src)))]>; -def Int_CVTTPS2PIrr: PSI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttps2pi VR128:$src))]>; -def Int_CVTTPS2PIrm: PSI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src), - "cvttps2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttps2pi - (load addr:$src)))]>; -let Constraints = "$src1 = $dst" in { - def Int_CVTPI2PSrr : PSI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR64:$src2), - "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, - VR64:$src2))]>; - def Int_CVTPI2PSrm : PSI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i64mem:$src2), - "cvtpi2ps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtpi2ps VR128:$src1, - (load addr:$src2)))]>; -} - -// Aliases for intrinsics -def Int_CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, - (int_x86_sse_cvttss2si VR128:$src))]>; -def Int_CVTTSS2SIrm : SSI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f32mem:$src), - "cvttss2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, - (int_x86_sse_cvttss2si(load addr:$src)))]>; - -let Constraints = "$src1 = $dst" in { - def Int_CVTSI2SSrr : SSI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, GR32:$src2), - "cvtsi2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1, - GR32:$src2))]>; - 
def Int_CVTSI2SSrm : SSI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2), - "cvtsi2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse_cvtsi2ss VR128:$src1, - (loadi32 addr:$src2)))]>; -} - -// Comparison instructions -let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in - def CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", []>; - - // Accept explicit immediate argument form instead of comparison code. -let isAsmParserOnly = 1 in { - def CMPSSrr_alt : SSIi8<0xC2, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -let mayLoad = 1 in - def CMPSSrm_alt : SSIi8<0xC2, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f32mem:$src, i8imm:$src2), - "cmpss\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -} -} - -let Defs = [EFLAGS] in { -def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>; -def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>; - -def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", []>; -def COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", []>; - -} // Defs = [EFLAGS] - -// Aliases to match intrinsics which expect XMM operand(s). -let Constraints = "$src1 = $dst" in { - def Int_CMPSSrr : SSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ss - VR128:$src1, - VR128:$src, imm:$cc))]>; - def Int_CMPSSrm : SSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, f32mem:$src, SSECC:$cc), - "cmp${cc}ss\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ss VR128:$src1, - (load addr:$src), imm:$cc))]>; -} - -let Defs = [EFLAGS] in { -def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), - VR128:$src2))]>; -def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), - "ucomiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), - (load addr:$src2)))]>; - -def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v4f32 VR128:$src1), - VR128:$src2))]>; -def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comiss\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v4f32 VR128:$src1), - (load addr:$src2)))]>; -} // Defs = [EFLAGS] - -// Aliases of packed SSE1 instructions for scalar use. These all have names -// that start with 'Fs'. - -// Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, - canFoldAsLoad = 1 in - // FIXME: Set encoding to pseudo! 
-def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", - [(set FR32:$dst, fp32imm0)]>, - Requires<[HasSSE1]>, TB, OpSize; - -// Alias instruction to do FR32 reg-to-reg copy using movaps. Upper bits are -// disregarded. +// Move Aligned/Unaligned floating point values +multiclass sse12_mov_packed opc, RegisterClass RC, + X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d, + bit IsReMaterializable = 1> { let neverHasSideEffects = 1 in -def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; - -// Alias instruction to load FR32 from f128mem using movaps. Upper bits are -// disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1 in -def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), - "movaps\t{$src, $dst|$dst, $src}", - [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; - -// Alias bitwise logical operations using SSE logical ops on packed FP values. -let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { - def FsANDPSrr : PSI<0x54, MRMSrcReg, (outs FR32:$dst), - (ins FR32:$src1, FR32:$src2), - "andps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fand FR32:$src1, FR32:$src2))]>; - def FsORPSrr : PSI<0x56, MRMSrcReg, (outs FR32:$dst), - (ins FR32:$src1, FR32:$src2), - "orps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86for FR32:$src1, FR32:$src2))]>; - def FsXORPSrr : PSI<0x57, MRMSrcReg, (outs FR32:$dst), - (ins FR32:$src1, FR32:$src2), - "xorps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fxor FR32:$src1, FR32:$src2))]>; -} - -def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f128mem:$src2), - "andps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fand FR32:$src1, - (memopfsf32 addr:$src2)))]>; -def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f128mem:$src2), - "orps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86for FR32:$src1, - (memopfsf32 addr:$src2)))]>; -def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst), - (ins FR32:$src1, f128mem:$src2), - "xorps\t{$src2, $dst|$dst, $src2}", - [(set FR32:$dst, (X86fxor FR32:$src1, - (memopfsf32 addr:$src2)))]>; - -let neverHasSideEffects = 1 in { -def FsANDNPSrr : PSI<0x55, MRMSrcReg, - (outs FR32:$dst), (ins FR32:$src1, FR32:$src2), - "andnps\t{$src2, $dst|$dst, $src2}", []>; -let mayLoad = 1 in -def FsANDNPSrm : PSI<0x55, MRMSrcMem, - (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2), - "andnps\t{$src2, $dst|$dst, $src2}", []>; + def rr : PI; +let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in + def rm : PI; } -} - -/// basic_sse1_fp_binop_rm - SSE1 binops come in both scalar and vector forms. -/// -/// In addition, we also have a special variant of the scalar form here to -/// represent the associated intrinsic operation. This form is unlike the -/// plain scalar form, in that it takes an entire vector (instead of a scalar) -/// and leaves the top elements unmodified (therefore these cannot be commuted). -/// -/// These three forms can each be reg+reg or reg+mem, so there are a total of -/// six "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass basic_sse1_fp_binop_rm opc, string OpcodeStr, - SDNode OpNode, Intrinsic F32Int, - bit Commutable = 0> { - // Scalar operation, reg+reg. - def SSrr : SSI { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SSrm : SSI; - - // Vector operation, reg+reg. - def PSrr : PSI { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. 
- def PSrm : PSI; - - // Intrinsic operation, reg+reg. - def SSrr_Int : SSI; - - // Intrinsic operation, reg+mem. - def SSrm_Int : SSI; -} -} - -// Arithmetic instructions -defm ADD : basic_sse1_fp_binop_rm<0x58, "add", fadd, int_x86_sse_add_ss, 1>; -defm MUL : basic_sse1_fp_binop_rm<0x59, "mul", fmul, int_x86_sse_mul_ss, 1>; -defm SUB : basic_sse1_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse_sub_ss>; -defm DIV : basic_sse1_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse_div_ss>; - -/// sse1_fp_binop_rm - Other SSE1 binops -/// -/// This multiclass is like basic_sse1_fp_binop_rm, with the addition of -/// instructions for a full-vector intrinsic form. Operations that map -/// onto C operators don't use this form since they just use the plain -/// vector form instead of having a separate vector intrinsic form. -/// -/// This provides a total of eight "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass sse1_fp_binop_rm opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F32Int, - Intrinsic V4F32Int, - bit Commutable = 0> { - - // Scalar operation, reg+reg. - def SSrr : SSI { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SSrm : SSI; - - // Vector operation, reg+reg. - def PSrr : PSI { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. - def PSrm : PSI; - - // Intrinsic operation, reg+reg. - def SSrr_Int : SSI { - let isCommutable = Commutable; - } - // Intrinsic operation, reg+mem. - def SSrm_Int : SSI; - - // Vector intrinsic operation, reg+reg. - def PSrr_Int : PSI { - let isCommutable = Commutable; - } - - // Vector intrinsic operation, reg+mem. - def PSrm_Int : PSI; -} +let isAsmParserOnly = 1 in { +defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, + "movaps", SSEPackedSingle>, VEX; +defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, + "movapd", SSEPackedDouble>, OpSize, VEX; +defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, + "movups", SSEPackedSingle>, VEX; +defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, + "movupd", SSEPackedDouble, 0>, OpSize, VEX; + +defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32, + "movaps", SSEPackedSingle>, VEX; +defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64, + "movapd", SSEPackedDouble>, OpSize, VEX; +defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, + "movups", SSEPackedSingle>, VEX; +defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, + "movupd", SSEPackedDouble, 0>, OpSize, VEX; } +defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, + "movaps", SSEPackedSingle>, TB; +defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, + "movapd", SSEPackedDouble>, TB, OpSize; +defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, + "movups", SSEPackedSingle>, TB; +defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, + "movupd", SSEPackedDouble, 0>, TB, OpSize; -defm MAX : sse1_fp_binop_rm<0x5F, "max", X86fmax, - int_x86_sse_max_ss, int_x86_sse_max_ps>; -defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin, - int_x86_sse_min_ss, int_x86_sse_min_ps>; - -//===----------------------------------------------------------------------===// -// SSE packed FP Instructions - -// Move Instructions -let neverHasSideEffects = 1 in -def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movaps\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVAPSrm : PSI<0x28, 
MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), +let isAsmParserOnly = 1 in { +def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (alignedloadv4f32 addr:$src))]>; - + [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX; +def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(alignedstore (v2f64 VR128:$src), addr:$dst)]>, VEX; +def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movups\t{$src, $dst|$dst, $src}", + [(store (v4f32 VR128:$src), addr:$dst)]>, VEX; +def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(store (v2f64 VR128:$src), addr:$dst)]>, VEX; +def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(alignedstore (v8f32 VR256:$src), addr:$dst)]>, VEX; +def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(alignedstore (v4f64 VR256:$src), addr:$dst)]>, VEX; +def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movups\t{$src, $dst|$dst, $src}", + [(store (v8f32 VR256:$src), addr:$dst)]>, VEX; +def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(store (v4f64 VR256:$src), addr:$dst)]>, VEX; +} def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)]>; - -let neverHasSideEffects = 1 in -def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movups\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movups\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (loadv4f32 addr:$src))]>; +def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(alignedstore (v2f64 VR128:$src), addr:$dst)]>; def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", [(store (v4f32 VR128:$src), addr:$dst)]>; +def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(store (v2f64 VR128:$src), addr:$dst)]>; -// Intrinsic forms of MOVUPS load and store +// Intrinsic forms of MOVUPS/D load and store +let isAsmParserOnly = 1 in { + let canFoldAsLoad = 1, isReMaterializable = 1 in + def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "movups\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX; + def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX; + def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movups\t{$src, $dst|$dst, $src}", + [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX; + def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX; +} let canFoldAsLoad = 1, isReMaterializable = 1 in def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, 
$dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; +def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>; + def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>; +def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>; -let Constraints = "$src1 = $dst" in { - let AddedComplexity = 20 in { - def MOVLPSrm : PSI<0x12, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - "movlps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (movlp VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; - def MOVHPSrm : PSI<0x16, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - "movhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (movlhps VR128:$src1, - (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; - } // AddedComplexity -} // Constraints = "$src1 = $dst" - +// Move Low/High packed floating point values +multiclass sse12_mov_hilo_packedopc, RegisterClass RC, + PatFrag mov_frag, string base_opc, + string asm_opr> { + def PSrm : PI, TB; + + def PDrm : PI, TB, OpSize; +} -def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), - (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; +let isAsmParserOnly = 1, AddedComplexity = 20 in { + defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; + defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", + "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; +} +let Constraints = "$src1 = $dst", AddedComplexity = 20 in { + defm MOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", + "\t{$src2, $dst|$dst, $src2}">; + defm MOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", + "\t{$src2, $dst|$dst, $src2}">; +} +let isAsmParserOnly = 1 in { +def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movlps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), + (iPTR 0))), addr:$dst)]>, VEX; +def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movlpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (v2f64 VR128:$src), + (iPTR 0))), addr:$dst)]>, VEX; +} def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), (iPTR 0))), addr:$dst)]>; +def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movlpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract (v2f64 VR128:$src), + (iPTR 0))), addr:$dst)]>; // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. 
+let isAsmParserOnly = 1 in { +def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movhps\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (unpckh (bc_v2f64 (v4f32 VR128:$src)), + (undef)), (iPTR 0))), addr:$dst)]>, + VEX; +def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (unpckh VR128:$src, (undef))), + (iPTR 0))), addr:$dst)]>, + VEX; +} def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (unpckh (bc_v2f64 (v4f32 VR128:$src)), (undef)), (iPTR 0))), addr:$dst)]>; +def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), + "movhpd\t{$src, $dst|$dst, $src}", + [(store (f64 (vector_extract + (v2f64 (unpckh VR128:$src, (undef))), + (iPTR 0))), addr:$dst)]>; -let Constraints = "$src1 = $dst" in { +let isAsmParserOnly = 1, AddedComplexity = 20 in { + def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>, + VEX_4V; + def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>, + VEX_4V; +} +let Constraints = "$src1 = $dst", AddedComplexity = 20 in { + def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movlhps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; + def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2), + "movhlps\t{$src2, $dst|$dst, $src2}", + [(set VR128:$dst, + (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; +} + +def : Pat<(movlhps VR128:$src1, (bc_v4i32 (v2i64 (X86vzload addr:$src2)))), + (MOVHPSrm (v4i32 VR128:$src1), addr:$src2)>; let AddedComplexity = 20 in { -def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - "movlhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; + def : Pat<(v4f32 (movddup VR128:$src, (undef))), + (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; + def : Pat<(v2i64 (movddup VR128:$src, (undef))), + (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; +} -def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2), - "movhlps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (movhlps VR128:$src1, VR128:$src2)))]>; -} // AddedComplexity -} // Constraints = "$src1 = $dst" +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Conversion Instructions +//===----------------------------------------------------------------------===// -let AddedComplexity = 20 in { -def : Pat<(v4f32 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v4f32 VR128:$src), (v4f32 VR128:$src))>; -def : Pat<(v2i64 (movddup VR128:$src, (undef))), - (MOVLHPSrr (v2i64 VR128:$src), (v2i64 VR128:$src))>; +multiclass sse12_cvt_s opc, RegisterClass SrcRC, RegisterClass DstRC, + SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, + string asm> { + def rr : SI; + def rm : SI; } +multiclass sse12_cvt_p opc, RegisterClass SrcRC, RegisterClass DstRC, + SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d> { + def rr : PI; + 
def rm : PI; +} +multiclass sse12_vcvt_avx opc, RegisterClass SrcRC, RegisterClass DstRC, + SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, + string asm> { + def rr : SI; + def rm : SI; +} -// Arithmetic +let isAsmParserOnly = 1 in { +defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, + "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX; +defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, + "cvttsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; +defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, + "cvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}">, XS, + VEX_4V; +defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, + "cvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}">, XD, + VEX_4V; +} -/// sse1_fp_unop_rm - SSE1 unops come in both scalar and vector forms. -/// -/// In addition, we also have a special variant of the scalar form here to -/// represent the associated intrinsic operation. This form is unlike the -/// plain scalar form, in that it takes an entire vector (instead of a -/// scalar) and leaves the top elements undefined. -/// -/// And, we have a special variant form for a full-vector intrinsic form. -/// -/// These four forms can each have a reg or a mem operand, so there are a -/// total of eight "instructions". -/// -multiclass sse1_fp_unop_rm opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F32Int, - Intrinsic V4F32Int, - bit Commutable = 0> { - // Scalar operation, reg. - def SSr : SSI { - let isCommutable = Commutable; - } - - // Scalar operation, mem. - def SSm : I, XS, - Requires<[HasSSE1, OptForSize]>; - - // Vector operation, reg. - def PSr : PSI { - let isCommutable = Commutable; - } - - // Vector operation, mem. - def PSm : PSI; - - // Intrinsic operation, reg. - def SSr_Int : SSI { - let isCommutable = Commutable; - } +defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, + "cvttss2si\t{$src, $dst|$dst, $src}">, XS; +defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, + "cvttsd2si\t{$src, $dst|$dst, $src}">, XD; +defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, + "cvtsi2ss\t{$src, $dst|$dst, $src}">, XS; +defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, + "cvtsi2sd\t{$src, $dst|$dst, $src}">, XD; + +// Conversion Instructions Intrinsics - Match intrinsics which expect MM +// and/or XMM operand(s). +multiclass sse12_cvt_pint opc, RegisterClass SrcRC, RegisterClass DstRC, + Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + string asm, Domain d> { + def rr : PI; + def rm : PI; +} - // Intrinsic operation, mem. - def SSm_Int : SSI; +multiclass sse12_cvt_sint opc, RegisterClass SrcRC, RegisterClass DstRC, + Intrinsic Int, X86MemOperand x86memop, PatFrag ld_frag, + string asm> { + def rr : SI; + def rm : SI; +} - // Vector intrinsic operation, reg - def PSr_Int : PSI { - let isCommutable = Commutable; - } +multiclass sse12_cvt_pint_3addr opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, + PatFrag ld_frag, string asm, Domain d> { + def rr : PI; + def rm : PI; +} - // Vector intrinsic operation, mem - def PSm_Int : PSI; +multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, + RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, + PatFrag ld_frag, string asm> { + def rr : SI; + def rm : SI; } -// Square root. 
-defm SQRT : sse1_fp_unop_rm<0x51, "sqrt", fsqrt, - int_x86_sse_sqrt_ss, int_x86_sse_sqrt_ps>; +let isAsmParserOnly = 1 in { + defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS, + VEX; + defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, + f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, + VEX; +} +defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + f32mem, load, "cvtss2si\t{$src, $dst|$dst, $src}">, XS; +defm Int_CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, + f128mem, load, "cvtsd2si\t{$src, $dst|$dst, $src}">, XD; -// Reciprocal approximations. Note that these typically require refinement -// in order to obtain suitable precision. -defm RSQRT : sse1_fp_unop_rm<0x52, "rsqrt", X86frsqrt, - int_x86_sse_rsqrt_ss, int_x86_sse_rsqrt_ps>; -defm RCP : sse1_fp_unop_rm<0x53, "rcp", X86frcp, - int_x86_sse_rcp_ss, int_x86_sse_rcp_ps>; -// Logical let Constraints = "$src1 = $dst" in { - let isCommutable = 1 in { - def ANDPSrr : PSI<0x54, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "andps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 - (and VR128:$src1, VR128:$src2)))]>; - def ORPSrr : PSI<0x56, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "orps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 - (or VR128:$src1, VR128:$src2)))]>; - def XORPSrr : PSI<0x57, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "xorps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (v2i64 - (xor VR128:$src1, VR128:$src2)))]>; - } - - def ANDPSrm : PSI<0x54, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "andps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (and (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def ORPSrm : PSI<0x56, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "orps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (or (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def XORPSrm : PSI<0x57, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "xorps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (xor (bc_v2i64 (v4f32 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def ANDNPSrr : PSI<0x55, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "andnps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (and (xor VR128:$src1, - (bc_v2i64 (v4i32 immAllOnesV))), - VR128:$src2)))]>; - def ANDNPSrm : PSI<0x55, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2), - "andnps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)), - (bc_v2i64 (v4i32 immAllOnesV))), - (memopv2i64 addr:$src2))))]>; + defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse_cvtsi2ss, i32mem, loadi32, + "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XS; + defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse2_cvtsi2sd, i32mem, loadi32, + "cvtsi2ss\t{$src2, $dst|$dst, $src2}">, XD; } +// Instructions below don't have an AVX form. 
+defm Int_CVTPS2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtps2pi, + f64mem, load, "cvtps2pi\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; +defm Int_CVTPD2PI : sse12_cvt_pint<0x2D, VR128, VR64, int_x86_sse_cvtpd2pi, + f128mem, memop, "cvtpd2pi\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +defm Int_CVTTPS2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttps2pi, + f64mem, load, "cvttps2pi\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; +defm Int_CVTTPD2PI : sse12_cvt_pint<0x2C, VR128, VR64, int_x86_sse_cvttpd2pi, + f128mem, memop, "cvttpd2pi\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; +defm Int_CVTPI2PD : sse12_cvt_pint<0x2A, VR64, VR128, int_x86_sse_cvtpi2pd, + i64mem, load, "cvtpi2pd\t{$src, $dst|$dst, $src}", + SSEPackedDouble>, TB, OpSize; let Constraints = "$src1 = $dst" in { - def CMPPSrri : PSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, - VR128:$src, imm:$cc))]>; - def CMPPSrmi : PSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc), - "cmp${cc}ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cmp_ps VR128:$src1, - (memop addr:$src), imm:$cc))]>; - - // Accept explicit immediate argument form instead of comparison code. -let isAsmParserOnly = 1 in { - def CMPPSrri_alt : PSIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2), - "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", []>; - def CMPPSrmi_alt : PSIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2), - "cmpps\t{$src2, $src, $dst|$dst, $src, $src2}", []>; + defm Int_CVTPI2PS : sse12_cvt_pint_3addr<0x2A, VR64, VR128, + int_x86_sse_cvtpi2ps, + i64mem, load, "cvtpi2ps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; } -} -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), - (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; -def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), - (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; - -// Shuffle and unpack instructions -let Constraints = "$src1 = $dst" in { - let isConvertibleToThreeAddress = 1 in // Convert to pshufd - def SHUFPSrri : PSIi8<0xC6, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, - VR128:$src2, i8imm:$src3), - "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (v4f32 (shufp:$src3 VR128:$src1, VR128:$src2)))]>; - def SHUFPSrmi : PSIi8<0xC6, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, - f128mem:$src2, i8imm:$src3), - "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", - [(set VR128:$dst, - (v4f32 (shufp:$src3 - VR128:$src1, (memopv4f32 addr:$src2))))]>; - - let AddedComplexity = 10 in { - def UNPCKHPSrr : PSI<0x15, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpckhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, VR128:$src2)))]>; - def UNPCKHPSrm : PSI<0x15, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "unpckhps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckh VR128:$src1, - (memopv4f32 addr:$src2))))]>; - - def UNPCKLPSrr : PSI<0x14, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "unpcklps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v4f32 (unpckl VR128:$src1, VR128:$src2)))]>; - def UNPCKLPSrm : PSI<0x14, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
- "unpcklps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (unpckl VR128:$src1, (memopv4f32 addr:$src2)))]>; - } // AddedComplexity -} // Constraints = "$src1 = $dst" - -// Mask creation -def MOVMSKPSrr : PSI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "movmskps\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse_movmsk_ps VR128:$src))]>; -def MOVMSKPDrr : PDI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "movmskpd\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_movmsk_pd VR128:$src))]>; - -// Prefetch intrinsic. -def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src), - "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>; -def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src), - "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>; -def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src), - "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>; -def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src), - "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>; - -// Non-temporal stores -def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>; - -let AddedComplexity = 400 in { // Prefer non-temporal versions -def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>; - -def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>; -def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), - "movnti\t{$src, $dst|$dst, $src}", - [(nontemporalstore (i32 GR32:$src), addr:$dst)]>, - TB, Requires<[HasSSE2]>; +/// SSE 1 Only -def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), - "movnti\t{$src, $dst|$dst, $src}", - [(nontemporalstore (i64 GR64:$src), addr:$dst)]>, - TB, Requires<[HasSSE2]>; +// Aliases for intrinsics +let isAsmParserOnly = 1, Pattern = [] in { +defm Int_VCVTTSS2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32, + int_x86_sse_cvttss2si, f32mem, load, + "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS; +defm Int_VCVTTSD2SI : sse12_cvt_sint_3addr<0x2C, VR128, GR32, + int_x86_sse2_cvttsd2si, f128mem, load, + "cvttss2si\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD; } - -// Load, store, and memory fence -def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>, - TB, Requires<[HasSSE1]>; - -// MXCSR register -def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), - "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>; -def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), - "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>; - -// Alias instructions that map zero vector to pxor / xorp* for sse. -// We set canFoldAsLoad because this can be converted to a constant-pool -// load of an all-zeros value if folding it would be beneficial. -// FIXME: Change encoding to pseudo! 
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1 in { -def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4f32 immAllZerosV))]>; -def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v2f64 immAllZerosV))]>; -let ExeDomain = SSEPackedInt in -def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllZerosV))]>; +defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, + f32mem, load, "cvttss2si\t{$src, $dst|$dst, $src}">, + XS; +defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, + f128mem, load, "cvttsd2si\t{$src, $dst|$dst, $src}">, + XD; + +let isAsmParserOnly = 1, Pattern = [] in { +defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, + "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX; +defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load, + "cvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB, VEX; } - -def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>; -def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>; -def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>; - -def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), - (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>; - -//===---------------------------------------------------------------------===// -// SSE2 Instructions -//===---------------------------------------------------------------------===// - -// Move Instructions. Register-to-register movsd is not used for FR64 -// register copies because it's a partial register update; FsMOVAPDrr is -// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG -// because INSERT_SUBREG requires that the insert be implementable in terms of -// a copy, and, as just mentioned, we don't use movsd for copies. -let Constraints = "$src1 = $dst" in -def MOVSDrr : SDI<0x10, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, FR64:$src2), - "movsd\t{$src2, $dst|$dst, $src2}", - [(set (v2f64 VR128:$dst), - (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>; - -// Extract the low 64-bit value from one vector and insert it into another. -let AddedComplexity = 15 in -def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)), - (MOVSDrr (v2f64 VR128:$src1), - (EXTRACT_SUBREG (v2f64 VR128:$src2), sub_sd))>; - -// Implicitly promote a 64-bit scalar to a vector. -def : Pat<(v2f64 (scalar_to_vector FR64:$src)), - (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, sub_sd)>; - -// Loading from memory automatically zeroing upper bits. -let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in -def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (loadf64 addr:$src))]>; - -// MOVSDrm zeros the high parts of the register; represent this -// with SUBREG_TO_REG.
-let AddedComplexity = 20 in { -def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; -def : Pat<(v2f64 (X86vzload addr:$src)), - (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), sub_sd)>; +let Pattern = [] in { +defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/, + "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS; +defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load /*dummy*/, + "cvtdq2ps\t{$src, $dst|$dst, $src}", + SSEPackedSingle>, TB; /* PD SSE3 form is available */ } -// Store scalar value to memory. -def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), - "movsd\t{$src, $dst|$dst, $src}", - [(store FR64:$src, addr:$dst)]>; - -// Extract and store. -def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - addr:$dst), - (MOVSDmr addr:$dst, - (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; +/// SSE 2 Only -// Conversion instructions -def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src), - "cvttsd2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (fp_to_sint FR64:$src))]>; -def CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f64mem:$src), - "cvttsd2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (fp_to_sint (loadf64 addr:$src)))]>; +// Convert scalar double to scalar single +let isAsmParserOnly = 1 in { +def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), + (ins FR64:$src1, FR64:$src2), + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V; +def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), + (ins FR64:$src1, f64mem:$src2), + "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V; +} def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround FR64:$src))]>; @@ -1226,35 +695,28 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), "cvtsd2ss\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD, Requires<[HasSSE2, OptForSize]>; -def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src), - "cvtsi2sd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (sint_to_fp GR32:$src))]>; -def CVTSI2SDrm : SDI<0x2A, MRMSrcMem, (outs FR64:$dst), (ins i32mem:$src), - "cvtsi2sd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (sint_to_fp (loadi32 addr:$src)))]>; -def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; -def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; -def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; -def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", []>; -def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>; -def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtps2dq\t{$src, $dst|$dst, $src}", []>; -def
CVTDQ2PSrr : PSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", []>; -def CVTDQ2PSrm : PSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", []>; -def COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", []>; -def COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", []>; - -// SSE2 instructions with XS prefix +let isAsmParserOnly = 1 in +defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, + int_x86_sse2_cvtsd2ss, f64mem, load, + "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, + XD, VEX_4V; +let Constraints = "$src1 = $dst" in +defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, + int_x86_sse2_cvtsd2ss, f64mem, load, + "cvtsd2ss\t{$src2, $dst|$dst, $src2}">, XD; + +// Convert scalar single to scalar double +let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix +def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), + (ins FR32:$src1, FR32:$src2), + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, XS, Requires<[HasAVX]>, VEX_4V; +def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), + (ins FR32:$src1, f32mem:$src2), + "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>; +} def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src), "cvtss2sd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (fextend FR32:$src))]>, XS, @@ -1264,415 +726,106 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), [(set FR64:$dst, (extloadf32 addr:$src))]>, XS, Requires<[HasSSE2, OptForSize]>;
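// Editorial sketch (not part of the original patch): the recurring pattern in
// this file pairs each SSE instruction with an AVX twin wrapped in
// `let isAsmParserOnly = 1`, which exposes the VEX-encoded form to the
// assembler while keeping instruction selection on the legacy form (AVX
// codegen was not wired up yet at this revision). VEX_4V marks the extra
// source register that the VEX prefix encodes in its vvvv field, which is
// what turns the two-operand SSE syntax into the three-operand AVX syntax.
// Restating the definition above:
//
//   let isAsmParserOnly = 1 in
//   def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
//                       (ins FR32:$src1, FR32:$src2),
//                       "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
//                       []>, XS, Requires<[HasAVX]>, VEX_4V;
//
// Here $src1 is carried in VEX.vvvv, and the empty pattern list ([]) keeps
// the selector emitting the legacy CVTSS2SDrr instead.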
-def Int_CVTPD2PIrr : PDI<0x2D, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvtpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtpd2pi VR128:$src))]>; -def Int_CVTPD2PIrm : PDI<0x2D, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), - "cvtpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvtpd2pi - (memop addr:$src)))]>; -def Int_CVTTPD2PIrr: PDI<0x2C, MRMSrcReg, (outs VR64:$dst), (ins VR128:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttpd2pi VR128:$src))]>; -def Int_CVTTPD2PIrm: PDI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f128mem:$src), - "cvttpd2pi\t{$src, $dst|$dst, $src}", - [(set VR64:$dst, (int_x86_sse_cvttpd2pi - (memop addr:$src)))]>; -def Int_CVTPI2PDrr : PDI<0x2A, MRMSrcReg, (outs VR128:$dst), (ins VR64:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cvtpi2pd VR64:$src))]>; -def Int_CVTPI2PDrm : PDI<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "cvtpi2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_cvtpi2pd - (load addr:$src)))]>; +// Convert doubleword to packed single/double fp +let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix +def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, + TB, VEX, Requires<[HasAVX]>; +def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vcvtdq2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps + (bitconvert (memopv2i64 addr:$src))))]>, + TB, VEX, Requires<[HasAVX]>; +} +def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, + TB, Requires<[HasSSE2]>; +def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "cvtdq2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2ps + (bitconvert (memopv2i64 addr:$src))))]>, + TB, Requires<[HasSSE2]>; -// Aliases for intrinsics -def Int_CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), - "cvttsd2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, - (int_x86_sse2_cvttsd2si VR128:$src))]>; -def Int_CVTTSD2SIrm : SDI<0x2C, MRMSrcMem, (outs GR32:$dst), (ins f128mem:$src), - "cvttsd2si\t{$src, $dst|$dst, $src}", - [(set GR32:$dst, (int_x86_sse2_cvttsd2si - (load addr:$src)))]>; - -// Comparison instructions -let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { - def CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; -let mayLoad = 1 in - def CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", []>; +// FIXME: why is the non-intrinsic version described as SSE3?
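// Context for the FIXME above (editorial note): the pre-patch definitions
// removed earlier in this hunk declared the non-intrinsic CVTDQ2PD/CVTPD2DQ
// records with the SSE3 instruction classes (S3SI/S3DI), even though the
// underlying instructions are SSE2; the intrinsic forms below instead use the
// plain XS-prefixed class I with Requires<[HasSSE2]>.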
+let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix +def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>, + XS, VEX, Requires<[HasAVX]>; +def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vcvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd + (bitconvert (memopv2i64 addr:$src))))]>, + XS, VEX, Requires<[HasAVX]>; +} +def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>, + XS, Requires<[HasSSE2]>; +def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "cvtdq2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtdq2pd + (bitconvert (memopv2i64 addr:$src))))]>, + XS, Requires<[HasSSE2]>; - // Accept explicit immediate argument form instead of comparison code. +// Convert packed single/double fp to doubleword let isAsmParserOnly = 1 in { - def CMPSDrr_alt : SDIi8<0xC2, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; -let mayLoad = 1 in - def CMPSDrm_alt : SDIi8<0xC2, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f64mem:$src, i8imm:$src2), - "cmpsd\t{$src2, $src, $dst|$dst, $src, $src2}", []>; +def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; } -} - -let Defs = [EFLAGS] in { -def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>; -def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>; -} // Defs = [EFLAGS] +def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>; +def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", []>; -// Aliases to match intrinsics which expect XMM operand(s). 
-let Constraints = "$src1 = $dst" in { - def Int_CMPSDrr : SDIi8<0xC2, MRMSrcReg, - (outs VR128:$dst), - (ins VR128:$src1, VR128:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, - VR128:$src, imm:$cc))]>; - def Int_CMPSDrm : SDIi8<0xC2, MRMSrcMem, - (outs VR128:$dst), - (ins VR128:$src1, f64mem:$src, SSECC:$cc), - "cmp${cc}sd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cmp_sd VR128:$src1, - (load addr:$src), imm:$cc))]>; +let isAsmParserOnly = 1 in { +def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>, + VEX; +def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "cvtps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2dq + (memop addr:$src)))]>, VEX; } - -let Defs = [EFLAGS] in { -def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), - VR128:$src2))]>; -def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), - "ucomisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), - (load addr:$src2)))]>; - -def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v2f64 VR128:$src1), - VR128:$src2))]>; -def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), - "comisd\t{$src2, $src1|$src1, $src2}", - [(set EFLAGS, (X86comi (v2f64 VR128:$src1), - (load addr:$src2)))]>; -} // Defs = [EFLAGS] - -// Aliases of packed SSE2 instructions for scalar use. These all have names -// that start with 'Fs'. - -// Alias instructions that map fld0 to pxor for sse. -let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, - canFoldAsLoad = 1 in -def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", - [(set FR64:$dst, fpimm0)]>, - Requires<[HasSSE2]>, TB, OpSize; - -// Alias instruction to do FR64 reg-to-reg copy using movapd. Upper bits are -// disregarded. -let neverHasSideEffects = 1 in -def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; - -// Alias instruction to load FR64 from f128mem using movapd. Upper bits are -// disregarded. -let canFoldAsLoad = 1, isReMaterializable = 1 in -def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; - -// Alias bitwise logical operations using SSE logical ops on packed FP values. 
-let Constraints = "$src1 = $dst" in { -let isCommutable = 1 in { - def FsANDPDrr : PDI<0x54, MRMSrcReg, (outs FR64:$dst), - (ins FR64:$src1, FR64:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fand FR64:$src1, FR64:$src2))]>; - def FsORPDrr : PDI<0x56, MRMSrcReg, (outs FR64:$dst), - (ins FR64:$src1, FR64:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86for FR64:$src1, FR64:$src2))]>; - def FsXORPDrr : PDI<0x57, MRMSrcReg, (outs FR64:$dst), - (ins FR64:$src1, FR64:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fxor FR64:$src1, FR64:$src2))]>; -} - -def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f128mem:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fand FR64:$src1, - (memopfsf64 addr:$src2)))]>; -def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f128mem:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86for FR64:$src1, - (memopfsf64 addr:$src2)))]>; -def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), - (ins FR64:$src1, f128mem:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set FR64:$dst, (X86fxor FR64:$src1, - (memopfsf64 addr:$src2)))]>; - -let neverHasSideEffects = 1 in { -def FsANDNPDrr : PDI<0x55, MRMSrcReg, - (outs FR64:$dst), (ins FR64:$src1, FR64:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", []>; -let mayLoad = 1 in -def FsANDNPDrm : PDI<0x55, MRMSrcMem, - (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", []>; -} -} - -/// basic_sse2_fp_binop_rm - SSE2 binops come in both scalar and vector forms. -/// -/// In addition, we also have a special variant of the scalar form here to -/// represent the associated intrinsic operation. This form is unlike the -/// plain scalar form, in that it takes an entire vector (instead of a scalar) -/// and leaves the top elements unmodified (therefore these cannot be commuted). -/// -/// These three forms can each be reg+reg or reg+mem, so there are a total of -/// six "instructions". -/// -let Constraints = "$src1 = $dst" in { -multiclass basic_sse2_fp_binop_rm opc, string OpcodeStr, - SDNode OpNode, Intrinsic F64Int, - bit Commutable = 0> { - // Scalar operation, reg+reg. - def SDrr : SDI { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SDrm : SDI; - - // Vector operation, reg+reg. - def PDrr : PDI { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. - def PDrm : PDI; - - // Intrinsic operation, reg+reg. - def SDrr_Int : SDI; - - // Intrinsic operation, reg+mem. - def SDrm_Int : SDI; -} -} - -// Arithmetic instructions -defm ADD : basic_sse2_fp_binop_rm<0x58, "add", fadd, int_x86_sse2_add_sd, 1>; -defm MUL : basic_sse2_fp_binop_rm<0x59, "mul", fmul, int_x86_sse2_mul_sd, 1>; -defm SUB : basic_sse2_fp_binop_rm<0x5C, "sub", fsub, int_x86_sse2_sub_sd>; -defm DIV : basic_sse2_fp_binop_rm<0x5E, "div", fdiv, int_x86_sse2_div_sd>; - -/// sse2_fp_binop_rm - Other SSE2 binops -/// -/// This multiclass is like basic_sse2_fp_binop_rm, with the addition of -/// instructions for a full-vector intrinsic form. Operations that map -/// onto C operators don't use this form since they just use the plain -/// vector form instead of having a separate vector intrinsic form. -/// -/// This provides a total of eight "instructions". 
-/// -let Constraints = "$src1 = $dst" in { -multiclass sse2_fp_binop_rm<bits<8> opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F64Int, - Intrinsic V2F64Int, - bit Commutable = 0> { - - // Scalar operation, reg+reg. - def SDrr : SDI { - let isCommutable = Commutable; - } - - // Scalar operation, reg+mem. - def SDrm : SDI; - - // Vector operation, reg+reg. - def PDrr : PDI { - let isCommutable = Commutable; - } - - // Vector operation, reg+mem. - def PDrm : PDI; - - // Intrinsic operation, reg+reg. - def SDrr_Int : SDI { - let isCommutable = Commutable; - } - - // Intrinsic operation, reg+mem. - def SDrm_Int : SDI; - - // Vector intrinsic operation, reg+reg. - def PDrr_Int : PDI { - let isCommutable = Commutable; - } - - // Vector intrinsic operation, reg+mem. - def PDrm_Int : PDI; -} -} - -defm MAX : sse2_fp_binop_rm<0x5F, "max", X86fmax, - int_x86_sse2_max_sd, int_x86_sse2_max_pd>; -defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin, - int_x86_sse2_min_sd, int_x86_sse2_min_pd>; - -//===---------------------------------------------------------------------===// -// SSE packed FP Instructions - -// Move Instructions -let neverHasSideEffects = 1 in -def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movapd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (alignedloadv2f64 addr:$src))]>; - -def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movapd\t{$src, $dst|$dst, $src}", - [(alignedstore (v2f64 VR128:$src), addr:$dst)]>; - -let neverHasSideEffects = 1 in -def MOVUPDrr : PDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1 in -def MOVUPDrm : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (loadv2f64 addr:$src))]>; -def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(store (v2f64 VR128:$src), addr:$dst)]>; - -// Intrinsic forms of MOVUPD load and store -def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>; -def MOVUPDmr_Int : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>; - -let Constraints = "$src1 = $dst" in { - let AddedComplexity = 20 in { - def MOVLPDrm : PDI<0x12, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - "movlpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (movlp VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))))]>; - def MOVHPDrm : PDI<0x16, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - "movhpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (v2f64 (movlhps VR128:$src1, - (scalar_to_vector (loadf64 addr:$src2)))))]>; - } // AddedComplexity -} // Constraints = "$src1 = $dst" - -def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), - "movlpd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract (v2f64 VR128:$src), - (iPTR 0))), addr:$dst)]>; - -// v2f64 extract element 1 is always custom lowered to unpack high to low -// and extract element 0 so the non-store version isn't too horrible.
-def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), - "movhpd\t{$src, $dst|$dst, $src}", - [(store (f64 (vector_extract - (v2f64 (unpckh VR128:$src, (undef))), - (iPTR 0))), addr:$dst)]>; - -// SSE2 instructions without OpSize prefix -def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, - TB, Requires<[HasSSE2]>; -def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "cvtdq2ps\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2ps - (bitconvert (memopv2i64 addr:$src))))]>, - TB, Requires<[HasSSE2]>; - -// SSE2 instructions with XS prefix -def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>, - XS, Requires<[HasSSE2]>; -def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "cvtdq2pd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtdq2pd - (bitconvert (memopv2i64 addr:$src))))]>, - XS, Requires<[HasSSE2]>; - def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>; @@ -1680,12 +833,54 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memop addr:$src)))]>; -// SSE2 packed instructions with XS prefix + +let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix +def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + XD, VEX, Requires<[HasAVX]>; +def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq + (memop addr:$src)))]>, + XD, VEX, Requires<[HasAVX]>; +} +def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, + XD, Requires<[HasSSE2]>; +def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvtpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2dq + (memop addr:$src)))]>, + XD, Requires<[HasSSE2]>; + + +// Convert with truncation packed single/double fp to doubleword +let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix +def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; +} def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>; def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>; + +let isAsmParserOnly = 1 in { +def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (int_x86_sse2_cvttps2dq VR128:$src))]>, + XS, VEX, Requires<[HasAVX]>; +def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), + "vcvttps2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, 
(int_x86_sse2_cvttps2dq + (memop addr:$src)))]>, + XS, VEX, Requires<[HasAVX]>; +} def Int_CVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -1697,17 +892,18 @@ def Int_CVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (memop addr:$src)))]>, XS, Requires<[HasSSE2]>; -// SSE2 packed instructions with XD prefix -def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, - XD, Requires<[HasSSE2]>; -def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "cvtpd2dq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_cvtpd2dq - (memop addr:$src)))]>, - XD, Requires<[HasSSE2]>; - +let isAsmParserOnly = 1 in { +def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>, + VEX; +def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "cvttpd2dq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvttpd2dq + (memop addr:$src)))]>, VEX; +} def Int_CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; @@ -1716,12 +912,31 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memop addr:$src)))]>; -// SSE2 instructions without OpSize prefix +// Convert packed single to packed double +let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix +def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX, + Requires<[HasAVX]>; +def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX, + Requires<[HasAVX]>; +} def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; +let isAsmParserOnly = 1 in { +def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, + VEX, Requires<[HasAVX]>; +def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + "cvtps2pd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtps2pd + (load addr:$src)))]>, + VEX, Requires<[HasAVX]>; +} def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, @@ -1732,12 +947,29 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), (load addr:$src)))]>, TB, Requires<[HasSSE2]>; +// Convert packed double to packed single +let isAsmParserOnly = 1 in { +def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX; +// FIXME: the memory form of this instruction should be described using +// extra asm syntax +} def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins
f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; +let isAsmParserOnly = 1 in { +def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>; +def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), + (ins f128mem:$src), + "cvtpd2ps\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_cvtpd2ps + (memop addr:$src)))]>; +} def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>; @@ -1746,269 +978,1039 @@ def Int_CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memop addr:$src)))]>; -// Match intrinsics which expect XMM operand(s). -// Aliases for intrinsics -let Constraints = "$src1 = $dst" in { -def Int_CVTSI2SDrr: SDI<0x2A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, GR32:$src2), - "cvtsi2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1, - GR32:$src2))]>; -def Int_CVTSI2SDrm: SDI<0x2A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i32mem:$src2), - "cvtsi2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsi2sd VR128:$src1, - (loadi32 addr:$src2)))]>; -def Int_CVTSD2SSrr: SDI<0x5A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "cvtsd2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, - VR128:$src2))]>; -def Int_CVTSD2SSrm: SDI<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), - "cvtsd2ss\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtsd2ss VR128:$src1, - (load addr:$src2)))]>; -def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - VR128:$src2))]>, XS, - Requires<[HasSSE2]>; -def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f32mem:$src2), - "cvtss2sd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, - (load addr:$src2)))]>, XS, - Requires<[HasSSE2]>; +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Compare Instructions +//===----------------------------------------------------------------------===// + +// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions +multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, + string asm, string asm_alt> { + def rr : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), + asm, []>; + let mayLoad = 1 in + def rm : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc), + asm, []>; + // Accept explicit immediate argument form instead of comparison code. + let isAsmParserOnly = 1 in { + def rr_alt : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, []>; + let mayLoad = 1 in + def rm_alt : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2), + asm_alt, []>; + } } -// Arithmetic +let neverHasSideEffects = 1, isAsmParserOnly = 1 in { + defm VCMPSS : sse12_cmp_scalar, + XS, VEX_4V; + defm VCMPSD : sse12_cmp_scalar, + XD, VEX_4V; +} -/// sse2_fp_unop_rm - SSE2 unops come in both scalar and vector forms. -/// -/// In addition, we also have a special variant of the scalar form here to -/// represent the associated intrinsic operation. This form is unlike the
This form is unlike the -/// plain scalar form, in that it takes an entire vector (instead of a -/// scalar) and leaves the top elements undefined. -/// -/// And, we have a special variant form for a full-vector intrinsic form. -/// -/// These four forms can each have a reg or a mem operand, so there are a -/// total of eight "instructions". -/// -multiclass sse2_fp_unop_rm opc, string OpcodeStr, - SDNode OpNode, - Intrinsic F64Int, - Intrinsic V2F64Int, - bit Commutable = 0> { - // Scalar operation, reg. - def SDr : SDI { - let isCommutable = Commutable; - } +let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in { + defm CMPSS : sse12_cmp_scalar, XS; + defm CMPSD : sse12_cmp_scalar, XD; +} - // Scalar operation, mem. - def SDm : SDI; +multiclass sse12_cmp_scalar_int { + def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src, SSECC:$cc), asm, + [(set VR128:$dst, (Int VR128:$src1, + VR128:$src, imm:$cc))]>; + def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f32mem:$src, SSECC:$cc), asm, + [(set VR128:$dst, (Int VR128:$src1, + (load addr:$src), imm:$cc))]>; +} - // Vector operation, reg. - def PDr : PDI { - let isCommutable = Commutable; - } +// Aliases to match intrinsics which expect XMM operand(s). +let isAsmParserOnly = 1 in { + defm Int_VCMPSS : sse12_cmp_scalar_int, + XS, VEX_4V; + defm Int_VCMPSD : sse12_cmp_scalar_int, + XD, VEX_4V; +} +let Constraints = "$src1 = $dst" in { + defm Int_CMPSS : sse12_cmp_scalar_int, XS; + defm Int_CMPSD : sse12_cmp_scalar_int, XD; +} - // Vector operation, mem. - def PDm : PDI; - // Intrinsic operation, reg. - def SDr_Int : SDI { - let isCommutable = Commutable; - } +// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS +multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, + ValueType vt, X86MemOperand x86memop, + PatFrag ld_frag, string OpcodeStr, Domain d> { + def rr: PI; + def rm: PI; +} - // Intrinsic operation, mem. 
- def SDm_Int : SDI; +let Defs = [EFLAGS] in { + let isAsmParserOnly = 1 in { + defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + "ucomiss", SSEPackedSingle>, VEX; + defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + "ucomisd", SSEPackedDouble>, OpSize, VEX; + let Pattern = [] in { + defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, VEX; + defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, OpSize, VEX; + } - // Vector intrinsic operation, reg - def PDr_Int : PDI { - let isCommutable = Commutable; + defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss", SSEPackedSingle>, VEX; + defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd", SSEPackedDouble>, OpSize, VEX; + + defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, + load, "comiss", SSEPackedSingle>, VEX; + defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, + load, "comisd", SSEPackedDouble>, OpSize, VEX; + } + defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + "ucomiss", SSEPackedSingle>, TB; + defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + "ucomisd", SSEPackedDouble>, TB, OpSize; + + let Pattern = [] in { + defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, TB; + defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, TB, OpSize; } - // Vector intrinsic operation, mem - def PDm_Int : PDI; -} + defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss", SSEPackedSingle>, TB; + defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd", SSEPackedDouble>, TB, OpSize; -// Square root. -defm SQRT : sse2_fp_unop_rm<0x51, "sqrt", fsqrt, - int_x86_sse2_sqrt_sd, int_x86_sse2_sqrt_pd>; + defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, TB; + defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, TB, OpSize; +} // Defs = [EFLAGS] -// There is no f64 version of the reciprocal approximation instructions. +// sse12_cmp_packed - sse 1 & 2 compare packed instructions +multiclass sse12_cmp_packed<RegisterClass RC, Intrinsic Int, string asm, + string asm_alt, Domain d> { + def rri : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; + def rmi : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; + // Accept explicit immediate argument form instead of comparison code.
+ let isAsmParserOnly = 1 in { + def rri_alt : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, [], d>; + def rmi_alt : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2), + asm_alt, [], d>; + } +} -// Logical +let isAsmParserOnly = 1 in { + defm VCMPPS : sse12_cmp_packed, VEX_4V; + defm VCMPPD : sse12_cmp_packed, OpSize, VEX_4V; +} let Constraints = "$src1 = $dst" in { + defm CMPPS : sse12_cmp_packed, TB; + defm CMPPD : sse12_cmp_packed, TB, OpSize; +} + +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; +def : Pat<(v4i32 (X86cmpps (v4f32 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), + (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; +def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)), + (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Shuffle Instructions +//===----------------------------------------------------------------------===// + +/// sse12_shuffle - sse 1 & 2 shuffle instructions +multiclass sse12_shuffle<ValueType vt, string asm, PatFrag mem_frag, + Domain d, bit IsConvertibleToThreeAddress = 0> { + def rmi : PIi8<0xC6, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, f128mem:$src2, i8imm:$src3), asm, + [(set VR128:$dst, (vt (shufp:$src3 + VR128:$src1, (mem_frag addr:$src2))))], d>; + let isConvertibleToThreeAddress = IsConvertibleToThreeAddress in + def rri : PIi8<0xC6, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), asm, + [(set VR128:$dst, + (vt (shufp:$src3 VR128:$src1, VR128:$src2)))], d>; +} + +let isAsmParserOnly = 1 in { + defm VSHUFPS : sse12_shuffle, VEX_4V; + defm VSHUFPD : sse12_shuffle, OpSize, VEX_4V; +} + +let Constraints = "$src1 = $dst" in { + defm SHUFPS : sse12_shuffle, + TB; + defm SHUFPD : sse12_shuffle, TB, OpSize; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Unpack Instructions +//===----------------------------------------------------------------------===// + +/// sse12_unpack_interleave - sse 1 & 2 unpack and interleave +multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt, + PatFrag mem_frag, RegisterClass RC, + X86MemOperand x86memop, string asm, + Domain d> { + def rr : PI; + def rm : PI; +} + +let AddedComplexity = 10 in { + let isAsmParserOnly = 1 in { + defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, + VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, + VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst,
$src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, + VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, + VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + + defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32, + VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64, + VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32, + VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64, + VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; } - def ANDPDrm : PDI<0x54, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "andpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def ORPDrm : PDI<0x56, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "orpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (or (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def XORPDrm : PDI<0x57, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "xorpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (xor (bc_v2i64 (v2f64 VR128:$src1)), - (memopv2i64 addr:$src2)))]>; - def ANDNPDrr : PDI<0x55, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), - (bc_v2i64 (v2f64 VR128:$src2))))]>; - def ANDNPDrm : PDI<0x55, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1,f128mem:$src2), - "andnpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, - (and (vnot (bc_v2i64 (v2f64 VR128:$src1))), - (memopv2i64 addr:$src2)))]>; + let Constraints = "$src1 = $dst" in { + defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, + VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; + defm UNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, + VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}", + SSEPackedDouble>, TB, OpSize; + defm UNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, + VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}", + SSEPackedSingle>, TB; + defm UNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, + VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}", + SSEPackedDouble>, TB, OpSize; + } // Constraints = "$src1 = $dst" +} // AddedComplexity + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Extract Floating-Point Sign mask +//===----------------------------------------------------------------------===// + +/// sse12_extr_sign_mask - sse 1 & 2 unpack and interleave +multiclass sse12_extr_sign_mask { + def rr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins RC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set GR32:$dst, (Int RC:$src))], d>; +} + +// Mask creation +defm MOVMSKPS : sse12_extr_sign_mask, TB; +defm MOVMSKPD : sse12_extr_sign_mask, TB, 
OpSize; + +let isAsmParserOnly = 1 in { + defm VMOVMSKPS : sse12_extr_sign_mask, VEX; + defm VMOVMSKPD : sse12_extr_sign_mask, OpSize, + VEX; + // FIXME: merge with multiclass above when the intrinsics come. + def VMOVMSKPSYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; + def VMOVMSKPDYrr : PI<0x50, MRMSrcReg, (outs GR32:$dst), (ins VR256:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, + VEX; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions +//===----------------------------------------------------------------------===// + +// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have +// names that start with 'Fs'. + +// Alias instructions that map fld0 to pxor for sse. +let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1, + canFoldAsLoad = 1 in { + // FIXME: Set encoding to pseudo! +def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "", + [(set FR32:$dst, fp32imm0)]>, + Requires<[HasSSE1]>, TB, OpSize; +def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "", + [(set FR64:$dst, fpimm0)]>, + Requires<[HasSSE2]>, TB, OpSize; +} + +// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper +// bits are disregarded. +let neverHasSideEffects = 1 in { +def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), + "movaps\t{$src, $dst|$dst, $src}", []>; +def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), + "movapd\t{$src, $dst|$dst, $src}", []>; +} + +// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper +// bits are disregarded. +let canFoldAsLoad = 1, isReMaterializable = 1 in { +def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), + "movaps\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; +def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), + "movapd\t{$src, $dst|$dst, $src}", + [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; +} + +//===----------------------------------------------------------------------===// +// SSE 1 & 2 - Logical Instructions +//===----------------------------------------------------------------------===// + +/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops +/// +multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr, + SDNode OpNode> { + let isAsmParserOnly = 1 in { + defm V#NAME#PS : sse12_fp_packed, VEX_4V; + + defm V#NAME#PD : sse12_fp_packed, OpSize, VEX_4V; + } + + let Constraints = "$src1 = $dst" in { + defm PS : sse12_fp_packed, TB; + + defm PD : sse12_fp_packed, TB, OpSize; + } +} + +// Alias bitwise logical operations using SSE logical ops on packed FP values.
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
+//===----------------------------------------------------------------------===//
+
+// Aliases of packed SSE1 & SSE2 instructions for scalar use. These all have
+// names that start with 'Fs'.
+
+// Alias instructions that map fld0 to pxor for sse.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, isCodeGenOnly = 1,
+    canFoldAsLoad = 1 in {
+  // FIXME: Set encoding to pseudo!
+def FsFLD0SS : I<0xEF, MRMInitReg, (outs FR32:$dst), (ins), "",
+                 [(set FR32:$dst, fp32imm0)]>,
+                 Requires<[HasSSE1]>, TB, OpSize;
+def FsFLD0SD : I<0xEF, MRMInitReg, (outs FR64:$dst), (ins), "",
+                 [(set FR64:$dst, fpimm0)]>,
+                 Requires<[HasSSE2]>, TB, OpSize;
+}
+
+// Alias instruction to do FR32 or FR64 reg-to-reg copy using movaps. Upper
+// bits are disregarded.
+let neverHasSideEffects = 1 in {
+def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
+                     "movaps\t{$src, $dst|$dst, $src}", []>;
+def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
+                     "movapd\t{$src, $dst|$dst, $src}", []>;
+}
+
+// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
+// bits are disregarded.
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
+                     "movaps\t{$src, $dst|$dst, $src}",
+                     [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
+def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
+                     "movapd\t{$src, $dst|$dst, $src}",
+                     [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
+}
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Logical Instructions
+//===----------------------------------------------------------------------===//
+
+/// sse12_fp_alias_pack_logical - SSE 1 & 2 aliased packed FP logical ops
+///
+multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
+                                       SDNode OpNode> {
+  let isAsmParserOnly = 1 in {
+    defm V#NAME#PS : sse12_fp_packed, VEX_4V;
+
+    defm V#NAME#PD : sse12_fp_packed, OpSize, VEX_4V;
+  }
+
+  let Constraints = "$src1 = $dst" in {
+    defm PS : sse12_fp_packed, TB;
+
+    defm PD : sse12_fp_packed, TB, OpSize;
+  }
+}
+
+// Alias bitwise logical operations using SSE logical ops on packed FP values.
+let mayLoad = 0 in {
+  defm FsAND  : sse12_fp_alias_pack_logical<0x54, "and", X86fand>;
+  defm FsOR   : sse12_fp_alias_pack_logical<0x56, "or", X86for>;
+  defm FsXOR  : sse12_fp_alias_pack_logical<0x57, "xor", X86fxor>;
+}
+
+let neverHasSideEffects = 1, Pattern = [], isCommutable = 0 in
+  defm FsANDN : sse12_fp_alias_pack_logical<0x55, "andn", undef>;
+
+/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
+///
+multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
+                                   SDNode OpNode, int HasPat = 0,
+                                   list<list<dag>> Pattern = []> {
+  let isAsmParserOnly = 1, Pattern = [] in {
+    defm V#NAME#PS : sse12_fp_packed_logical_rm,
+                     VEX_4V;
+
+    defm V#NAME#PD : sse12_fp_packed_logical_rm,
+                     OpSize, VEX_4V;
+  }
+  let Constraints = "$src1 = $dst" in {
+    defm PS : sse12_fp_packed_logical_rm, TB;
+
+    defm PD : sse12_fp_packed_logical_rm,
+              TB, OpSize;
+  }
+}
+
+/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
+///
+let isAsmParserOnly = 1 in {
+multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
+  defm PSY : sse12_fp_packed_logical_rm, VEX_4V;
+
+  defm PDY : sse12_fp_packed_logical_rm, OpSize, VEX_4V;
+}
+}
+
+// AVX 256-bit packed logical ops forms
+defm VAND  : sse12_fp_packed_logical_y<0x54, "and">;
+defm VOR   : sse12_fp_packed_logical_y<0x56, "or">;
+defm VXOR  : sse12_fp_packed_logical_y<0x57, "xor">;
+let isCommutable = 0 in
+  defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">;
+
+defm AND : sse12_fp_packed_logical<0x54, "and", and>;
+defm OR  : sse12_fp_packed_logical<0x56, "or", or>;
+defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
+let isCommutable = 0 in
+  defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
+    // single r+r
+    [(set VR128:$dst, (v2i64 (and (xor VR128:$src1,
+                                       (bc_v2i64 (v4i32 immAllOnesV))),
+                                  VR128:$src2)))],
+    // double r+r
+    [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+                           (bc_v2i64 (v2f64 VR128:$src2))))],
+    // single r+m
+    [(set VR128:$dst, (v2i64 (and (xor (bc_v2i64 (v4f32 VR128:$src1)),
+                                       (bc_v2i64 (v4i32 immAllOnesV))),
+                                  (memopv2i64 addr:$src2))))],
+    // double r+m
+    [(set VR128:$dst, (and (vnot (bc_v2i64 (v2f64 VR128:$src1))),
+                           (memopv2i64 addr:$src2)))]]>;
+
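+// The V#NAME#PS / V#NAME#PD defms above splice the outer defm name into the
+// generated record names: "defm FsAND" produces FsANDPS/FsANDPD plus the AVX
+// variants VFsANDPS/VFsANDPD, and "defm AND" above likewise produces
+// ANDPS/ANDPD and VANDPS/VANDPD. (Illustrative naming sketch; the operand
+// patterns come from the sse12_fp_packed* multiclasses the defms invoke.)
+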
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and
+/// vector forms.
+///
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a scalar)
+/// and leaves the top elements unmodified (therefore these cannot be commuted).
+///
+/// These three forms can each be reg+reg or reg+mem.
+///
+multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                  bit Is2Addr = 1> {
+  defm SS : sse12_fp_scalar, XS;
+  defm SD : sse12_fp_scalar, XD;
+}
+
+multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
+                                  bit Is2Addr = 1> {
+  let mayLoad = 0 in {
+  defm PS : sse12_fp_packed, TB;
+  defm PD : sse12_fp_packed, TB, OpSize;
+  }
+}
+
+multiclass basic_sse12_fp_binop_p_y<bits<8> opc, string OpcodeStr,
+                                    SDNode OpNode> {
+  let mayLoad = 0 in {
+  defm PSY : sse12_fp_packed, TB;
+  defm PDY : sse12_fp_packed, TB, OpSize;
+  }
+}
+
+multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+                                      bit Is2Addr = 1> {
+  defm SS : sse12_fp_scalar_int, XS;
+  defm SD : sse12_fp_scalar_int, XD;
+}
+
+multiclass basic_sse12_fp_binop_p_int<bits<8> opc, string OpcodeStr,
+                                      bit Is2Addr = 1> {
+  defm PS : sse12_fp_packed_int, TB;
+
+  defm PD : sse12_fp_packed_int, TB, OpSize;
+}
+
+// Binary Arithmetic instructions
+let isAsmParserOnly = 1 in {
+  defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+              basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+              basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
+  defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+              basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+              basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+
+  let isCommutable = 0 in {
+    defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+                basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+                basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
+    defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+                basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+                basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
+    defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+                basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+                basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, VEX_4V;
+    defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+                basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+                basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
+  }
+}
+
+let Constraints = "$src1 = $dst" in {
+  defm ADD : basic_sse12_fp_binop_s<0x58, "add", fadd>,
+             basic_sse12_fp_binop_p<0x58, "add", fadd>,
+             basic_sse12_fp_binop_s_int<0x58, "add">;
+  defm MUL : basic_sse12_fp_binop_s<0x59, "mul", fmul>,
+             basic_sse12_fp_binop_p<0x59, "mul", fmul>,
+             basic_sse12_fp_binop_s_int<0x59, "mul">;
+
+  let isCommutable = 0 in {
+    defm SUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub>,
+               basic_sse12_fp_binop_p<0x5C, "sub", fsub>,
+               basic_sse12_fp_binop_s_int<0x5C, "sub">;
+    defm DIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv>,
+               basic_sse12_fp_binop_p<0x5E, "div", fdiv>,
+               basic_sse12_fp_binop_s_int<0x5E, "div">;
+    defm MAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax>,
+               basic_sse12_fp_binop_p<0x5F, "max", X86fmax>,
+               basic_sse12_fp_binop_s_int<0x5F, "max">,
+               basic_sse12_fp_binop_p_int<0x5F, "max">;
+    defm MIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin>,
+               basic_sse12_fp_binop_p<0x5D, "min", X86fmin>,
+               basic_sse12_fp_binop_s_int<0x5D, "min">,
+               basic_sse12_fp_binop_p_int<0x5D, "min">;
+  }
+  }
 }
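+// Name concatenation works the same way here: "defm ADD" above expands the
+// SS/SD defms of basic_sse12_fp_binop_s into the ADDSS*/ADDSD* scalar record
+// families and, via basic_sse12_fp_binop_p, the ADDPS*/ADDPD* packed ones;
+// "defm VADD" passes Is2Addr = 0 so the VADD* records get the three-operand
+// AVX assembly strings instead. (Illustrative name-expansion sketch only.)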
-let Constraints = "$src1 = $dst" in {
-  def CMPPDrri : PDIi8<0xC2, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src, SSECC:$cc),
-                    "cmp${cc}pd\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
-                                                        VR128:$src, imm:$cc))]>;
-  def CMPPDrmi : PDIi8<0xC2, MRMSrcMem,
-                    (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, SSECC:$cc),
-                    "cmp${cc}pd\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (int_x86_sse2_cmp_pd VR128:$src1,
-                                                 (memop addr:$src), imm:$cc))]>;
+/// Unop Arithmetic
+/// In addition, we also have a special variant of the scalar form here to
+/// represent the associated intrinsic operation. This form is unlike the
+/// plain scalar form, in that it takes an entire vector (instead of a
+/// scalar) and leaves the top elements undefined.
+///
+/// And, we have a special variant form for a full-vector intrinsic form.
+
+/// sse1_fp_unop_s - SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr,
+                          SDNode OpNode, Intrinsic F32Int> {
+  def SSr : SSI;
+  // For scalar unary operations, fold a load into the operation
+  // only in OptForSize mode. It eliminates an instruction, but it also
+  // eliminates a whole-register clobber (the load), so it introduces a
+  // partial register update condition.
+  def SSm : I, XS,
+            Requires<[HasSSE1, OptForSize]>;
+  def SSr_Int : SSI;
+  def SSm_Int : SSI;
+}
+
+/// sse1_fp_unop_s_avx - AVX SSE1 unops in scalar form.
+multiclass sse1_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+                              SDNode OpNode, Intrinsic F32Int> {
+  def SSr : SSI;
+  def SSm : I, XS, Requires<[HasAVX, OptForSize]>;
+  def SSr_Int : SSI;
+  def SSm_Int : SSI;
+}
+
+/// sse1_fp_unop_p - SSE1 unops in packed form.
+multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  def PSr : PSI;
+  def PSm : PSI;
+}
+
+/// sse1_fp_unop_p_y - AVX 256-bit SSE1 unops in packed form.
+multiclass sse1_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  def PSYr : PSI;
+  def PSYm : PSI;
+}
+
+/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms.
+multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+                              Intrinsic V4F32Int> {
+  def PSr_Int : PSI;
+  def PSm_Int : PSI;
+}
+
+
+/// sse2_fp_unop_s - SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr,
+                          SDNode OpNode, Intrinsic F64Int> {
+  def SDr : SDI;
+  // See the comments in sse1_fp_unop_s for why this is OptForSize.
+  def SDm : I, XD,
+            Requires<[HasSSE2, OptForSize]>;
+  def SDr_Int : SDI;
+  def SDm_Int : SDI;
+}
+
+/// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form.
+multiclass sse2_fp_unop_s_avx<bits<8> opc, string OpcodeStr,
+                              SDNode OpNode, Intrinsic F64Int> {
+  def SDr : VSDI;
+  def SDm : VSDI;
+  def SDr_Int : VSDI;
+  def SDm_Int : VSDI;
+}
+
+/// sse2_fp_unop_p - SSE2 unops in vector forms.
+multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr,
+                          SDNode OpNode> {
+  def PDr : PDI;
+  def PDm : PDI;
+}
+
+/// sse2_fp_unop_p_y - AVX SSE2 256-bit unops in vector forms.
+multiclass sse2_fp_unop_p_y<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+  def PDYr : PDI;
+  def PDYm : PDI;
+}
+
+/// sse2_fp_unop_p_int - SSE2 intrinsic unops in vector forms.
+multiclass sse2_fp_unop_p_int<bits<8> opc, string OpcodeStr,
+                              Intrinsic V2F64Int> {
+  def PDr_Int : PDI;
+  def PDm_Int : PDI;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+  // Square root.
+  defm VSQRT  : sse1_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+                sse2_fp_unop_s_avx<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+                VEX_4V;
+
+  defm VSQRT  : sse1_fp_unop_p<0x51, "vsqrt", fsqrt>,
+                sse2_fp_unop_p<0x51, "vsqrt", fsqrt>,
+                sse1_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+                sse2_fp_unop_p_y<0x51, "vsqrt", fsqrt>,
+                VEX;
+
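+  // Sketch of the scalar/_Int split the section comment describes: from the
+  // SQRT defm below, SQRTSSr applies fsqrt to a bare FR32, while SQRTSSr_Int
+  // feeds a whole VR128 through int_x86_sse_sqrt_ss and leaves the upper
+  // elements undefined, so the two need distinct records. (Illustrative
+  // reading of the SSr/SSr_Int naming above.)
+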
+  // Reciprocal approximations. Note that these typically require refinement
+  // in order to obtain suitable precision.
+  defm VRSQRT : sse1_fp_unop_s_avx<0x52, "rsqrt", X86frsqrt,
+                                   int_x86_sse_rsqrt_ss>, VEX_4V;
+  defm VRSQRT : sse1_fp_unop_p<0x52, "vrsqrt", X86frsqrt>,
+                sse1_fp_unop_p_y<0x52, "vrsqrt", X86frsqrt>, VEX;
+
+  defm VRCP   : sse1_fp_unop_s_avx<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+                VEX_4V;
+  defm VRCP   : sse1_fp_unop_p<0x53, "vrcp", X86frcp>,
+                sse1_fp_unop_p_y<0x53, "vrcp", X86frcp>, VEX;
+}
+
+// Square root.
+defm SQRT  : sse1_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse_sqrt_ss>,
+             sse1_fp_unop_p<0x51, "sqrt", fsqrt>,
+             sse1_fp_unop_p_int<0x51, "sqrt", int_x86_sse_sqrt_ps>,
+             sse2_fp_unop_s<0x51, "sqrt", fsqrt, int_x86_sse2_sqrt_sd>,
+             sse2_fp_unop_p<0x51, "sqrt", fsqrt>,
+             sse2_fp_unop_p_int<0x51, "sqrt", int_x86_sse2_sqrt_pd>;
+
+// Reciprocal approximations. Note that these typically require refinement
+// in order to obtain suitable precision.
+defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, int_x86_sse_rsqrt_ss>,
+             sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt>,
+             sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps>;
+defm RCP   : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
+             sse1_fp_unop_p<0x53, "rcp", X86frcp>,
+             sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps>;
+
+// There is no f64 version of the reciprocal approximation instructions.
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Non-temporal stores
+//===----------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1 in {
+  def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
+                         (ins i128mem:$dst, VR128:$src),
+                         "movntps\t{$src, $dst|$dst, $src}",
+                         [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
+  def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
+                         (ins i128mem:$dst, VR128:$src),
+                         "movntpd\t{$src, $dst|$dst, $src}",
+                         [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
+
+  let ExeDomain = SSEPackedInt in
+  def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+                         (ins f128mem:$dst, VR128:$src),
+                         "movntdq\t{$src, $dst|$dst, $src}",
+                         [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
+
+  let AddedComplexity = 400 in { // Prefer non-temporal versions
+    def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+                          (ins f128mem:$dst, VR128:$src),
+                          "movntps\t{$src, $dst|$dst, $src}",
+                          [(alignednontemporalstore (v4f32 VR128:$src),
+                                                    addr:$dst)]>, VEX;
+    def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
+                          (ins f128mem:$dst, VR128:$src),
+                          "movntpd\t{$src, $dst|$dst, $src}",
+                          [(alignednontemporalstore (v2f64 VR128:$src),
+                                                    addr:$dst)]>, VEX;
+    def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
+                             (ins f128mem:$dst, VR128:$src),
+                             "movntdq\t{$src, $dst|$dst, $src}",
+                             [(alignednontemporalstore (v2f64 VR128:$src),
+                                                       addr:$dst)]>, VEX;
+    let ExeDomain = SSEPackedInt in
+    def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+                          (ins f128mem:$dst, VR128:$src),
+                          "movntdq\t{$src, $dst|$dst, $src}",
+                          [(alignednontemporalstore (v4f32 VR128:$src),
+                                                    addr:$dst)]>, VEX;
+
+    def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+                           (ins f256mem:$dst, VR256:$src),
+                           "movntps\t{$src, $dst|$dst, $src}",
+                           [(alignednontemporalstore (v8f32 VR256:$src),
+                                                     addr:$dst)]>, VEX;
+    def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+                           (ins f256mem:$dst, VR256:$src),
+                           "movntpd\t{$src, $dst|$dst, $src}",
+                           [(alignednontemporalstore (v4f64 VR256:$src),
+                                                     addr:$dst)]>, VEX;
+    def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
+                              (ins f256mem:$dst, VR256:$src),
+                              "movntdq\t{$src, $dst|$dst, $src}",
+                              [(alignednontemporalstore (v4f64 VR256:$src),
+                                                        addr:$dst)]>, VEX;
+    let ExeDomain = SSEPackedInt in
+    def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+                           (ins f256mem:$dst, VR256:$src),
+                           "movntdq\t{$src, $dst|$dst, $src}",
+                           [(alignednontemporalstore (v8f32 VR256:$src),
+                                                     addr:$dst)]>, VEX;
+  }
+}
+
+def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+                    "movntps\t{$src, $dst|$dst, $src}",
+                    [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+                    "movntpd\t{$src, $dst|$dst, $src}",
+                    [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntdq\t{$src, $dst|$dst, $src}",
+                    [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntps\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntpd\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntdq\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+let ExeDomain = SSEPackedInt in
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+                    "movntdq\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+// There is no AVX form for instructions below this point
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+                 "movnti\t{$src, $dst|$dst, $src}",
+                 [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+               TB, Requires<[HasSSE2]>;
+
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+                     "movnti\t{$src, $dst|$dst, $src}",
+                     [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+                  TB, Requires<[HasSSE2]>;
+
+}
+def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+                     "movnti\t{$src, $dst|$dst, $src}",
+                     [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
+                   TB, Requires<[HasSSE2]>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Misc Instructions (No AVX form)
+//===----------------------------------------------------------------------===//
+
+// Prefetch intrinsic.
+def PREFETCHT0  : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
+    "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+def PREFETCHT1  : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
+    "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+def PREFETCHT2  : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
+    "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
+    "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
+
+// Load, store, and memory fence
+def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
+             TB, Requires<[HasSSE1]>;
+
+// Alias instructions that map zero vector to pxor / xorp* for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-zeros value if folding it would be beneficial.
+// FIXME: Change encoding to pseudo!
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+    isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+                 [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+                 [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+                 [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
+
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
+
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+          (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+
+//===----------------------------------------------------------------------===//
+// SSE 1 & 2 - Load/Store XCSR register
+//===----------------------------------------------------------------------===//
-  // Accept explicit immediate argument form instead of comparison code.
 let isAsmParserOnly = 1 in {
-  def CMPPDrri_alt : PDIi8<0xC2, MRMSrcReg,
-                  (outs VR128:$dst), (ins VR128:$src1, VR128:$src, i8imm:$src2),
-                  "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-  def CMPPDrmi_alt : PDIi8<0xC2, MRMSrcMem,
-                  (outs VR128:$dst), (ins VR128:$src1, f128mem:$src, i8imm:$src2),
-                  "cmppd\t{$src2, $src, $dst|$dst, $src, $src2}", []>;
-}
+  def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+                    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+  def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+                    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
 }
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), VR128:$src2, imm:$cc)),
-          (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>;
-def : Pat<(v2i64 (X86cmppd (v2f64 VR128:$src1), (memop addr:$src2), imm:$cc)),
-          (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>;
-// Shuffle and unpack instructions
-let Constraints = "$src1 = $dst" in {
-  def SHUFPDrri : PDIi8<0xC6, MRMSrcReg,
-                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3),
-                 "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                 [(set VR128:$dst,
-                   (v2f64 (shufp:$src3 VR128:$src1, VR128:$src2)))]>;
-  def SHUFPDrmi : PDIi8<0xC6, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1,
-                         f128mem:$src2, i8imm:$src3),
-                 "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                 [(set VR128:$dst,
-                   (v2f64 (shufp:$src3
-                           VR128:$src1, (memopv2f64 addr:$src2))))]>;
+def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+                  "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
+def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+                  "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>;
-  let AddedComplexity = 10 in {
-    def UNPCKHPDrr : PDI<0x15, MRMSrcReg,
-                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "unpckhpd\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (v2f64 (unpckh VR128:$src1, VR128:$src2)))]>;
-    def UNPCKHPDrm : PDI<0x15, MRMSrcMem,
-                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                         "unpckhpd\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (v2f64 (unpckh VR128:$src1,
-                                          (memopv2f64 addr:$src2))))]>;
-
-    def UNPCKLPDrr : PDI<0x14, MRMSrcReg,
-                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                         "unpcklpd\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (v2f64 (unpckl VR128:$src1, VR128:$src2)))]>;
-    def UNPCKLPDrm : PDI<0x14, MRMSrcMem,
-                         (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2),
-                         "unpcklpd\t{$src2, $dst|$dst, $src2}",
-                         [(set VR128:$dst,
-                           (unpckl VR128:$src1, (memopv2f64 addr:$src2)))]>;
-  } // AddedComplexity
-} // Constraints = "$src1 = $dst"
Constraints = "$src1 = $dst" +//===---------------------------------------------------------------------===// +// SSE2 - Move Aligned/Unaligned Packed Integer Instructions +//===---------------------------------------------------------------------===// +let ExeDomain = SSEPackedInt in { // SSE integer instructions +let isAsmParserOnly = 1 in { + let neverHasSideEffects = 1 in + def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; + def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; + + let canFoldAsLoad = 1, mayLoad = 1 in { + def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>, + VEX; + def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, + XS, VEX, Requires<[HasAVX]>; + } -//===---------------------------------------------------------------------===// -// SSE integer instructions -let ExeDomain = SSEPackedInt in { + let mayStore = 1 in { + def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>, VEX; + def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, + XS, VEX, Requires<[HasAVX]>; + } +} -// Move Instructions let neverHasSideEffects = 1 in def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movdqa\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1, mayLoad = 1 in + +let canFoldAsLoad = 1, mayLoad = 1 in { def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqa\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/]>; -let mayStore = 1 in -def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", - [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>; -let canFoldAsLoad = 1, mayLoad = 1 in def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(set VR128:$dst, (loadv2i64 addr:$src))*/]>, XS, Requires<[HasSSE2]>; -let mayStore = 1 in +} + +let mayStore = 1 in { +def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", + [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/]>; def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [/*(store (v2i64 VR128:$src), addr:$dst)*/]>, XS, Requires<[HasSSE2]>; +} // Intrinsic forms of MOVDQU load and store +let isAsmParserOnly = 1 in { +let canFoldAsLoad = 1 in +def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>, + XS, VEX, Requires<[HasAVX]>; +def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>, + XS, VEX, Requires<[HasAVX]>; +} + let canFoldAsLoad = 1 in def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movdqu\t{$src, $dst|$dst, $src}", @@ -2019,55 +2021,72 @@ def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), 
                        (ins i128mem:$dst, VR128:$src),
                        [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
                      XS, Requires<[HasSSE2]>;
-let Constraints = "$src1 = $dst" in {
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Arithmetic Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in { // SSE integer instructions
 
 multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
-                            bit Commutable = 0> {
+                            bit IsCommutable = 0, bit Is2Addr = 1> {
+  let isCommutable = IsCommutable in
   def rr : PDI {
-    let isCommutable = Commutable;
-  }
+             (ins VR128:$src1, VR128:$src2),
+             !if(Is2Addr,
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+             [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
   def rm : PDI;
+             (ins VR128:$src1, i128mem:$src2),
+             !if(Is2Addr,
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+             [(set VR128:$dst, (IntId VR128:$src1,
+                                      (bitconvert (memopv2i64 addr:$src2))))]>;
 }
 
 multiclass PDI_binop_rmi_int<bits<8> opc, bits<8> opc2, Format ImmForm,
-                             string OpcodeStr,
-                             Intrinsic IntId, Intrinsic IntId2> {
+                             string OpcodeStr, Intrinsic IntId,
+                             Intrinsic IntId2, bit Is2Addr = 1> {
   def rr : PDI;
+             (ins VR128:$src1, VR128:$src2),
+             !if(Is2Addr,
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+             [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2))]>;
   def rm : PDI;
   def ri : PDIi8;
+             (ins VR128:$src1, i32i8imm:$src2),
+             !if(Is2Addr,
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+             [(set VR128:$dst, (IntId2 VR128:$src1, (i32 imm:$src2)))]>;
 }
 
 /// PDI_binop_rm - Simple SSE2 binary operator.
 multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                        ValueType OpVT, bit Commutable = 0> {
+                        ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> {
+  let isCommutable = IsCommutable in
  def rr : PDI {
-    let isCommutable = Commutable;
-  }
+             (ins VR128:$src1, VR128:$src2),
+             !if(Is2Addr,
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+             [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>;
   def rm : PDI;
 }
@@ -2077,64 +2096,177 @@ multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
 /// to collapse (bitconvert VT to VT) into its operand.
 ///
 multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
-                              bit Commutable = 0> {
+                              bit IsCommutable = 0, bit Is2Addr = 1> {
+  let isCommutable = IsCommutable in
   def rr : PDI {
-    let isCommutable = Commutable;
-  }
+             (ins VR128:$src1, VR128:$src2),
+             !if(Is2Addr,
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+             [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))]>;
   def rm : PDI;
+             (ins VR128:$src1, i128mem:$src2),
+             !if(Is2Addr,
+                 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+                 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+             [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>;
 }
-} // Constraints = "$src1 = $dst"
 } // ExeDomain = SSEPackedInt
 
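+// The !if(Is2Addr, ...) in the multiclasses above picks the assembly string
+// per encoding: with the default Is2Addr = 1 (SSE, $src1 tied to $dst),
+// "defm PADDSB" below gets
+//   "paddsb\t{$src2, $dst|$dst, $src2}"
+// while "defm VPADDSB" passes Is2Addr = 0 and gets the three-operand AVX form
+//   "vpaddsb\t{$src2, $src1, $dst|$dst, $src1, $src2}"
+// (illustrative expansion of the multiclasses above).
+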
 // 128-bit Integer Arithmetic
-defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
-defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
-defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
-defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
-
-defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
-defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
-defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
-defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPADDB  : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
+defm VPADDW  : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
+defm VPADDD  : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
+defm VPADDQ  : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V;
+defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V;
+defm VPSUBB  : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V;
+defm VPSUBW  : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V;
+defm VPSUBD  : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V;
+defm VPSUBQ  : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 0>, VEX_4V;
+
+// Intrinsic forms
+defm VPSUBSB  : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>,
+                VEX_4V;
+defm VPSUBSW  : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>,
+                VEX_4V;
+defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>,
+                VEX_4V;
+defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>,
+                VEX_4V;
+defm VPADDSB  : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>,
+                VEX_4V;
+defm VPADDSW  : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>,
+                VEX_4V;
+defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>,
+                VEX_4V;
+defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>,
+                VEX_4V;
+defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>,
+                VEX_4V;
+defm VPMULHW  : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>,
+                VEX_4V;
+defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>,
+                VEX_4V;
+defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>,
+                VEX_4V;
+defm VPAVGB   : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>,
+                VEX_4V;
+defm VPAVGW   : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>,
+                VEX_4V;
+defm VPMINUB  : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>,
+                VEX_4V;
+defm VPMINSW  : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>,
+                VEX_4V;
+defm VPMAXUB  : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>,
+                VEX_4V;
+defm VPMAXSW  : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>,
+                VEX_4V;
+defm VPSADBW  : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>,
+                VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+defm PADDB  : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>;
+defm PADDW  : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>;
+defm PADDD  : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>;
+defm PADDQ  : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>;
+defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
 defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>;
 defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>;
 defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>;
 defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>;
 
+// Intrinsic forms
 defm PSUBSB  : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>;
 defm PSUBSW  : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>;
 defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>;
 defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>;
-
-defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>;
-
+defm PADDSB  : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>;
+defm PADDSW  : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>;
+defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>;
+defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>;
 defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>;
-defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w , 1>;
+defm PMULHW  : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>;
 defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>;
-
 defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>;
+defm PAVGB   : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
+defm PAVGW   : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+defm PMINUB  : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
+defm PMINSW  : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
+defm PMAXUB  : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
+defm PMAXSW  : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
+defm PSADBW  : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
+
+} // Constraints = "$src1 = $dst"
 
-defm PAVGB  : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>;
-defm PAVGW  : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Logical Instructions
+//===---------------------------------------------------------------------===//
 
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
+                                int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
+                                VEX_4V;
+defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
+                                int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
+                                VEX_4V;
+defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
+                                int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
+                                VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
+                                int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
+                                VEX_4V;
+defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
+                                int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
+                                VEX_4V;
+defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
+                                int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
+                                VEX_4V;
+
+defm VPSRAW :
+          PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
+                                int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
+                                VEX_4V;
+defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
+                                int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
+                                VEX_4V;
+
+defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V;
+defm VPOR  : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 1, 0>, VEX_4V;
-defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>;
-defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>;
-defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>;
-defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>;
-defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
 
+let ExeDomain = SSEPackedInt in {
+  let neverHasSideEffects = 1 in {
+    // 128-bit logical shifts.
+    def VPSLLDQri : PDIi8<0x73, MRM7r,
+                      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                      "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                      VEX_4V;
+    def VPSRLDQri : PDIi8<0x73, MRM3r,
+                      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                      "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                      VEX_4V;
+    // PSRADQri doesn't exist in SSE[1-3].
+  }
+  def VPANDNrr : PDI<0xDF, MRMSrcReg,
+                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                    "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                                  VR128:$src2)))]>, VEX_4V;
+  def VPANDNrm : PDI<0xDF, MRMSrcMem,
+                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+                    "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                                  (memopv2i64 addr:$src2))))]>,
+                                                  VEX_4V;
+}
+}
+
+let Constraints = "$src1 = $dst" in {
 defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
                                int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
 defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld",
@@ -2154,17 +2286,34 @@ defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw",
 defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
                                int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
 
-// 128-bit logical shifts.
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1,
-    ExeDomain = SSEPackedInt in {
-  def PSLLDQri : PDIi8<0x73, MRM7r,
-                       (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                       "pslldq\t{$src2, $dst|$dst, $src2}", []>;
-  def PSRLDQri : PDIi8<0x73, MRM3r,
-                       (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
-                       "psrldq\t{$src2, $dst|$dst, $src2}", []>;
-  // PSRADQri doesn't exist in SSE[1-3].
+defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
+defm POR  : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>;
+defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
+
+let ExeDomain = SSEPackedInt in {
+  let neverHasSideEffects = 1 in {
+    // 128-bit logical shifts.
+    def PSLLDQri : PDIi8<0x73, MRM7r,
+                         (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                         "pslldq\t{$src2, $dst|$dst, $src2}", []>;
+    def PSRLDQri : PDIi8<0x73, MRM3r,
+                         (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                         "psrldq\t{$src2, $dst|$dst, $src2}", []>;
+    // PSRADQri doesn't exist in SSE[1-3].
+  }
+  def PANDNrr : PDI<0xDF, MRMSrcReg,
+                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                    "pandn\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                                  VR128:$src2)))]>;
+
+  def PANDNrm : PDI<0xDF, MRMSrcMem,
+                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+                    "pandn\t{$src2, $dst|$dst, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                                  (memopv2i64 addr:$src2))))]>;
 }
+} // Constraints = "$src1 = $dst"
 
 let Predicates = [HasSSE2] in {
   def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2),
@@ -2185,32 +2334,33 @@ let Predicates = [HasSSE2] in {
             (v2i64 (PSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>;
 }
 
-// Logical
-defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
-defm POR  : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
-defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
-
-let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in {
-  def PANDNrr : PDI<0xDF, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                    "pandn\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
-                                                  VR128:$src2)))]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Comparison Instructions
+//===---------------------------------------------------------------------===//
 
-  def PANDNrm : PDI<0xDF, MRMSrcMem,
-                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                    "pandn\t{$src2, $dst|$dst, $src2}",
-                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
-                                                  (memopv2i64 addr:$src2))))]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+  defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
+                                   0>, VEX_4V;
+  defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
+                                   0>, VEX_4V;
+  defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1,
+                                   0>, VEX_4V;
+  defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0,
+                                   0>, VEX_4V;
+  defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0,
+                                   0>, VEX_4V;
+  defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0,
+                                   0>, VEX_4V;
 }
 
-// SSE2 Integer comparison
-defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b>;
-defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w>;
-defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d>;
-defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
-defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
-defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+let Constraints = "$src1 = $dst" in {
  defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>;
  defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>;
  defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>;
  defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>;
  defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>;
  defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>;
+} // Constraints = "$src1 = $dst"
 
 def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)),
           (PCMPEQBrr VR128:$src1, VR128:$src2)>;
@@ -2238,94 +2388,147 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, VR128:$src2)),
 def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
           (PCMPGTDrm VR128:$src1, addr:$src2)>;
 
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Pack Instructions
+//===---------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
+                                  0, 0>, VEX_4V;
+defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
+                                  0, 0>, VEX_4V;
+defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128,
+                                  0, 0>, VEX_4V;
+}
 
-// Pack instructions
+let Constraints = "$src1 = $dst" in {
 defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
 defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
 defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+} // Constraints = "$src1 = $dst"
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Shuffle Instructions
+//===---------------------------------------------------------------------===//
 
 let ExeDomain = SSEPackedInt in {
+multiclass sse2_pshuffle<string OpcodeStr, ValueType vt, PatFrag pshuf_frag,
+                         PatFrag bc_frag> {
+def ri : Ii8<0x70, MRMSrcReg,
+             (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
+             !strconcat(OpcodeStr,
+                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR128:$dst, (vt (pshuf_frag:$src2 VR128:$src1,
+                                                     (undef))))]>;
+def mi : Ii8<0x70, MRMSrcMem,
+             (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
+             !strconcat(OpcodeStr,
+                        "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR128:$dst, (vt (pshuf_frag:$src2
+                                      (bc_frag (memopv2i64 addr:$src1)),
+                                      (undef))))]>;
+}
+} // ExeDomain = SSEPackedInt
 
-// Shuffle and unpack instructions
-let AddedComplexity = 5 in {
-def PSHUFDri : PDIi8<0x70, MRMSrcReg,
-                     (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
-                     "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set VR128:$dst, (v4i32 (pshufd:$src2
-                                               VR128:$src1, (undef))))]>;
-def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
-                     (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
-                     "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                     [(set VR128:$dst, (v4i32 (pshufd:$src2
-                                               (bc_v4i32 (memopv2i64 addr:$src1)),
-                                               (undef))))]>;
-}
-
-// SSE2 with ImmT == Imm8 and XS prefix.
-def PSHUFHWri : Ii8<0x70, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
-                    "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst, (v8i16 (pshufhw:$src2 VR128:$src1,
-                                              (undef))))]>,
-                XS, Requires<[HasSSE2]>;
-def PSHUFHWmi : Ii8<0x70, MRMSrcMem,
-                    (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
-                    "pshufhw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst, (v8i16 (pshufhw:$src2
-                                              (bc_v8i16 (memopv2i64 addr:$src1)),
-                                              (undef))))]>,
-                XS, Requires<[HasSSE2]>;
-
-// SSE2 with ImmT == Imm8 and XD prefix.
-def PSHUFLWri : Ii8<0x70, MRMSrcReg,
-                    (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2),
-                    "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst, (v8i16 (pshuflw:$src2 VR128:$src1,
-                                              (undef))))]>,
-                XD, Requires<[HasSSE2]>;
-def PSHUFLWmi : Ii8<0x70, MRMSrcMem,
-                    (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
-                    "pshuflw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
-                    [(set VR128:$dst, (v8i16 (pshuflw:$src2
-                                              (bc_v8i16 (memopv2i64 addr:$src1)),
-                                              (undef))))]>,
-                XD, Requires<[HasSSE2]>;
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+  let AddedComplexity = 5 in
+  defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
+                 VEX;
+
+  // SSE2 with ImmT == Imm8 and XS prefix.
+  defm VPSHUFHW : sse2_pshuffle<"vpshufhw", v8i16, pshufhw, bc_v8i16>, XS,
+                  VEX;
-let Constraints = "$src1 = $dst" in {
-  def PUNPCKLBWrr : PDI<0x60, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpcklbw\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (v16i8 (unpckl VR128:$src1, VR128:$src2)))]>;
-  def PUNPCKLBWrm : PDI<0x60, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpcklbw\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (unpckl VR128:$src1,
-                                  (bc_v16i8 (memopv2i64 addr:$src2))))]>;
-  def PUNPCKLWDrr : PDI<0x61, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpcklwd\t{$src2, $dst|$dst, $src2}",
+  // SSE2 with ImmT == Imm8 and XD prefix.
+  defm VPSHUFLW : sse2_pshuffle<"vpshuflw", v8i16, pshuflw, bc_v8i16>, XD,
+                  VEX;
+}
+
+let Predicates = [HasSSE2] in {
+  let AddedComplexity = 5 in
+  defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize;
+
+  // SSE2 with ImmT == Imm8 and XS prefix.
+  defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, pshufhw, bc_v8i16>, XS;
+
+  // SSE2 with ImmT == Imm8 and XD prefix.
+  defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, pshuflw, bc_v8i16>, XD;
+}
+
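+// sse2_pshuffle stamps out an ri (register) and an mi (memory) form per defm:
+// "defm PSHUFD" above therefore yields PSHUFDri and PSHUFDmi, with the
+// immediate shuffle mask matched through the pshufd fragment and the memory
+// operand bitconverted by bc_v4i32. (Illustrative expansion of the defms in
+// this section.)
+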
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Unpack Instructions
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
+                       PatFrag unp_frag, PatFrag bc_frag, bit Is2Addr = 1> {
+  def rr : PDI;
+  def rm : PDI;
+}
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
+                                 0>, VEX_4V;
+  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
+                                 0>, VEX_4V;
+  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, unpckl, bc_v4i32,
+                                 0>, VEX_4V;
+
+  /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+  /// knew to collapse (bitconvert VT to VT) into its operand.
+  def VPUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
+                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                         "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set VR128:$dst,
-                          (v8i16 (unpckl VR128:$src1, VR128:$src2)))]>;
-  def PUNPCKLWDrm : PDI<0x61, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpcklwd\t{$src2, $dst|$dst, $src2}",
+                           (v2i64 (unpckl VR128:$src1, VR128:$src2)))]>, VEX_4V;
+  def VPUNPCKLQDQrm : PDI<0x6C, MRMSrcMem,
+                         (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+                         "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set VR128:$dst,
-                          (unpckl VR128:$src1,
-                                  (bc_v8i16 (memopv2i64 addr:$src2))))]>;
-  def PUNPCKLDQrr : PDI<0x62, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpckldq\t{$src2, $dst|$dst, $src2}",
+                           (v2i64 (unpckl VR128:$src1,
+                                          (memopv2i64 addr:$src2))))]>, VEX_4V;
+
+  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, unpckh, bc_v16i8,
+                                 0>, VEX_4V;
+  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, unpckh, bc_v8i16,
+                                 0>, VEX_4V;
+  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, unpckh, bc_v4i32,
+                                 0>, VEX_4V;
+
+  /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+  /// knew to collapse (bitconvert VT to VT) into its operand.
+  def VPUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
+                         (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                         "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set VR128:$dst,
-                          (v4i32 (unpckl VR128:$src1, VR128:$src2)))]>;
-  def PUNPCKLDQrm : PDI<0x62, MRMSrcMem,
+                           (v2i64 (unpckh VR128:$src1, VR128:$src2)))]>, VEX_4V;
+  def VPUNPCKHQDQrm : PDI<0x6D, MRMSrcMem,
                          (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpckldq\t{$src2, $dst|$dst, $src2}",
+                         "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                          [(set VR128:$dst,
-                          (unpckl VR128:$src1,
-                                  (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+                           (v2i64 (unpckh VR128:$src1,
+                                          (memopv2i64 addr:$src2))))]>, VEX_4V;
+}
+
+let Constraints = "$src1 = $dst" in {
+  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, unpckl, bc_v16i8>;
+  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, unpckl, bc_v8i16>;
+  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, unpckl, bc_v4i32>;
+
+  /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+  /// knew to collapse (bitconvert VT to VT) into its operand.
  def PUNPCKLQDQrr : PDI<0x6C, MRMSrcReg,
                          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                          "punpcklqdq\t{$src2, $dst|$dst, $src2}",
@@ -2338,39 +2541,12 @@ let Constraints = "$src1 = $dst" in {
                            (v2i64 (unpckl VR128:$src1,
                                           (memopv2i64 addr:$src2))))]>;
 
-  def PUNPCKHBWrr : PDI<0x68, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpckhbw\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (v16i8 (unpckh VR128:$src1, VR128:$src2)))]>;
-  def PUNPCKHBWrm : PDI<0x68, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpckhbw\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (unpckh VR128:$src1,
-                                  (bc_v16i8 (memopv2i64 addr:$src2))))]>;
-  def PUNPCKHWDrr : PDI<0x69, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpckhwd\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (v8i16 (unpckh VR128:$src1, VR128:$src2)))]>;
-  def PUNPCKHWDrm : PDI<0x69, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpckhwd\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (unpckh VR128:$src1,
-                                  (bc_v8i16 (memopv2i64 addr:$src2))))]>;
-  def PUNPCKHDQrr : PDI<0x6A, MRMSrcReg,
-                        (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
-                        "punpckhdq\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (v4i32 (unpckh VR128:$src1, VR128:$src2)))]>;
-  def PUNPCKHDQrm : PDI<0x6A, MRMSrcMem,
-                        (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
-                        "punpckhdq\t{$src2, $dst|$dst, $src2}",
-                        [(set VR128:$dst,
-                          (unpckh VR128:$src1,
-                                  (bc_v4i32 (memopv2i64 addr:$src2))))]>;
+  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, unpckh, bc_v16i8>;
+  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, unpckh, bc_v8i16>;
+  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, unpckh, bc_v4i32>;
+
+  /// FIXME: we could eliminate this and use sse2_unpack instead if tblgen
+  /// knew to collapse (bitconvert VT to VT) into its operand.
  def PUNPCKHQDQrr : PDI<0x6D, MRMSrcReg,
                          (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                          "punpckhqdq\t{$src2, $dst|$dst, $src2}",
@@ -2384,102 +2560,117 @@ let Constraints = "$src1 = $dst" in {
                            (memopv2i64 addr:$src2))))]>;
 }
 
-// Extract / Insert
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Integer Extract and Insert
+//===---------------------------------------------------------------------===//
+
+let ExeDomain = SSEPackedInt in {
+multiclass sse2_pinsrw<bit Is2Addr = 1> {
+  def rri : Ii8<0xC4, MRMSrcReg,
+       (outs VR128:$dst), (ins VR128:$src1,
+        GR32:$src2, i32i8imm:$src3),
+       !if(Is2Addr,
+           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+       [(set VR128:$dst,
+         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
+  def rmi : Ii8<0xC4, MRMSrcMem,
+                       (outs VR128:$dst), (ins VR128:$src1,
+                        i16mem:$src2, i32i8imm:$src3),
+       !if(Is2Addr,
+           "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+           "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+       [(set VR128:$dst,
+         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
+                    imm:$src3))]>;
+}
+
+// Extract
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
+                    (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                    "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
+                                                imm:$src2))]>, OpSize, VEX;
 def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
                     (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
                     "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set GR32:$dst, (X86pextrw (v8i16 VR128:$src1),
                                                 imm:$src2))]>;
-let Constraints = "$src1 = $dst" in {
-  def PINSRWrri : PDIi8<0xC4, MRMSrcReg,
-                       (outs VR128:$dst), (ins VR128:$src1,
-                        GR32:$src2, i32i8imm:$src3),
-                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                       [(set VR128:$dst,
-                         (X86pinsrw VR128:$src1, GR32:$src2, imm:$src3))]>;
-  def PINSRWrmi : PDIi8<0xC4, MRMSrcMem,
-                       (outs VR128:$dst), (ins VR128:$src1,
-                        i16mem:$src2, i32i8imm:$src3),
-                       "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                       [(set VR128:$dst,
-                         (X86pinsrw VR128:$src1, (extloadi16 addr:$src2),
-                                    imm:$src3))]>;
-}
-
-// Mask creation
-def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
-                     "pmovmskb\t{$src, $dst|$dst, $src}",
-                     [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
-// Conditional store
-let Uses = [EDI] in
-def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
-                     "maskmovdqu\t{$mask, $src|$src, $mask}",
-                     [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+// Insert
+let isAsmParserOnly = 1, Predicates = [HasAVX] in
+  defm PINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
 
-let Uses = [RDI] in
-def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
-                       "maskmovdqu\t{$mask, $src|$src, $mask}",
-                       [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+let Constraints = "$src1 = $dst" in
+  defm VPINSRW : sse2_pinsrw, TB, OpSize;
 
 } // ExeDomain = SSEPackedInt
 
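+// sse2_pinsrw uses the same Is2Addr trick as the integer binops: the tied
+// SSE form keeps "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}" while the
+// AVX instantiation with <0> switches to the four-operand
+// "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}" string,
+// as spelled out in the !if above. (Reading of the multiclass above; note
+// that the PINSRW/VPINSRW defm names appear swapped relative to their
+// AVX/SSE predicates.)
+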
-// Non-temporal stores
-def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
-                        "movntpd\t{$src, $dst|$dst, $src}",
-                        [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                        "movntdq\t{$src, $dst|$dst, $src}",
-                        [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-def MOVNTImr_Int  :   I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
-                        "movnti\t{$src, $dst|$dst, $src}",
-                        [(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
-                      TB, Requires<[HasSSE2]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Packed Mask Creation
+//===---------------------------------------------------------------------===//
 
-let AddedComplexity = 400 in { // Prefer non-temporal versions
-def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                    "movntpd\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)]>;
+let ExeDomain = SSEPackedInt in {
 
-let ExeDomain = SSEPackedInt in
-def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
-                    "movntdq\t{$src, $dst|$dst, $src}",
-                    [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
-}
+let isAsmParserOnly = 1 in
+def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+           "pmovmskb\t{$src, $dst|$dst, $src}",
+           [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
+def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
+           "pmovmskb\t{$src, $dst|$dst, $src}",
+           [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
 
-// Flush cache
-def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
-               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
-              TB, Requires<[HasSSE2]>;
+} // ExeDomain = SSEPackedInt
 
-// Load, store, and memory fence
-def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
-               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
-def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
-               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+//===---------------------------------------------------------------------===//
+// SSE2 - Conditional Store
+//===---------------------------------------------------------------------===//
 
-// Pause. This "instruction" is encoded as "rep; nop", so even though it
-// was introduced with SSE2, it's backward compatible.
-def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+let ExeDomain = SSEPackedInt in {
 
-//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
-           (i8 0)), (NOOP)>;
-def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
-def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
-           (i8 1)), (MFENCE)>;
+let isAsmParserOnly = 1 in {
+let Uses = [EDI] in
+def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
+           (ins VR128:$src, VR128:$mask),
+           "maskmovdqu\t{$mask, $src|$src, $mask}",
+           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>, VEX;
+let Uses = [RDI] in
+def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
+           (ins VR128:$src, VR128:$mask),
+           "maskmovdqu\t{$mask, $src|$src, $mask}",
+           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
+}
+
+let Uses = [EDI] in
+def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+           "maskmovdqu\t{$mask, $src|$src, $mask}",
+           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)]>;
+let Uses = [RDI] in
+def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
+           "maskmovdqu\t{$mask, $src|$src, $mask}",
+           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+
+} // ExeDomain = SSEPackedInt
 
-// Alias instructions that map zero vector to pxor / xorp* for sse.
-// We set canFoldAsLoad because this can be converted to a constant-pool
-// load of an all-ones value if folding it would be beneficial.
-let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, - isCodeGenOnly = 1, ExeDomain = SSEPackedInt in - // FIXME: Change encoding to pseudo. - def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "", - [(set VR128:$dst, (v4i32 immAllOnesV))]>; +//===---------------------------------------------------------------------===// +// SSE2 - Move Doubleword +//===---------------------------------------------------------------------===// +// Move Int Doubleword to Packed Double Int +let isAsmParserOnly = 1 in { +def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (scalar_to_vector GR32:$src)))]>, VEX; +def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, + VEX; +} def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2489,6 +2680,18 @@ def MOVDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>; + +// Move Int Doubleword to Single Scalar +let isAsmParserOnly = 1 in { +def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX; + +def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>, + VEX; +} def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>; @@ -2497,20 +2700,18 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>; -// SSE2 instructions with XS prefix -def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), - "movq\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, - (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, - Requires<[HasSSE2]>; -def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), - "movq\t{$src, $dst|$dst, $src}", - [(store (i64 (vector_extract (v2i64 VR128:$src), - (iPTR 0))), addr:$dst)]>; - -def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), - (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; - +// Move Packed Doubleword Int to Packed Double Int +let isAsmParserOnly = 1 in { +def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), + (iPTR 0)))]>, VEX; +def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), + (ins i32mem:$dst, VR128:$src), + "movd\t{$src, $dst|$dst, $src}", + [(store (i32 (vector_extract (v4i32 VR128:$src), + (iPTR 0))), addr:$dst)]>, VEX; +} def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2520,6 +2721,15 @@ def MOVPDI2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)]>; +// Move Scalar Single to Double Int +let isAsmParserOnly = 1 in { +def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, 
(bitconvert FR32:$src))]>, VEX; +def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX; +} def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>; @@ -2527,25 +2737,38 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; -// Store / copy lower 64-bits of a XMM register. -def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), - "movq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; - // movd / movq to XMM register zero-extends +let AddedComplexity = 15, isAsmParserOnly = 1 in { +def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v4i32 (X86vzmovl + (v4i32 (scalar_to_vector GR32:$src)))))]>, + VEX; +def VMOVZQI2PQIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only + [(set VR128:$dst, (v2i64 (X86vzmovl + (v2i64 (scalar_to_vector GR64:$src)))))]>, + VEX, VEX_W; +} let AddedComplexity = 15 in { def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))))]>; -// This is X86-64 only. def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), - "mov{d|q}\t{$src, $dst|$dst, $src}", + "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))))]>; } let AddedComplexity = 20 in { +let isAsmParserOnly = 1 in +def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), + "movd\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v4i32 (X86vzmovl (v4i32 (scalar_to_vector + (loadi32 addr:$src))))))]>, + VEX; def MOVZDI2PDIrm : PDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2558,13 +2781,63 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), (MOVZDI2PDIrm addr:$src)>; +} + +//===---------------------------------------------------------------------===// +// SSE2 - Move Quadword +//===---------------------------------------------------------------------===// + +// Move Quadword Int to Packed Quadword Int +let isAsmParserOnly = 1 in +def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, + VEX, Requires<[HasAVX]>; +def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "movq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, + Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix + +// Move Packed Quadword Int to Quadword Int +let isAsmParserOnly = 1 in +def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (vector_extract (v2i64 VR128:$src), + (iPTR 0))), addr:$dst)]>, VEX; +def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(store (i64 (vector_extract 
(v2i64 VR128:$src), + (iPTR 0))), addr:$dst)]>; + +def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), + (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; + +// Store / copy lower 64-bits of a XMM register. +let isAsmParserOnly = 1 in +def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; +def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), + "movq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; +let AddedComplexity = 20, isAsmParserOnly = 1 in +def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, + (v2i64 (X86vzmovl (v2i64 (scalar_to_vector + (loadi64 addr:$src))))))]>, + XS, VEX, Requires<[HasAVX]>; + +let AddedComplexity = 20 in { def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 (scalar_to_vector - (loadi64 addr:$src))))))]>, XS, - Requires<[HasSSE2]>; + (loadi64 addr:$src))))))]>, + XS, Requires<[HasSSE2]>; def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))), (MOVZQI2PQIrm addr:$src)>; @@ -2575,12 +2848,23 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. +let isAsmParserOnly = 1, AddedComplexity = 15 in +def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, + XS, VEX, Requires<[HasAVX]>; let AddedComplexity = 15 in def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, XS, Requires<[HasSSE2]>; +let AddedComplexity = 20, isAsmParserOnly = 1 in +def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovq\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (v2i64 (X86vzmovl + (loadv2i64 addr:$src))))]>, + XS, VEX, Requires<[HasAVX]>; let AddedComplexity = 20 in { def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movq\t{$src, $dst|$dst, $src}", @@ -2592,49 +2876,136 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), (MOVZPQILo2PQIrm addr:$src)>; } +// Instructions to match in the assembler +let isAsmParserOnly = 1 in { +// This instructions is in fact an alias to movd with 64 bit dst +def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), + "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; +def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), + "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; +} + // Instructions for the disassembler // xr = XMM register // xm = mem64 +let isAsmParserOnly = 1 in +def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", []>, XS; //===---------------------------------------------------------------------===// -// SSE3 Instructions +// SSE2 - Misc Instructions //===---------------------------------------------------------------------===// -// Move Instructions -def MOVSHDUPrr : S3SI<0x16, MRMSrcReg, (outs VR128:$dst), (ins 
VR128:$src),
-                    "movshdup\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (v4f32 (movshdup
-                                       VR128:$src, (undef))))]>;
-def MOVSHDUPrm : S3SI<0x16, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                    "movshdup\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (movshdup
-                                       (memopv4f32 addr:$src), (undef)))]>;
+// Flush cache
+def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
+               "clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
+              TB, Requires<[HasSSE2]>;
+
+// Load, store, and memory fence
+def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
+               "lfence", [(int_x86_sse2_lfence)]>, TB, Requires<[HasSSE2]>;
+def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
+               "mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
+
+// Pause. This "instruction" is encoded as "rep; nop", so even though it
+// was introduced with SSE2, it's backward compatible.
+def PAUSE : I<0x90, RawFrm, (outs), (ins), "pause", []>, REP;
+
+//TODO: custom lower this so as to never even generate the noop
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+                      (i8 0)), (NOOP)>;
+def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
+def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
+                      (i8 1)), (MFENCE)>;
+
+// Alias instructions that map zero vector to pxor / xorp* for sse.
+// We set canFoldAsLoad because this can be converted to a constant-pool
+// load of an all-ones value if folding it would be beneficial.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+    isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
+  // FIXME: Change encoding to pseudo.
+  def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
+                         [(set VR128:$dst, (v4i32 immAllOnesV))]>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Conversion Instructions
+//===---------------------------------------------------------------------===//
+
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                       "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
+def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                      "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                      "cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                      "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                      "cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
+
+//===---------------------------------------------------------------------===//
+// SSE3 - Move Instructions
+//===---------------------------------------------------------------------===//
 
-def MOVSLDUPrr : S3SI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
-                    "movsldup\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (v4f32 (movsldup
+// Replicate Single FP
+multiclass sse3_replicate_sfp<bits<8> op, PatFrag rep_frag, string OpcodeStr> {
+def rr : S3SI<op, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (v4f32 (rep_frag
                                        VR128:$src, (undef))))]>;
-def MOVSLDUPrm : S3SI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
-                    "movsldup\t{$src, $dst|$dst, $src}",
-                    [(set VR128:$dst, (movsldup
+def rm : S3SI<op, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+                    [(set VR128:$dst, (rep_frag
                                        (memopv4f32 addr:$src), (undef)))]>;
+}
+
+let
isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; +defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; +} +defm MOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "movshdup">; +defm MOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "movsldup">; -def MOVDDUPrr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movddup\t{$src, $dst|$dst, $src}", - [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; -def MOVDDUPrm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "movddup\t{$src, $dst|$dst, $src}", +// Replicate Double FP +multiclass sse3_replicate_dfp { +def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), + [(set VR128:$dst,(v2f64 (movddup VR128:$src, (undef))))]>; +def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), + !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (v2f64 (movddup (scalar_to_vector (loadf64 addr:$src)), (undef))))]>; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; +defm MOVDDUP : sse3_replicate_dfp<"movddup">; + +// Move Unaligned Integer +let isAsmParserOnly = 1 in + def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vlddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; +def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "lddqu\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; def : Pat<(movddup (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src)))), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; +// Several Move patterns let AddedComplexity = 5 in { def : Pat<(movddup (memopv2f64 addr:$src), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; @@ -2646,69 +3017,6 @@ def : Pat<(movddup (bc_v4i32 (memopv2i64 addr:$src)), (undef)), (MOVDDUPrm addr:$src)>, Requires<[HasSSE3]>; } -// Arithmetic -let Constraints = "$src1 = $dst" in { - def ADDSUBPSrr : S3DI<0xD0, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "addsubps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1, - VR128:$src2))]>; - def ADDSUBPSrm : S3DI<0xD0, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "addsubps\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_ps VR128:$src1, - (memop addr:$src2)))]>; - def ADDSUBPDrr : S3I<0xD0, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "addsubpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1, - VR128:$src2))]>; - def ADDSUBPDrm : S3I<0xD0, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), - "addsubpd\t{$src2, $dst|$dst, $src2}", - [(set VR128:$dst, (int_x86_sse3_addsub_pd VR128:$src1, - (memop addr:$src2)))]>; -} - -def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "lddqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>; - -// Horizontal ops -class S3D_Intrr o, string OpcodeStr, Intrinsic IntId> - : S3DI; -class S3D_Intrm o, string OpcodeStr, Intrinsic IntId> - : S3DI; -class S3_Intrr o, string OpcodeStr, Intrinsic IntId> - : S3I; -class S3_Intrm o, string OpcodeStr, Intrinsic IntId> - : S3I; - -let Constraints = "$src1 = $dst" in { - def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>; - def HADDPSrm : S3D_Intrm<0x7C, 
"haddps", int_x86_sse3_hadd_ps>; - def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>; - def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>; - def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>; - def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>; - def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; - def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; -} - -// Thread synchronization -def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", - [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; -def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", - [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; - // vector_shuffle v1, <1, 1, 3, 3> let AddedComplexity = 15 in def : Pat<(v4i32 (movshdup VR128:$src, (undef))), @@ -2726,78 +3034,108 @@ let AddedComplexity = 20 in (MOVSLDUPrm addr:$src)>, Requires<[HasSSE3]>; //===---------------------------------------------------------------------===// -// SSSE3 Instructions +// SSE3 - Arithmetic //===---------------------------------------------------------------------===// -/// SS3I_unop_rm_int_8 - Simple SSSE3 unary operator whose type is v*i8. -multiclass SS3I_unop_rm_int_8 opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128> { - def rr64 : SS38I; - - def rm64 : SS38I; +multiclass sse3_addsub { + def rr : I<0xD0, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (Int VR128:$src1, + VR128:$src2))]>; + def rm : I<0xD0, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, f128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, (Int VR128:$src1, + (memop addr:$src2)))]>; - def rr128 : SS38I, - OpSize; +} - def rm128 : SS38I, OpSize; +let isAsmParserOnly = 1, Predicates = [HasAVX], + ExeDomain = SSEPackedDouble in { + defm VADDSUBPS : sse3_addsub, XD, + VEX_4V; + defm VADDSUBPD : sse3_addsub, OpSize, + VEX_4V; +} +let Constraints = "$src1 = $dst", Predicates = [HasSSE3], + ExeDomain = SSEPackedDouble in { + defm ADDSUBPS : sse3_addsub, XD; + defm ADDSUBPD : sse3_addsub, TB, OpSize; } -/// SS3I_unop_rm_int_16 - Simple SSSE3 unary operator whose type is v*i16. 
-multiclass SS3I_unop_rm_int_16 opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128> { - def rr64 : SS38I; +//===---------------------------------------------------------------------===// +// SSE3 Instructions +//===---------------------------------------------------------------------===// - def rm64 : SS38I; +// Horizontal ops +class S3D_Intrr o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> + : S3DI; +class S3D_Intrm o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> + : S3DI; +class S3_Intrr o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> + : S3I; +class S3_Intrm o, string OpcodeStr, Intrinsic IntId, bit Is2Addr = 1> + : S3I; - def rr128 : SS38I, - OpSize; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + def VHADDPSrr : S3D_Intrr<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; + def VHADDPSrm : S3D_Intrm<0x7C, "vhaddps", int_x86_sse3_hadd_ps, 0>, VEX_4V; + def VHADDPDrr : S3_Intrr <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; + def VHADDPDrm : S3_Intrm <0x7C, "vhaddpd", int_x86_sse3_hadd_pd, 0>, VEX_4V; + def VHSUBPSrr : S3D_Intrr<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; + def VHSUBPSrm : S3D_Intrm<0x7D, "vhsubps", int_x86_sse3_hsub_ps, 0>, VEX_4V; + def VHSUBPDrr : S3_Intrr <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; + def VHSUBPDrm : S3_Intrm <0x7D, "vhsubpd", int_x86_sse3_hsub_pd, 0>, VEX_4V; +} - def rm128 : SS38I, OpSize; +let Constraints = "$src1 = $dst" in { + def HADDPSrr : S3D_Intrr<0x7C, "haddps", int_x86_sse3_hadd_ps>; + def HADDPSrm : S3D_Intrm<0x7C, "haddps", int_x86_sse3_hadd_ps>; + def HADDPDrr : S3_Intrr <0x7C, "haddpd", int_x86_sse3_hadd_pd>; + def HADDPDrm : S3_Intrm <0x7C, "haddpd", int_x86_sse3_hadd_pd>; + def HSUBPSrr : S3D_Intrr<0x7D, "hsubps", int_x86_sse3_hsub_ps>; + def HSUBPSrm : S3D_Intrm<0x7D, "hsubps", int_x86_sse3_hsub_ps>; + def HSUBPDrr : S3_Intrr <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; + def HSUBPDrm : S3_Intrm <0x7D, "hsubpd", int_x86_sse3_hsub_pd>; } -/// SS3I_unop_rm_int_32 - Simple SSSE3 unary operator whose type is v*i32. -multiclass SS3I_unop_rm_int_32 opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128> { - def rr64 : SS38I opc, string OpcodeStr, + PatFrag mem_frag64, PatFrag mem_frag128, + Intrinsic IntId64, Intrinsic IntId128> { + def rr64 : SS38I; - def rm64 : SS38I; + (IntId64 (bitconvert (mem_frag64 addr:$src))))]>; def rr128 : SS38I opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set VR128:$dst, (IntId128 - (bitconvert (memopv4i32 addr:$src))))]>, OpSize; + (bitconvert (mem_frag128 addr:$src))))]>, OpSize; } -defm PABSB : SS3I_unop_rm_int_8 <0x1C, "pabsb", - int_x86_ssse3_pabs_b, - int_x86_ssse3_pabs_b_128>; -defm PABSW : SS3I_unop_rm_int_16<0x1D, "pabsw", - int_x86_ssse3_pabs_w, - int_x86_ssse3_pabs_w_128>; -defm PABSD : SS3I_unop_rm_int_32<0x1E, "pabsd", - int_x86_ssse3_pabs_d, - int_x86_ssse3_pabs_d_128>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv8i8, memopv16i8, + int_x86_ssse3_pabs_b, + int_x86_ssse3_pabs_b_128>, VEX; + defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv4i16, memopv8i16, + int_x86_ssse3_pabs_w, + int_x86_ssse3_pabs_w_128>, VEX; + defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", memopv2i32, memopv4i32, + int_x86_ssse3_pabs_d, + int_x86_ssse3_pabs_d_128>, VEX; +} -/// SS3I_binop_rm_int_8 - Simple SSSE3 binary operator whose type is v*i8. 
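For reference, the horizontal-op classes above (HADDPS and friends) pairwise-add adjacent lanes, so two applications reduce a whole vector. A small C sketch under the usual intrinsic mapping; sum4 is an invented name:

#include <pmmintrin.h>   /* SSE3 */

/* haddps a,b = { a0+a1, a2+a3, b0+b1, b2+b3 } */
float sum4(__m128 v) {
  __m128 t = _mm_hadd_ps(v, v);  /* { v0+v1, v2+v3, v0+v1, v2+v3 } */
  t = _mm_hadd_ps(t, t);         /* every lane = v0+v1+v2+v3 */
  return _mm_cvtss_f32(t);
}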
-let Constraints = "$src1 = $dst" in { - multiclass SS3I_binop_rm_int_8 opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128, - bit Commutable = 0> { - def rr64 : SS38I { - let isCommutable = Commutable; - } - def rm64 : SS38I; - - def rr128 : SS38I, - OpSize { - let isCommutable = Commutable; - } - def rm128 : SS38I, OpSize; - } +defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv8i8, memopv16i8, + int_x86_ssse3_pabs_b, + int_x86_ssse3_pabs_b_128>; +defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv4i16, memopv8i16, + int_x86_ssse3_pabs_w, + int_x86_ssse3_pabs_w_128>; +defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", memopv2i32, memopv4i32, + int_x86_ssse3_pabs_d, + int_x86_ssse3_pabs_d_128>; + +//===---------------------------------------------------------------------===// +// SSSE3 - Packed Binary Operator Instructions +//===---------------------------------------------------------------------===// + +/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. +multiclass SS3I_binop_rm_int opc, string OpcodeStr, + PatFrag mem_frag64, PatFrag mem_frag128, + Intrinsic IntId64, Intrinsic IntId128, + bit Is2Addr = 1> { + let isCommutable = 1 in + def rr64 : SS38I; + def rm64 : SS38I; + + let isCommutable = 1 in + def rr128 : SS38I, + OpSize; + def rm128 : SS38I, OpSize; } -/// SS3I_binop_rm_int_16 - Simple SSSE3 binary operator whose type is v*i16. -let Constraints = "$src1 = $dst" in { - multiclass SS3I_binop_rm_int_16 opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128, - bit Commutable = 0> { - def rr64 : SS38I { - let isCommutable = Commutable; - } - def rm64 : SS38I; - - def rr128 : SS38I, - OpSize { - let isCommutable = Commutable; - } - def rm128 : SS38I, OpSize; - } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let isCommutable = 0 in { + defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_w, + int_x86_ssse3_phadd_w_128, 0>, VEX_4V; + defm VPHADDD : SS3I_binop_rm_int<0x02, "vphaddd", memopv2i32, memopv4i32, + int_x86_ssse3_phadd_d, + int_x86_ssse3_phadd_d_128, 0>, VEX_4V; + defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_sw, + int_x86_ssse3_phadd_sw_128, 0>, VEX_4V; + defm VPHSUBW : SS3I_binop_rm_int<0x05, "vphsubw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_w, + int_x86_ssse3_phsub_w_128, 0>, VEX_4V; + defm VPHSUBD : SS3I_binop_rm_int<0x06, "vphsubd", memopv2i32, memopv4i32, + int_x86_ssse3_phsub_d, + int_x86_ssse3_phsub_d_128, 0>, VEX_4V; + defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_sw, + int_x86_ssse3_phsub_sw_128, 0>, VEX_4V; + defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw", memopv8i8, memopv16i8, + int_x86_ssse3_pmadd_ub_sw, + int_x86_ssse3_pmadd_ub_sw_128, 0>, VEX_4V; + defm VPSHUFB : SS3I_binop_rm_int<0x00, "vpshufb", memopv8i8, memopv16i8, + int_x86_ssse3_pshuf_b, + int_x86_ssse3_pshuf_b_128, 0>, VEX_4V; + defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb", memopv8i8, memopv16i8, + int_x86_ssse3_psign_b, + int_x86_ssse3_psign_b_128, 0>, VEX_4V; + defm VPSIGNW : SS3I_binop_rm_int<0x09, "vpsignw", memopv4i16, memopv8i16, + int_x86_ssse3_psign_w, + int_x86_ssse3_psign_w_128, 0>, VEX_4V; + defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd", memopv2i32, memopv4i32, + int_x86_ssse3_psign_d, + int_x86_ssse3_psign_d_128, 0>, VEX_4V; +} +defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv4i16, memopv8i16, + int_x86_ssse3_pmul_hr_sw, + int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; } 
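As an aside (not part of the patch): VPSHUFB above is the AVX form of the SSSE3 byte shuffle whose semantics the X86pshufb patterns later in this file rely on. A minimal C sketch; reverse_bytes is an invented name:

#include <tmmintrin.h>   /* SSSE3; compile with -mssse3 */

/* pshufb: result byte i = src[mask[i] & 15], or 0 if bit 7 of mask[i] is set. */
__m128i reverse_bytes(__m128i v) {
  const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7,
                                   8, 9, 10, 11, 12, 13, 14, 15);
  return _mm_shuffle_epi8(v, rev);
}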
-/// SS3I_binop_rm_int_32 - Simple SSSE3 binary operator whose type is v*i32. -let Constraints = "$src1 = $dst" in { - multiclass SS3I_binop_rm_int_32 opc, string OpcodeStr, - Intrinsic IntId64, Intrinsic IntId128, - bit Commutable = 0> { - def rr64 : SS38I { - let isCommutable = Commutable; - } - def rm64 : SS38I; - - def rr128 : SS38I, - OpSize { - let isCommutable = Commutable; - } - def rm128 : SS38I, OpSize; - } +// None of these have i8 immediate fields. +let ImmT = NoImm, Constraints = "$src1 = $dst" in { +let isCommutable = 0 in { + defm PHADDW : SS3I_binop_rm_int<0x01, "phaddw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_w, + int_x86_ssse3_phadd_w_128>; + defm PHADDD : SS3I_binop_rm_int<0x02, "phaddd", memopv2i32, memopv4i32, + int_x86_ssse3_phadd_d, + int_x86_ssse3_phadd_d_128>; + defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", memopv4i16, memopv8i16, + int_x86_ssse3_phadd_sw, + int_x86_ssse3_phadd_sw_128>; + defm PHSUBW : SS3I_binop_rm_int<0x05, "phsubw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_w, + int_x86_ssse3_phsub_w_128>; + defm PHSUBD : SS3I_binop_rm_int<0x06, "phsubd", memopv2i32, memopv4i32, + int_x86_ssse3_phsub_d, + int_x86_ssse3_phsub_d_128>; + defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", memopv4i16, memopv8i16, + int_x86_ssse3_phsub_sw, + int_x86_ssse3_phsub_sw_128>; + defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", memopv8i8, memopv16i8, + int_x86_ssse3_pmadd_ub_sw, + int_x86_ssse3_pmadd_ub_sw_128>; + defm PSHUFB : SS3I_binop_rm_int<0x00, "pshufb", memopv8i8, memopv16i8, + int_x86_ssse3_pshuf_b, + int_x86_ssse3_pshuf_b_128>; + defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", memopv8i8, memopv16i8, + int_x86_ssse3_psign_b, + int_x86_ssse3_psign_b_128>; + defm PSIGNW : SS3I_binop_rm_int<0x09, "psignw", memopv4i16, memopv8i16, + int_x86_ssse3_psign_w, + int_x86_ssse3_psign_w_128>; + defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", memopv2i32, memopv4i32, + int_x86_ssse3_psign_d, + int_x86_ssse3_psign_d_128>; +} +defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", memopv4i16, memopv8i16, + int_x86_ssse3_pmul_hr_sw, + int_x86_ssse3_pmul_hr_sw_128>; } -let ImmT = NoImm in { // None of these have i8 immediate fields. -defm PHADDW : SS3I_binop_rm_int_16<0x01, "phaddw", - int_x86_ssse3_phadd_w, - int_x86_ssse3_phadd_w_128>; -defm PHADDD : SS3I_binop_rm_int_32<0x02, "phaddd", - int_x86_ssse3_phadd_d, - int_x86_ssse3_phadd_d_128>; -defm PHADDSW : SS3I_binop_rm_int_16<0x03, "phaddsw", - int_x86_ssse3_phadd_sw, - int_x86_ssse3_phadd_sw_128>; -defm PHSUBW : SS3I_binop_rm_int_16<0x05, "phsubw", - int_x86_ssse3_phsub_w, - int_x86_ssse3_phsub_w_128>; -defm PHSUBD : SS3I_binop_rm_int_32<0x06, "phsubd", - int_x86_ssse3_phsub_d, - int_x86_ssse3_phsub_d_128>; -defm PHSUBSW : SS3I_binop_rm_int_16<0x07, "phsubsw", - int_x86_ssse3_phsub_sw, - int_x86_ssse3_phsub_sw_128>; -defm PMADDUBSW : SS3I_binop_rm_int_8 <0x04, "pmaddubsw", - int_x86_ssse3_pmadd_ub_sw, - int_x86_ssse3_pmadd_ub_sw_128>; -defm PMULHRSW : SS3I_binop_rm_int_16<0x0B, "pmulhrsw", - int_x86_ssse3_pmul_hr_sw, - int_x86_ssse3_pmul_hr_sw_128, 1>; - -defm PSHUFB : SS3I_binop_rm_int_8 <0x00, "pshufb", - int_x86_ssse3_pshuf_b, - int_x86_ssse3_pshuf_b_128>; -defm PSIGNB : SS3I_binop_rm_int_8 <0x08, "psignb", - int_x86_ssse3_psign_b, - int_x86_ssse3_psign_b_128>; -defm PSIGNW : SS3I_binop_rm_int_16<0x09, "psignw", - int_x86_ssse3_psign_w, - int_x86_ssse3_psign_w_128>; -defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd", - int_x86_ssse3_psign_d, - int_x86_ssse3_psign_d_128>; -} - -// palignr patterns. 
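PMULHRSW above deserves a note: it is a Q15 fixed-point multiply with rounding, not a plain high multiply. A hedged scalar model plus the matching intrinsic (function names invented):

#include <tmmintrin.h>   /* SSSE3 */

/* Scalar model of one pmulhrsw lane: ((a*b >> 14) + 1) >> 1. */
short q15_mul(short a, short b) {
  int p = (int)a * (int)b;
  return (short)(((p >> 14) + 1) >> 1);
}

__m128i q15_mul_vec(__m128i a, __m128i b) {
  return _mm_mulhrs_epi16(a, b);   /* eight lanes of the above */
}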
-let Constraints = "$src1 = $dst" in { - def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>; - def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>; - - def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>, OpSize; - def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", - []>, OpSize; +def : Pat<(X86pshufb VR128:$src, VR128:$mask), + (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; +def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), + (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; + +//===---------------------------------------------------------------------===// +// SSSE3 - Packed Align Instruction Patterns +//===---------------------------------------------------------------------===// + +multiclass sse3_palign { + def R64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), + (ins VR64:$src1, VR64:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>; + def R64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), + (ins VR64:$src1, i64mem:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>; + + def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>, OpSize; + def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !if(Is2Addr, + !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), + []>, OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPALIGN : sse3_palign<"vpalignr", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PALIGN : sse3_palign<"palignr">; + let AddedComplexity = 5 in { def : Pat<(v1i64 (palign:$src3 VR64:$src1, VR64:$src2)), @@ -2996,10 +3355,6 @@ def : Pat<(v2i32 (palign:$src3 VR64:$src1, VR64:$src2)), (PALIGNR64rr VR64:$src2, VR64:$src1, (SHUFFLE_get_palign_imm VR64:$src3))>, Requires<[HasSSSE3]>; -def : Pat<(v2f32 (palign:$src3 VR64:$src1, VR64:$src2)), - (PALIGNR64rr VR64:$src2, VR64:$src1, - (SHUFFLE_get_palign_imm VR64:$src3))>, - Requires<[HasSSSE3]>; def : Pat<(v4i16 (palign:$src3 VR64:$src1, VR64:$src2)), (PALIGNR64rr VR64:$src2, VR64:$src1, (SHUFFLE_get_palign_imm VR64:$src3))>, @@ -3027,10 +3382,15 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), Requires<[HasSSSE3]>; } -def : Pat<(X86pshufb VR128:$src, VR128:$mask), - (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; -def : Pat<(X86pshufb VR128:$src, (bc_v16i8 (memopv2i64 addr:$mask))), - (PSHUFBrm128 VR128:$src, addr:$mask)>, Requires<[HasSSSE3]>; +//===---------------------------------------------------------------------===// +// SSSE3 Misc Instructions 
+//===---------------------------------------------------------------------===// + +// Thread synchronization +def MONITOR : I<0x01, MRM_C8, (outs), (ins), "monitor", + [(int_x86_sse3_monitor EAX, ECX, EDX)]>,TB, Requires<[HasSSE3]>; +def MWAIT : I<0x01, MRM_C9, (outs), (ins), "mwait", + [(int_x86_sse3_mwait ECX, EAX)]>, TB, Requires<[HasSSE3]>; //===---------------------------------------------------------------------===// // Non-Instruction Patterns @@ -3277,320 +3637,42 @@ def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))), def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), (Int_CVTDQ2PSrr VR128:$src)>, Requires<[HasSSE2]>; def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), - (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; -def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))), - (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>; -def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), - (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>; - -// Use movaps / movups for SSE integer load / store (one byte shorter). -def : Pat<(alignedloadv4i32 addr:$src), - (MOVAPSrm addr:$src)>; -def : Pat<(loadv4i32 addr:$src), - (MOVUPSrm addr:$src)>; -def : Pat<(alignedloadv2i64 addr:$src), - (MOVAPSrm addr:$src)>; -def : Pat<(loadv2i64 addr:$src), - (MOVUPSrm addr:$src)>; - -def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), - (MOVAPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v2i64 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v4i32 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v8i16 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; -def : Pat<(store (v16i8 VR128:$src), addr:$dst), - (MOVUPSmr addr:$dst, VR128:$src)>; - -//===----------------------------------------------------------------------===// -// SSE4.1 Instructions -//===----------------------------------------------------------------------===// - -multiclass sse41_fp_unop_rm opcps, bits<8> opcpd, - string OpcodeStr, - Intrinsic V4F32Int, - Intrinsic V2F64Int> { - // Intrinsic operation, reg. - // Vector intrinsic operation, reg - def PSr_Int : SS4AIi8, - OpSize; - - // Vector intrinsic operation, mem - def PSm_Int : Ii8, - TA, OpSize, - Requires<[HasSSE41]>; - - // Vector intrinsic operation, reg - def PDr_Int : SS4AIi8, - OpSize; - - // Vector intrinsic operation, mem - def PDm_Int : SS4AIi8, - OpSize; -} - -let Constraints = "$src1 = $dst" in { -multiclass sse41_fp_binop_rm opcss, bits<8> opcsd, - string OpcodeStr, - Intrinsic F32Int, - Intrinsic F64Int> { - // Intrinsic operation, reg. - def SSr_Int : SS4AIi8, - OpSize; - - // Intrinsic operation, mem. - def SSm_Int : SS4AIi8, - OpSize; - - // Intrinsic operation, reg. - def SDr_Int : SS4AIi8, - OpSize; - - // Intrinsic operation, mem. - def SDm_Int : SS4AIi8, - OpSize; -} -} - -// FP round - roundss, roundps, roundsd, roundpd -defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", - int_x86_sse41_round_ps, int_x86_sse41_round_pd>; -defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", - int_x86_sse41_round_ss, int_x86_sse41_round_sd>; - -// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. 
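The sint_to_fp / fp_to_sint patterns above map straight onto the SSE2 conversion intrinsics; note that the T (truncating) variant is what a C cast requires. Illustration only, with invented names:

#include <emmintrin.h>   /* SSE2 */

__m128  to_float(__m128i v) { return _mm_cvtepi32_ps(v);  }  /* CVTDQ2PS  */
__m128i to_int  (__m128  v) { return _mm_cvttps_epi32(v); }  /* CVTTPS2DQ,
                                                                truncates  */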
-multiclass SS41I_unop_rm_int_v16 opc, string OpcodeStr, - Intrinsic IntId128> { - def rr128 : SS48I, OpSize; - def rm128 : SS48I, OpSize; -} - -defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", - int_x86_sse41_phminposuw>; - -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_rm_int opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : SS48I, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I, OpSize; - } -} - -defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", - int_x86_sse41_pcmpeqq, 1>; -defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", - int_x86_sse41_packusdw, 0>; -defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", - int_x86_sse41_pminsb, 1>; -defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", - int_x86_sse41_pminsd, 1>; -defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", - int_x86_sse41_pminud, 1>; -defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", - int_x86_sse41_pminuw, 1>; -defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", - int_x86_sse41_pmaxsb, 1>; -defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", - int_x86_sse41_pmaxsd, 1>; -defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", - int_x86_sse41_pmaxud, 1>; -defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", - int_x86_sse41_pmaxuw, 1>; - -defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>; - -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), - (PCMPEQQrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), - (PCMPEQQrm VR128:$src1, addr:$src2)>; - -/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_patint opc, string OpcodeStr, ValueType OpVT, - SDNode OpNode, Intrinsic IntId128, - bit Commutable = 0> { - def rr : SS48I, OpSize { - let isCommutable = Commutable; - } - def rr_int : SS48I, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I, OpSize; - def rm_int : SS48I, - OpSize; - } -} - -/// SS48I_binop_rm - Simple SSE41 binary operator. 
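PHMINPOSUW, removed here and re-added in the reorganized SSE4.1 section below, is unusual in returning both a value and an index. A hedged C sketch; min_u16 is an invented name:

#include <smmintrin.h>   /* SSE4.1; compile with -msse4.1 */

/* phminposuw: bits 15:0 of the result hold the smallest of the eight
 * unsigned 16-bit lanes, bits 18:16 hold its lane index. */
unsigned min_u16(__m128i v) {
  __m128i r = _mm_minpos_epu16(v);
  return (unsigned)_mm_extract_epi16(r, 0);   /* the minimum value */
}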
-let Constraints = "$src1 = $dst" in { -multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Commutable = 0> { - def rr : SS48I, - OpSize { - let isCommutable = Commutable; - } - def rm : SS48I, - OpSize; -} -} - -defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>; - -/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate -let Constraints = "$src1 = $dst" in { - multiclass SS41I_binop_rmi_int opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rri : SS4AIi8, - OpSize { - let isCommutable = Commutable; - } - def rmi : SS4AIi8, - OpSize; - } -} - -defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", - int_x86_sse41_blendps, 0>; -defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", - int_x86_sse41_blendpd, 0>; -defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", - int_x86_sse41_pblendw, 0>; -defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", - int_x86_sse41_dpps, 1>; -defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", - int_x86_sse41_dppd, 1>; -defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", - int_x86_sse41_mpsadbw, 0>; - - -/// SS41I_ternary_int - SSE 4.1 ternary operator -let Uses = [XMM0], Constraints = "$src1 = $dst" in { - multiclass SS41I_ternary_int opc, string OpcodeStr, Intrinsic IntId> { - def rr0 : SS48I, - OpSize; + (Int_CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>; +def : Pat<(v2f64 (sint_to_fp (v2i32 VR64:$src))), + (Int_CVTPI2PDrr VR64:$src)>, Requires<[HasSSE2]>; +def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))), + (Int_CVTTPD2PIrr VR128:$src)>, Requires<[HasSSE2]>; - def rm0 : SS48I, OpSize; - } -} +// Use movaps / movups for SSE integer load / store (one byte shorter). +def : Pat<(alignedloadv4i32 addr:$src), + (MOVAPSrm addr:$src)>; +def : Pat<(loadv4i32 addr:$src), + (MOVUPSrm addr:$src)>; +def : Pat<(alignedloadv2i64 addr:$src), + (MOVAPSrm addr:$src)>; +def : Pat<(loadv2i64 addr:$src), + (MOVUPSrm addr:$src)>; -defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; -defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; -defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; +def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; +def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; +def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; +def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst), + (MOVAPSmr addr:$dst, VR128:$src)>; +def : Pat<(store (v2i64 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +def : Pat<(store (v4i32 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +def : Pat<(store (v8i16 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +def : Pat<(store (v16i8 VR128:$src), addr:$dst), + (MOVUPSmr addr:$dst, VR128:$src)>; +//===----------------------------------------------------------------------===// +// SSE4.1 - Packed Move with Sign/Zero Extend +//===----------------------------------------------------------------------===// multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId> { def rr : SS48I opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, + VEX; +defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, + VEX; +defm VPMOVSXDQ : SS41I_binop_rm_int8<0x25, "vpmovsxdq", 
int_x86_sse41_pmovsxdq>, + VEX; +defm VPMOVZXBW : SS41I_binop_rm_int8<0x30, "vpmovzxbw", int_x86_sse41_pmovzxbw>, + VEX; +defm VPMOVZXWD : SS41I_binop_rm_int8<0x33, "vpmovzxwd", int_x86_sse41_pmovzxwd>, + VEX; +defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, + VEX; +} + defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; defm PMOVSXWD : SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; @@ -3655,6 +3752,17 @@ multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, + VEX; +defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>, + VEX; +defm VPMOVZXBD : SS41I_binop_rm_int4<0x31, "vpmovzxbd", int_x86_sse41_pmovzxbd>, + VEX; +defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>, + VEX; +} + defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; @@ -3685,6 +3793,12 @@ multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, + VEX; +defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, + VEX; +} defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; @@ -3699,6 +3813,9 @@ def : Pat<(int_x86_sse41_pmovzxbq (v4i32 (scalar_to_vector (loadi32 addr:$src))))))), (PMOVZXBQrm addr:$src)>, Requires<[HasSSE41]>; +//===----------------------------------------------------------------------===// +// SSE4.1 - Extract Instructions +//===----------------------------------------------------------------------===// /// SS41I_binop_ext8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem multiclass SS41I_extract8 opc, string OpcodeStr> { @@ -3718,6 +3835,9 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; + defm PEXTRB : SS41I_extract8<0x14, "pextrb">; @@ -3733,127 +3853,607 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { // (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } -defm PEXTRW : SS41I_extract16<0x15, "pextrw">; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX; + +defm PEXTRW : SS41I_extract16<0x15, "pextrw">; + + +/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination +multiclass SS41I_extract32 opc, string OpcodeStr> { + def rr : SS4AIi8, OpSize; + def mr : SS4AIi8, OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX; + +defm PEXTRD : SS41I_extract32<0x16, "pextrd">; + +/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination +multiclass SS41I_extract64 opc, string OpcodeStr> { + def rr : SS4AIi8, OpSize, REX_W; + def mr : SS4AIi8, OpSize, REX_W; +} + +let isAsmParserOnly = 1, Predicates = 
[HasAVX] in + defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W; + +defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; + +/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory +/// destination +multiclass SS41I_extractf32 opc, string OpcodeStr> { + def rr : SS4AIi8, + OpSize; + def mr : SS4AIi8, OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; +defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; + +// Also match an EXTRACTPS store when the store is done as f32 instead of i32. +def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), + imm:$src2))), + addr:$dst), + (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, + Requires<[HasSSE41]>; + +//===----------------------------------------------------------------------===// +// SSE4.1 - Insert Instructions +//===----------------------------------------------------------------------===// + +multiclass SS41I_insert8 opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8, OpSize; + def rm : SS4AIi8, OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PINSRB : SS41I_insert8<0x20, "pinsrb">; + +multiclass SS41I_insert32 opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8, + OpSize; + def rm : SS4AIi8, OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PINSRD : SS41I_insert32<0x22, "pinsrd">; + +multiclass SS41I_insert64 opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8, + OpSize; + def rm : SS4AIi8, OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W; +let Constraints = "$src1 = $dst" in + defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; + +// insertps has a few different modes, there's the first two here below which +// are optimized inserts that won't zero arbitrary elements in the destination +// vector. The next one matches the intrinsic and could zero arbitrary elements +// in the target vector. +multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { + def rr : SS4AIi8, + OpSize; + def rm : SS4AIi8, OpSize; +} + +let Constraints = "$src1 = $dst" in + defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; + +def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), + (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; + +//===----------------------------------------------------------------------===// +// SSE4.1 - Round Instructions +//===----------------------------------------------------------------------===// + +multiclass sse41_fp_unop_rm opcps, bits<8> opcpd, + string OpcodeStr, + Intrinsic V4F32Int, + Intrinsic V2F64Int> { + // Intrinsic operation, reg. + // Vector intrinsic operation, reg + def PSr_Int : SS4AIi8, + OpSize; + + // Vector intrinsic operation, mem + def PSm_Int : Ii8, + TA, OpSize, + Requires<[HasSSE41]>; + + // Vector intrinsic operation, reg + def PDr_Int : SS4AIi8, + OpSize; + + // Vector intrinsic operation, mem + def PDm_Int : SS4AIi8, + OpSize; +} + +multiclass sse41_fp_unop_rm_avx opcps, bits<8> opcpd, + string OpcodeStr> { + // Intrinsic operation, reg. 
+ // Vector intrinsic operation, reg + def PSr : SS4AIi8, OpSize; + + // Vector intrinsic operation, mem + def PSm : Ii8, TA, OpSize, Requires<[HasSSE41]>; + + // Vector intrinsic operation, reg + def PDr : SS4AIi8, OpSize; + + // Vector intrinsic operation, mem + def PDm : SS4AIi8, OpSize; +} + +multiclass sse41_fp_binop_rm opcss, bits<8> opcsd, + string OpcodeStr, + Intrinsic F32Int, + Intrinsic F64Int, bit Is2Addr = 1> { + // Intrinsic operation, reg. + def SSr_Int : SS4AIi8, + OpSize; + + // Intrinsic operation, mem. + def SSm_Int : SS4AIi8, + OpSize; + + // Intrinsic operation, reg. + def SDr_Int : SS4AIi8, + OpSize; + + // Intrinsic operation, mem. + def SDm_Int : SS4AIi8, + OpSize; +} + +multiclass sse41_fp_binop_rm_avx opcss, bits<8> opcsd, + string OpcodeStr> { + // Intrinsic operation, reg. + def SSr : SS4AIi8, OpSize; + + // Intrinsic operation, mem. + def SSm : SS4AIi8, OpSize; + + // Intrinsic operation, reg. + def SDr : SS4AIi8, OpSize; + + // Intrinsic operation, mem. + def SDm : SS4AIi8, OpSize; +} + +// FP round - roundss, roundps, roundsd, roundpd +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + // Intrinsic form + defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", + int_x86_sse41_round_ps, int_x86_sse41_round_pd>, + VEX; + defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", + int_x86_sse41_round_ss, int_x86_sse41_round_sd, + 0>, VEX_4V; + // Instructions for the assembler + defm VROUND : sse41_fp_unop_rm_avx<0x08, 0x09, "vround">, VEX; + defm VROUND : sse41_fp_binop_rm_avx<0x0A, 0x0B, "vround">, VEX_4V; +} + +defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", + int_x86_sse41_round_ps, int_x86_sse41_round_pd>; +let Constraints = "$src1 = $dst" in +defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", + int_x86_sse41_round_ss, int_x86_sse41_round_sd>; + +//===----------------------------------------------------------------------===// +// SSE4.1 - Misc Instructions +//===----------------------------------------------------------------------===// + +// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16. 
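The ROUND defs above take their rounding mode from an immediate; in C that immediate comes from the _MM_FROUND_* macros. A sketch, not part of the patch:

#include <smmintrin.h>   /* SSE4.1 */

__m128 round_nearest(__m128 v) {
  return _mm_round_ps(v, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}

__m128 round_down(__m128 v) {
  return _mm_floor_ps(v);   /* roundps toward -infinity */
}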
+multiclass SS41I_unop_rm_int_v16 opc, string OpcodeStr, + Intrinsic IntId128> { + def rr128 : SS48I, OpSize; + def rm128 : SS48I, OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in +defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw", + int_x86_sse41_phminposuw>, VEX; +defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", + int_x86_sse41_phminposuw>; +/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator +multiclass SS41I_binop_rm_int opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS48I, OpSize; + def rm : SS48I, OpSize; +} -/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination -multiclass SS41I_extract32 opc, string OpcodeStr> { - def rr : SS4AIi8, OpSize; - def mr : SS4AIi8, OpSize; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, + 0>, VEX_4V; + defm VPCMPEQQ : SS41I_binop_rm_int<0x29, "vpcmpeqq", int_x86_sse41_pcmpeqq, + 0>, VEX_4V; + defm VPMINSB : SS41I_binop_rm_int<0x38, "vpminsb", int_x86_sse41_pminsb, + 0>, VEX_4V; + defm VPMINSD : SS41I_binop_rm_int<0x39, "vpminsd", int_x86_sse41_pminsd, + 0>, VEX_4V; + defm VPMINUD : SS41I_binop_rm_int<0x3B, "vpminud", int_x86_sse41_pminud, + 0>, VEX_4V; + defm VPMINUW : SS41I_binop_rm_int<0x3A, "vpminuw", int_x86_sse41_pminuw, + 0>, VEX_4V; + defm VPMAXSB : SS41I_binop_rm_int<0x3C, "vpmaxsb", int_x86_sse41_pmaxsb, + 0>, VEX_4V; + defm VPMAXSD : SS41I_binop_rm_int<0x3D, "vpmaxsd", int_x86_sse41_pmaxsd, + 0>, VEX_4V; + defm VPMAXUD : SS41I_binop_rm_int<0x3F, "vpmaxud", int_x86_sse41_pmaxud, + 0>, VEX_4V; + defm VPMAXUW : SS41I_binop_rm_int<0x3E, "vpmaxuw", int_x86_sse41_pmaxuw, + 0>, VEX_4V; + defm VPMULDQ : SS41I_binop_rm_int<0x28, "vpmuldq", int_x86_sse41_pmuldq, + 0>, VEX_4V; } -defm PEXTRD : SS41I_extract32<0x16, "pextrd">; +let Constraints = "$src1 = $dst" in { + let isCommutable = 0 in + defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; + defm PCMPEQQ : SS41I_binop_rm_int<0x29, "pcmpeqq", int_x86_sse41_pcmpeqq>; + defm PMINSB : SS41I_binop_rm_int<0x38, "pminsb", int_x86_sse41_pminsb>; + defm PMINSD : SS41I_binop_rm_int<0x39, "pminsd", int_x86_sse41_pminsd>; + defm PMINUD : SS41I_binop_rm_int<0x3B, "pminud", int_x86_sse41_pminud>; + defm PMINUW : SS41I_binop_rm_int<0x3A, "pminuw", int_x86_sse41_pminuw>; + defm PMAXSB : SS41I_binop_rm_int<0x3C, "pmaxsb", int_x86_sse41_pmaxsb>; + defm PMAXSD : SS41I_binop_rm_int<0x3D, "pmaxsd", int_x86_sse41_pmaxsd>; + defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud", int_x86_sse41_pmaxud>; + defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw", int_x86_sse41_pmaxuw>; + defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq>; +} +def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)), + (PCMPEQQrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), + (PCMPEQQrm VR128:$src1, addr:$src2)>; -/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory -/// destination -multiclass SS41I_extractf32 opc, string OpcodeStr> { - def rr : SS4AIi8, - OpSize; - def mr : SS4AIi8, OpSize; +/// SS48I_binop_rm - Simple SSE41 binary operator. 
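PMULDQ above, marked commutable, is the signed widening multiply; SSE2's PMULUDQ is its unsigned twin. Hedged C sketch (widening_mul is an invented name):

#include <smmintrin.h>   /* SSE4.1 */

/* pmuldq: lanes 0 and 2 of a and b, each multiplied as signed 32-bit
 * values into two 64-bit results. */
__m128i widening_mul(__m128i a, __m128i b) {
  return _mm_mul_epi32(a, b);
}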
+multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT, bit Is2Addr = 1> { + let isCommutable = 1 in + def rr : SS48I, + OpSize; + def rm : SS48I, + OpSize; } -defm EXTRACTPS : SS41I_extractf32<0x17, "extractps">; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; -// Also match an EXTRACTPS store when the store is done as f32 instead of i32. -def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), - imm:$src2))), - addr:$dst), - (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, - Requires<[HasSSE41]>; +/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate +multiclass SS41I_binop_rmi_int opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + let isCommutable = 1 in + def rri : SS4AIi8, + OpSize; + def rmi : SS4AIi8, + OpSize; +} -let Constraints = "$src1 = $dst" in { - multiclass SS41I_insert8 opc, string OpcodeStr> { - def rr : SS4AIi8, OpSize; - def rm : SS4AIi8, OpSize; +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + let isCommutable = 0 in { + defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, + 0>, VEX_4V; + defm VBLENDPD : SS41I_binop_rmi_int<0x0D, "vblendpd", int_x86_sse41_blendpd, + 0>, VEX_4V; + defm VPBLENDW : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_sse41_pblendw, + 0>, VEX_4V; + defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, + 0>, VEX_4V; } + defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, + 0>, VEX_4V; + defm VDPPD : SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd, + 0>, VEX_4V; } -defm PINSRB : SS41I_insert8<0x20, "pinsrb">; - let Constraints = "$src1 = $dst" in { - multiclass SS41I_insert32 opc, string OpcodeStr> { - def rr : SS4AIi8, - OpSize; - def rm : SS4AIi8, OpSize; + let isCommutable = 0 in { + defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps>; + defm BLENDPD : SS41I_binop_rmi_int<0x0D, "blendpd", int_x86_sse41_blendpd>; + defm PBLENDW : SS41I_binop_rmi_int<0x0E, "pblendw", int_x86_sse41_pblendw>; + defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw>; } + defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps>; + defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd>; } -defm PINSRD : SS41I_insert32<0x22, "pinsrd">; +/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators +let isAsmParserOnly = 1, Predicates = [HasAVX] in { + multiclass SS41I_quaternary_int_avx opc, string OpcodeStr> { + def rr : I, OpSize, TA, VEX_4V, VEX_I8IMM; -// insertps has a few different modes, there's the first two here below which -// are optimized inserts that won't zero arbitrary elements in the destination -// vector. The next one matches the intrinsic and could zero arbitrary elements -// in the target vector. 
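DPPS above encodes both lane selection and result broadcast in its immediate: the high nibble picks the input lanes, the low nibble the destination lanes. Illustration only; dot4 is an invented name:

#include <smmintrin.h>   /* SSE4.1 */

float dot4(__m128 a, __m128 b) {
  /* 0xF1: use all four lanes, place the dot product in lane 0 */
  return _mm_cvtss_f32(_mm_dp_ps(a, b, 0xF1));
}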
-let Constraints = "$src1 = $dst" in { - multiclass SS41I_insertf32 opc, string OpcodeStr> { - def rr : SS4AIi8, - OpSize; - def rm : SS4AIi8, OpSize; + def rm : I, OpSize, TA, VEX_4V, VEX_I8IMM; } } -defm INSERTPS : SS41I_insertf32<0x21, "insertps">; +defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd">; +defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps">; +defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb">; -def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), - (INSERTPSrr VR128:$src1, VR128:$src2, imm:$src3)>; +/// SS41I_ternary_int - SSE 4.1 ternary operator +let Uses = [XMM0], Constraints = "$src1 = $dst" in { + multiclass SS41I_ternary_int opc, string OpcodeStr, Intrinsic IntId> { + def rr0 : SS48I, + OpSize; + + def rm0 : SS48I, OpSize; + } +} + +defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", int_x86_sse41_blendvpd>; +defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", int_x86_sse41_blendvps>; +defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; // ptest instruction we'll lower to this in X86ISelLowering primarily from // the intel intrinsic that corresponds to this. +let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>, + OpSize, VEX; +def VPTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), + "vptest\t{$src2, $src1|$src1, $src2}", + [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>, + OpSize, VEX; +} + let Defs = [EFLAGS] in { def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ptest \t{$src2, $src1|$src1, $src2}", @@ -3865,43 +4465,207 @@ def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2), OpSize; } +let isAsmParserOnly = 1, Predicates = [HasAVX] in +def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovntdqa\t{$src, $dst|$dst, $src}", + [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, + OpSize, VEX; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, OpSize; - //===----------------------------------------------------------------------===// -// SSE4.2 Instructions +// SSE4.2 - Compare Instructions //===----------------------------------------------------------------------===// /// SS42I_binop_rm_int - Simple SSE 4.2 binary operator -let Constraints = "$src1 = $dst" in { - multiclass SS42I_binop_rm_int opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : SS428I, - OpSize { - let isCommutable = Commutable; - } - def rm : SS428I, OpSize; - } +multiclass SS42I_binop_rm_int opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + def rr : SS428I, + OpSize; + def rm : SS428I, OpSize; } -defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; +let isAsmParserOnly = 1, Predicates = [HasAVX] in + defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, + 0>, VEX_4V; +let Constraints = "$src1 = $dst" in + defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), (PCMPGTQrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), (PCMPGTQrm VR128:$src1, addr:$src2)>; 
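
The variable blends defined above differ from the immediate blends in where the mask lives: the legacy SS41I_ternary_int forms read it implicitly from XMM0 (hence Uses = [XMM0]), while the AVX SS41I_quaternary_int_avx forms take it as an explicit fourth operand encoded in the immediate byte (VEX_I8IMM). A byte-wise model of PBLENDVB, illustrative only:

    // The top bit of each mask byte picks src2 over src1 for that byte.
    static void pblendvb(unsigned char dst[16], const unsigned char src1[16],
                         const unsigned char src2[16],
                         const unsigned char mask[16]) {
      for (int i = 0; i < 16; ++i)
        dst[i] = (mask[i] & 0x80) ? src2[i] : src1[i];
    }
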
+//===----------------------------------------------------------------------===// +// SSE4.2 - String/text Processing Instructions +//===----------------------------------------------------------------------===// + +// Packed Compare Implicit Length Strings, Return Mask +let Defs = [EFLAGS], usesCustomInserter = 1 in { + def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "#PCMPISTRM128rr PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, + imm:$src3))]>, OpSize; + def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "#PCMPISTRM128rm PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 + VR128:$src1, (load addr:$src2), imm:$src3))]>, OpSize; +} + +let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1, + Predicates = [HasAVX] in { + def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; + def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; +} + +let Defs = [XMM0, EFLAGS] in { + def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; + def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), + "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; +} + +// Packed Compare Explicit Length Strings, Return Mask +let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { + def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "#PCMPESTRM128rr PSEUDO!", + [(set VR128:$dst, + (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; + + def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "#PCMPESTRM128rm PSEUDO!", + [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 + VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, + OpSize; +} + +let isAsmParserOnly = 1, Predicates = [HasAVX], + Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; + def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; +} + +let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; + def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; +} + +// Packed Compare Implicit Length Strings, Return Index +let Defs = [ECX, EFLAGS] in { + multiclass SS42AI_pcmpistri { + def rr : SS42AI<0x63, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), + (implicit EFLAGS)]>, OpSize; + def rm : SS42AI<0x63, MRMSrcMem, (outs), + 
(ins VR128:$src1, i128mem:$src2, i8imm:$src3), + !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), + [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), + (implicit EFLAGS)]>, OpSize; + } +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPCMPISTRI : SS42AI_pcmpistri, + VEX; +defm VPCMPISTRIA : SS42AI_pcmpistri, + VEX; +defm VPCMPISTRIC : SS42AI_pcmpistri, + VEX; +defm VPCMPISTRIO : SS42AI_pcmpistri, + VEX; +defm VPCMPISTRIS : SS42AI_pcmpistri, + VEX; +defm VPCMPISTRIZ : SS42AI_pcmpistri, + VEX; +} + +defm PCMPISTRI : SS42AI_pcmpistri; +defm PCMPISTRIA : SS42AI_pcmpistri; +defm PCMPISTRIC : SS42AI_pcmpistri; +defm PCMPISTRIO : SS42AI_pcmpistri; +defm PCMPISTRIS : SS42AI_pcmpistri; +defm PCMPISTRIZ : SS42AI_pcmpistri; + +// Packed Compare Explicit Length Strings, Return Index +let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { + multiclass SS42AI_pcmpestri { + def rr : SS42AI<0x61, MRMSrcReg, (outs), + (ins VR128:$src1, VR128:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), + (implicit EFLAGS)]>, OpSize; + def rm : SS42AI<0x61, MRMSrcMem, (outs), + (ins VR128:$src1, i128mem:$src3, i8imm:$src5), + !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), + [(set ECX, + (IntId128 VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5)), + (implicit EFLAGS)]>, OpSize; + } +} + +let isAsmParserOnly = 1, Predicates = [HasAVX] in { +defm VPCMPESTRI : SS42AI_pcmpestri, + VEX; +defm VPCMPESTRIA : SS42AI_pcmpestri, + VEX; +defm VPCMPESTRIC : SS42AI_pcmpestri, + VEX; +defm VPCMPESTRIO : SS42AI_pcmpestri, + VEX; +defm VPCMPESTRIS : SS42AI_pcmpestri, + VEX; +defm VPCMPESTRIZ : SS42AI_pcmpestri, + VEX; +} + +defm PCMPESTRI : SS42AI_pcmpestri; +defm PCMPESTRIA : SS42AI_pcmpestri; +defm PCMPESTRIC : SS42AI_pcmpestri; +defm PCMPESTRIO : SS42AI_pcmpestri; +defm PCMPESTRIS : SS42AI_pcmpestri; +defm PCMPESTRIZ : SS42AI_pcmpestri; + +//===----------------------------------------------------------------------===// +// SSE4.2 - CRC Instructions +//===----------------------------------------------------------------------===// + +// No CRC instructions have AVX equivalents + // crc intrinsic instruction // This set of instructions are only rm, the only difference is the size // of r and m. @@ -3969,133 +4733,52 @@ let Constraints = "$src1 = $dst" in { REX_W; } -// String/text processing instructions. 
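
As a usage sketch for the explicit-length string compares defined above (illustrative; not part of the patch): PCMPESTRI reads the two string lengths implicitly from EAX and EDX, which is why the definitions carry Uses = [EAX, EDX]; from C++ the intrinsic hides that register plumbing. findFirst is a made-up wrapper name:

    #include <nmmintrin.h>  // SSE4.2 string intrinsics

    // Index of the first occurrence of needle within haystack, or 16 if
    // absent; the compiler materializes nlen/hlen into EAX/EDX.
    static int findFirst(__m128i needle, int nlen,
                         __m128i haystack, int hlen) {
      return _mm_cmpestri(needle, nlen, haystack, hlen,
                          _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ORDERED);
    }
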
-let Defs = [EFLAGS], usesCustomInserter = 1 in { -def PCMPISTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "#PCMPISTRM128rr PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, - imm:$src3))]>, OpSize; -def PCMPISTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "#PCMPISTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, (load addr:$src2), - imm:$src3))]>, OpSize; -} - -let Defs = [XMM0, EFLAGS] in { -def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; -def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; -} - -let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { -def PCMPESTRM128REG : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "#PCMPESTRM128rr PSEUDO!", - [(set VR128:$dst, - (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>, OpSize; - -def PCMPESTRM128MEM : SS42AI<0, Pseudo, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "#PCMPESTRM128rm PSEUDO!", - [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 - VR128:$src1, EAX, (load addr:$src3), EDX, imm:$src5))]>, - OpSize; -} - -let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { -def PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; -def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; -} +//===----------------------------------------------------------------------===// +// AES-NI Instructions +//===----------------------------------------------------------------------===// -let Defs = [ECX, EFLAGS] in { - multiclass SS42AI_pcmpistri { - def rr : SS42AI<0x63, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src2, i8imm:$src3), - "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", - [(set ECX, (IntId128 VR128:$src1, VR128:$src2, imm:$src3)), - (implicit EFLAGS)]>, OpSize; - def rm : SS42AI<0x63, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src2, i8imm:$src3), - "pcmpistri\t{$src3, $src2, $src1|$src1, $src2, $src3}", - [(set ECX, (IntId128 VR128:$src1, (load addr:$src2), imm:$src3)), - (implicit EFLAGS)]>, OpSize; - } +multiclass AESI_binop_rm_int opc, string OpcodeStr, + Intrinsic IntId128, bit Is2Addr = 1> { + def rr : AES8I, + OpSize; + def rm : AES8I, OpSize; } -defm PCMPISTRI : SS42AI_pcmpistri; -defm PCMPISTRIA : SS42AI_pcmpistri; -defm PCMPISTRIC : SS42AI_pcmpistri; -defm PCMPISTRIO : SS42AI_pcmpistri; -defm PCMPISTRIS : SS42AI_pcmpistri; -defm PCMPISTRIZ : SS42AI_pcmpistri; - -let Defs = [ECX, EFLAGS] in { -let Uses = [EAX, EDX] in { - multiclass SS42AI_pcmpestri { - def rr : SS42AI<0x61, MRMSrcReg, (outs), - (ins VR128:$src1, VR128:$src3, i8imm:$src5), - "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, (IntId128 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; - def rm : SS42AI<0x61, MRMSrcMem, (outs), - (ins VR128:$src1, i128mem:$src3, i8imm:$src5), - "pcmpestri\t{$src5, $src3, $src1|$src1, $src3, $src5}", - [(set ECX, - (IntId128 VR128:$src1, EAX, (load 
addr:$src3), EDX, imm:$src5)), - (implicit EFLAGS)]>, OpSize; - } -} +// Perform One Round of an AES Encryption/Decryption Flow +let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { + defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc", + int_x86_aesni_aesenc, 0>, VEX_4V; + defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast", + int_x86_aesni_aesenclast, 0>, VEX_4V; + defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec", + int_x86_aesni_aesdec, 0>, VEX_4V; + defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast", + int_x86_aesni_aesdeclast, 0>, VEX_4V; } -defm PCMPESTRI : SS42AI_pcmpestri; -defm PCMPESTRIA : SS42AI_pcmpestri; -defm PCMPESTRIC : SS42AI_pcmpestri; -defm PCMPESTRIO : SS42AI_pcmpestri; -defm PCMPESTRIS : SS42AI_pcmpestri; -defm PCMPESTRIZ : SS42AI_pcmpestri; - -//===----------------------------------------------------------------------===// -// AES-NI Instructions -//===----------------------------------------------------------------------===// - let Constraints = "$src1 = $dst" in { - multiclass AESI_binop_rm_int opc, string OpcodeStr, - Intrinsic IntId128, bit Commutable = 0> { - def rr : AES8I, - OpSize { - let isCommutable = Commutable; - } - def rm : AES8I, OpSize; - } + defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", + int_x86_aesni_aesenc>; + defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", + int_x86_aesni_aesenclast>; + defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", + int_x86_aesni_aesdec>; + defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", + int_x86_aesni_aesdeclast>; } -defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", - int_x86_aesni_aesenc>; -defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", - int_x86_aesni_aesenclast>; -defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", - int_x86_aesni_aesdec>; -defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", - int_x86_aesni_aesdeclast>; - def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), (AESENCrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), @@ -4113,13 +4796,27 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), (AESDECLASTrm VR128:$src1, addr:$src2)>; +// Perform the AES InvMixColumn Transformation +let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { + def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1), + "vaesimc\t{$src1, $dst|$dst, $src1}", + [(set VR128:$dst, + (int_x86_aesni_aesimc VR128:$src1))]>, + OpSize, VEX; + def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src1), + "vaesimc\t{$src1, $dst|$dst, $src1}", + [(set VR128:$dst, + (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>, + OpSize, VEX; +} def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", [(set VR128:$dst, (int_x86_aesni_aesimc VR128:$src1))]>, OpSize; - def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src1), "aesimc\t{$src1, $dst|$dst, $src1}", @@ -4127,6 +4824,22 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), (int_x86_aesni_aesimc (bitconvert (memopv2i64 addr:$src1))))]>, OpSize; +// AES Round Key Generation Assist +let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { + def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i8imm:$src2), + "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_aesni_aeskeygenassist 
VR128:$src1, imm:$src2))]>,
+      OpSize, VEX;
+  def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
+      (ins i128mem:$src1, i8imm:$src2),
+      "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+      [(set VR128:$dst,
+        (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+         imm:$src2))]>,
+      OpSize, VEX;
+}
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
  (ins VR128:$src1, i8imm:$src2),
  "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index a9681e6..633ddd4 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -30,7 +30,7 @@ class X86MCCodeEmitter : public MCCodeEmitter {
  MCContext &Ctx;
  bool Is64BitMode;
public:
-  X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit)
+  X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit)
    : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
    Is64BitMode = is64Bit;
  }
@@ -38,17 +38,18 @@ public:
  ~X86MCCodeEmitter() {}

  unsigned getNumFixupKinds() const {
-    return 4;
+    return 5;
  }

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const {
    const static MCFixupKindInfo Infos[] = {
      { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
      { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel },
+     { "reloc_pcrel_2byte", 0, 2 * 8, MCFixupKindInfo::FKF_IsPCRel },
      { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel },
      { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel } };
-
+
    if (Kind < FirstTargetFixupKind)
      return MCCodeEmitter::getFixupKindInfo(Kind);
@@ -56,16 +57,38 @@ public:
           "Invalid kind!");
    return Infos[Kind - FirstTargetFixupKind];
  }
-
+
  static unsigned GetX86RegNum(const MCOperand &MO) {
    return X86RegisterInfo::getX86RegNum(MO.getReg());
  }
-
+
+  // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+  // 0-7 and the difference between the two groups is given by the REX prefix.
+  // In the VEX prefix, registers are seen sequentially from 0-15 and encoded
+  // in 1's complement form, for example:
+  //
+  //    ModRM field => XMM9 => 1
+  //    VEX.VVVV    => XMM9 => ~9
+  //
+  // See table 4-35 of Intel AVX Programming Reference for details.
+  static unsigned char getVEXRegisterEncoding(const MCInst &MI,
+                                              unsigned OpNum) {
+    unsigned SrcReg = MI.getOperand(OpNum).getReg();
+    unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+    if ((SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15) ||
+        (SrcReg >= X86::YMM8 && SrcReg <= X86::YMM15))
+      SrcRegNum += 8;
+
+    // The registers represented through VEX_VVVV should
+    // be encoded in 1's complement form.
+    return (~SrcRegNum) & 0xf;
+  }
+
  void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
    OS << (char)C;
    ++CurByte;
  }
-
+
  void EmitConstant(uint64_t Val, unsigned Size, unsigned &CurByte,
                    raw_ostream &OS) const {
    // Output the constant in little endian byte order.
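
A worked example of the inverted encoding getVEXRegisterEncoding (defined just above) implements; illustrative only, not patch code:

    // XMM9 is register number 9 after the high-group adjustment, and
    // VEX.vvvv stores its 1's complement: ~9 & 0xF == 0b0110 == 6.
    // An unused vvvv field stays at ~0 & 0xF == 0b1111.
    static unsigned char vexVVVV(unsigned RegNum) {  // RegNum in 0..15
      return (~RegNum) & 0xf;
    }
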
@@ -75,38 +98,49 @@ public:
    }
  }

-  void EmitImmediate(const MCOperand &Disp,
+  void EmitImmediate(const MCOperand &Disp, unsigned ImmSize,
                     MCFixupKind FixupKind, unsigned &CurByte, raw_ostream &OS,
                     SmallVectorImpl<MCFixup> &Fixups, int ImmOffset = 0) const;
-
+
  inline static unsigned char ModRMByte(unsigned Mod, unsigned RegOpcode,
                                        unsigned RM) {
    assert(Mod < 4 && RegOpcode < 8 && RM < 8 && "ModRM Fields out of range!");
    return RM | (RegOpcode << 3) | (Mod << 6);
  }
-
+
  void EmitRegModRMByte(const MCOperand &ModRMReg, unsigned RegOpcodeFld,
                        unsigned &CurByte, raw_ostream &OS) const {
    EmitByte(ModRMByte(3, RegOpcodeFld, GetX86RegNum(ModRMReg)), CurByte, OS);
  }
-
+
  void EmitSIBByte(unsigned SS, unsigned Index, unsigned Base,
                   unsigned &CurByte, raw_ostream &OS) const {
    // SIB byte is in the same format as the ModRMByte.
    EmitByte(ModRMByte(SS, Index, Base), CurByte, OS);
  }
-
-
+
+
  void EmitMemModRMByte(const MCInst &MI, unsigned Op,
-                        unsigned RegOpcodeField,
-                        unsigned TSFlags, unsigned &CurByte, raw_ostream &OS,
+                        unsigned RegOpcodeField,
+                        uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
                        SmallVectorImpl<MCFixup> &Fixups) const;
-
+
  void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                         SmallVectorImpl<MCFixup> &Fixups) const;
-
+
+  void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+                           const MCInst &MI, const TargetInstrDesc &Desc,
+                           raw_ostream &OS) const;
+
+  void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
+                                 int MemOperand, const MCInst &MI,
+                                 raw_ostream &OS) const;
+
+  void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
+                        const MCInst &MI, const TargetInstrDesc &Desc,
+                        raw_ostream &OS) const;
};

} // end anonymous namespace
@@ -124,24 +158,23 @@ MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &,
  return new X86MCCodeEmitter(TM, Ctx, true);
}
-
-/// isDisp8 - Return true if this signed displacement fits in a 8-bit
-/// sign-extended field.
+/// isDisp8 - Return true if this signed displacement fits in an 8-bit
+/// sign-extended field.
static bool isDisp8(int Value) {
  return Value == (signed char)Value;
}

/// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
/// in an instruction with the specified TSFlags.
-static MCFixupKind getImmFixupKind(unsigned TSFlags) {
+static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
  unsigned Size = X86II::getSizeOfImm(TSFlags);
  bool isPCRel = X86II::isImmPCRel(TSFlags);
-
+
  switch (Size) {
  default: assert(0 && "Unknown immediate size");
  case 1: return isPCRel ? MCFixupKind(X86::reloc_pcrel_1byte) : FK_Data_1;
+  case 2: return isPCRel ? MCFixupKind(X86::reloc_pcrel_2byte) : FK_Data_2;
  case 4: return isPCRel ? MCFixupKind(X86::reloc_pcrel_4byte) : FK_Data_4;
-  case 2: assert(!isPCRel); return FK_Data_2;
  case 8: assert(!isPCRel); return FK_Data_8;
  }
}
@@ -162,29 +195,30 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind,
  // If we have an immoffset, add it to the expression.
  const MCExpr *Expr = DispOp.getExpr();
-
+
  // If the fixup is pc-relative, we need to bias the value to be relative to
  // the start of the field, not the end of the field.
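
The bias the comment above describes can be checked with a little arithmetic (a sketch, not patch code): the CPU adds a pc-relative displacement to the address just past the instruction, which for a trailing immediate is the end of the field, while a fixup is resolved against the field's start, so the emitter pre-subtracts the field size (the ImmOffset adjustments just below):

    // Disp the hardware needs vs. what the fixup resolver computes:
    // the resolver yields Target - FieldStart + Addend, so choosing
    // Addend = -FieldSize makes it Target - (FieldStart + FieldSize),
    // i.e. the displacement relative to the end of the field.
    static long long pcRelDisp(long long Target, long long FieldStart,
                               int FieldSize) {
      return Target - (FieldStart + FieldSize);
    }
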
if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || FixupKind == MCFixupKind(X86::reloc_riprel_4byte) || FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load)) ImmOffset -= 4; + if (FixupKind == MCFixupKind(X86::reloc_pcrel_2byte)) + ImmOffset -= 2; if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) ImmOffset -= 1; - + if (ImmOffset) Expr = MCBinaryExpr::CreateAdd(Expr, MCConstantExpr::Create(ImmOffset, Ctx), Ctx); - + // Emit a symbolic constant as a fixup and 4 zeros. Fixups.push_back(MCFixup::Create(CurByte, Expr, FixupKind)); EmitConstant(0, Size, CurByte, OS); } - void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - unsigned TSFlags, unsigned &CurByte, + uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl &Fixups) const{ const MCOperand &Disp = MI.getOperand(Op+3); @@ -192,43 +226,43 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, const MCOperand &Scale = MI.getOperand(Op+1); const MCOperand &IndexReg = MI.getOperand(Op+2); unsigned BaseReg = Base.getReg(); - + // Handle %rip relative addressing. if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode - assert(IndexReg.getReg() == 0 && Is64BitMode && - "Invalid rip-relative address"); + assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode"); + assert(IndexReg.getReg() == 0 && "Invalid rip-relative address"); EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); - + unsigned FixupKind = X86::reloc_riprel_4byte; - + // movq loads are handled with a special relocation form which allows the // linker to eliminate some loads for GOT references which end up in the // same linkage unit. if (MI.getOpcode() == X86::MOV64rm || MI.getOpcode() == X86::MOV64rm_TC) FixupKind = X86::reloc_riprel_4byte_movq_load; - + // rip-relative addressing is actually relative to the *next* instruction. // Since an immediate can follow the mod/rm byte for an instruction, this // means that we need to bias the immediate field of the instruction with // the size of the immediate field. If we have this case, add it into the // expression to emit. int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; - + EmitImmediate(Disp, 4, MCFixupKind(FixupKind), CurByte, OS, Fixups, -ImmSize); return; } - + unsigned BaseRegNo = BaseReg ? GetX86RegNum(Base) : -1U; - + // Determine whether a SIB byte is needed. - // If no BaseReg, issue a RIP relative instruction only if the MCE can + // If no BaseReg, issue a RIP relative instruction only if the MCE can // resolve addresses on-the-fly, otherwise use SIB (Intel Manual 2A, table // 2-7) and absolute references. if (// The SIB byte must be used if there is an index register. - IndexReg.getReg() == 0 && + IndexReg.getReg() == 0 && // The SIB byte must be used if the base is ESP/RSP/R12, all of which // encode to an R/M value of 4, which indicates that a SIB byte is // present. @@ -242,7 +276,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups); return; } - + // If the base is not EBP/ESP and there is no displacement, use simple // indirect register encoding, this handles addresses like [EAX]. The // encoding for [EBP] with no displacement means [disp32] so we handle it @@ -251,24 +285,24 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, EmitByte(ModRMByte(0, RegOpcodeField, BaseRegNo), CurByte, OS); return; } - + // Otherwise, if the displacement fits in a byte, encode as [REG+disp8]. 
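
For the [REG+disp8] case the comment above introduces, a worked byte-level example (illustrative only): with mod = 01 promising a one-byte displacement, [eax+8] and a /r field of 0 yields ModRM 0x40 followed by the displacement byte 0x08:

    // Same packing as ModRMByte(): rm | reg << 3 | mod << 6, with mod = 1.
    static unsigned char modRMDisp8(unsigned RegOpcodeField,
                                    unsigned BaseRegNo) {
      return (unsigned char)(BaseRegNo | (RegOpcodeField << 3) | (1u << 6));
    }
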
if (Disp.isImm() && isDisp8(Disp.getImm())) {
      EmitByte(ModRMByte(1, RegOpcodeField, BaseRegNo), CurByte, OS);
      EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
      return;
    }
-
+
    // Otherwise, emit the most general non-SIB encoding: [REG+disp32]
    EmitByte(ModRMByte(2, RegOpcodeField, BaseRegNo), CurByte, OS);
    EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
    return;
  }
-
+
  // We need a SIB byte, so start by outputting the ModR/M byte first
  assert(IndexReg.getReg() != X86::ESP &&
         IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!");
-
+
  bool ForceDisp32 = false;
  bool ForceDisp8  = false;
  if (BaseReg == 0) {
@@ -294,13 +328,13 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
    // Emit the normal disp32 encoding.
    EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS);
  }
-
+
  // Calculate what the SS field value should be...
  static const unsigned SSTable[] = { ~0, 0, 1, ~0, 2, ~0, ~0, ~0, 3 };
  unsigned SS = SSTable[Scale.getImm()];
-
+
  if (BaseReg == 0) {
-    // Handle the SIB byte for the case where there is no base, see Intel
+    // Handle the SIB byte for the case where there is no base, see Intel
    // Manual 2A, table 2-7. The displacement has already been output.
    unsigned IndexRegNo;
    if (IndexReg.getReg())
@@ -316,7 +350,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
      IndexRegNo = 4; // For example [ESP+1*+4]
    EmitSIBByte(SS, IndexRegNo, GetX86RegNum(Base), CurByte, OS);
  }
-
+
  // Do we need to output a displacement?
  if (ForceDisp8)
    EmitImmediate(Disp, 1, FK_Data_1, CurByte, OS, Fixups);
@@ -324,26 +358,216 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
    EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
}

+/// EmitVEXOpcodePrefix - AVX instructions are encoded using an opcode prefix
+/// called VEX.
+void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+                                           int MemOperand, const MCInst &MI,
+                                           const TargetInstrDesc &Desc,
+                                           raw_ostream &OS) const {
+  bool HasVEX_4V = false;
+  if (TSFlags & X86II::VEX_4V)
+    HasVEX_4V = true;
+
+  // VEX_R: opcode extension equivalent to REX.R in
+  // 1's complement (inverted) form
+  //
+  //  1: Same as REX_R=0 (must be 1 in 32-bit mode)
+  //  0: Same as REX_R=1 (64-bit mode only)
+  //
+  unsigned char VEX_R = 0x1;
+
+  // VEX_X: equivalent to REX.X, only used when a
+  // register is used for index in SIB Byte.
+  //
+  //  1: Same as REX.X=0 (must be 1 in 32-bit mode)
+  //  0: Same as REX.X=1 (64-bit mode only)
+  unsigned char VEX_X = 0x1;
+
+  // VEX_B:
+  //
+  //  1: Same as REX_B=0 (ignored in 32-bit mode)
+  //  0: Same as REX_B=1 (64-bit mode only)
+  //
+  unsigned char VEX_B = 0x1;
+
+  // VEX_W: opcode specific (use like REX.W, or used for
+  // opcode extension, or ignored, depending on the opcode byte)
+  unsigned char VEX_W = 0;
+
+  // VEX_5M (VEX m-mmmmm field):
+  //
+  //  0b00000: Reserved for future use
+  //  0b00001: implied 0F leading opcode
+  //  0b00010: implied 0F 38 leading opcode bytes
+  //  0b00011: implied 0F 3A leading opcode bytes
+  //  0b00100-0b11111: Reserved for future use
+  //
+  unsigned char VEX_5M = 0x1;
+
+  // VEX_4V (VEX vvvv field): a register specifier
+  // (in 1's complement form) or 1111 if unused.
+ unsigned char VEX_4V = 0xf; + + // VEX_L (Vector Length): + // + // 0: scalar or 128-bit vector + // 1: 256-bit vector + // + unsigned char VEX_L = 0; + + // VEX_PP: opcode extension providing equivalent + // functionality of a SIMD prefix + // + // 0b00: None + // 0b01: 66 + // 0b10: F3 + // 0b11: F2 + // + unsigned char VEX_PP = 0; + + // Encode the operand size opcode prefix as needed. + if (TSFlags & X86II::OpSize) + VEX_PP = 0x01; + + if (TSFlags & X86II::VEX_W) + VEX_W = 1; + + switch (TSFlags & X86II::Op0Mask) { + default: assert(0 && "Invalid prefix!"); + case X86II::T8: // 0F 38 + VEX_5M = 0x2; + break; + case X86II::TA: // 0F 3A + VEX_5M = 0x3; + break; + case X86II::TF: // F2 0F 38 + VEX_PP = 0x3; + VEX_5M = 0x2; + break; + case X86II::XS: // F3 0F + VEX_PP = 0x2; + break; + case X86II::XD: // F2 0F + VEX_PP = 0x3; + break; + case X86II::TB: // Bypass: Not used by VEX + case 0: + break; // No prefix! + } + + // Set the vector length to 256-bit if YMM0-YMM15 is used + for (unsigned i = 0; i != MI.getNumOperands(); ++i) { + if (!MI.getOperand(i).isReg()) + continue; + unsigned SrcReg = MI.getOperand(i).getReg(); + if (SrcReg >= X86::YMM0 && SrcReg <= X86::YMM15) + VEX_L = 1; + } + + unsigned NumOps = MI.getNumOperands(); + unsigned CurOp = 0; + + switch (TSFlags & X86II::FormMask) { + case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); + case X86II::MRM0m: case X86II::MRM1m: + case X86II::MRM2m: case X86II::MRM3m: + case X86II::MRM4m: case X86II::MRM5m: + case X86II::MRM6m: case X86II::MRM7m: + case X86II::MRMDestMem: + NumOps = CurOp = X86::AddrNumOperands; + case X86II::MRMSrcMem: + case X86II::MRMSrcReg: + if (MI.getNumOperands() > CurOp && MI.getOperand(CurOp).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; + + // CurOp and NumOps are equal when VEX_R represents a register used + // to index a memory destination (which is the last operand) + CurOp = (CurOp == NumOps) ? 0 : CurOp+1; + + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + CurOp++; + } + + // If the last register should be encoded in the immediate field + // do not use any bit from VEX prefix to this register, ignore it + if (TSFlags & X86II::VEX_I8IMM) + NumOps--; + + for (; CurOp != NumOps; ++CurOp) { + const MCOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + VEX_B = 0x0; + if (!VEX_B && MO.isReg() && + ((TSFlags & X86II::FormMask) == X86II::MRMSrcMem) && + X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + VEX_X = 0x0; + } + break; + default: // MRMDestReg, MRM0r-MRM7r + if (MI.getOperand(CurOp).isReg() && + X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0; + + if (HasVEX_4V) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); + + CurOp++; + for (; CurOp != NumOps; ++CurOp) { + const MCOperand &MO = MI.getOperand(CurOp); + if (MO.isReg() && !HasVEX_4V && + X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) + VEX_R = 0x0; + } + break; + assert(0 && "Not implemented!"); + } + + // Emit segment override opcode prefix as needed. 
+ EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS); + + // VEX opcode prefix can have 2 or 3 bytes + // + // 3 bytes: + // +-----+ +--------------+ +-------------------+ + // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | + // +-----+ +--------------+ +-------------------+ + // 2 bytes: + // +-----+ +-------------------+ + // | C5h | | R | vvvv | L | pp | + // +-----+ +-------------------+ + // + unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); + + if (VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { // 2 byte VEX prefix + EmitByte(0xC5, CurByte, OS); + EmitByte(LastByte | (VEX_R << 7), CurByte, OS); + return; + } + + // 3 byte VEX prefix + EmitByte(0xC4, CurByte, OS); + EmitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M, CurByte, OS); + EmitByte(LastByte | (VEX_W << 7), CurByte, OS); +} + /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64 /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand /// size, and 3) use of X86-64 extended registers. -static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, +static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, const TargetInstrDesc &Desc) { - // Pseudo instructions never have a rex byte. - if ((TSFlags & X86II::FormMask) == X86II::Pseudo) - return 0; - unsigned REX = 0; if (TSFlags & X86II::REX_W) - REX |= 1 << 3; - + REX |= 1 << 3; // set REX.W + if (MI.getNumOperands() == 0) return REX; - + unsigned NumOps = MI.getNumOperands(); // FIXME: MCInst should explicitize the two-addrness. bool isTwoAddr = NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; - + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. unsigned i = isTwoAddr ? 1 : 0; for (; i != NumOps; ++i) { @@ -353,34 +577,34 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, if (!X86InstrInfo::isX86_64NonExtLowByteReg(Reg)) continue; // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything // that returns non-zero. - REX |= 0x40; + REX |= 0x40; // REX fixed encoding prefix break; } - + switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!"); case X86II::MRMSrcReg: if (MI.getOperand(0).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R i = isTwoAddr ? 2 : 1; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << 0; + REX |= 1 << 0; // set REX.B } break; case X86II::MRMSrcMem: { if (MI.getOperand(0).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R unsigned Bit = 0; i = isTwoAddr ? 2 : 1; for (; i != NumOps; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << Bit; + REX |= 1 << Bit; // set REX.B (Bit=0) and REX.X (Bit=1) Bit++; } } @@ -391,17 +615,17 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, case X86II::MRM4m: case X86II::MRM5m: case X86II::MRM6m: case X86II::MRM7m: case X86II::MRMDestMem: { - unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); + unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands); i = isTwoAddr ? 
1 : 0; if (NumOps > e && MI.getOperand(e).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(e).getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R unsigned Bit = 0; for (; i != e; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg()) { if (X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << Bit; + REX |= 1 << Bit; // REX.B (Bit=0) and REX.X (Bit=1) Bit++; } } @@ -410,39 +634,40 @@ static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags, default: if (MI.getOperand(0).isReg() && X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg())) - REX |= 1 << 0; + REX |= 1 << 0; // set REX.B i = isTwoAddr ? 2 : 1; for (unsigned e = NumOps; i != e; ++i) { const MCOperand &MO = MI.getOperand(i); if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg())) - REX |= 1 << 2; + REX |= 1 << 2; // set REX.R } break; } return REX; } -void X86MCCodeEmitter:: -EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl &Fixups) const { - unsigned Opcode = MI.getOpcode(); - const TargetInstrDesc &Desc = TII.get(Opcode); - unsigned TSFlags = Desc.TSFlags; - - // Keep track of the current byte being emitted. - unsigned CurByte = 0; - - // FIXME: We should emit the prefixes in exactly the same order as GAS does, - // in order to provide diffability. - - // Emit the lock opcode prefix as needed. - if (TSFlags & X86II::LOCK) - EmitByte(0xF0, CurByte, OS); - - // Emit segment override opcode prefix as needed. +/// EmitSegmentOverridePrefix - Emit segment override opcode prefix as needed +void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags, + unsigned &CurByte, int MemOperand, + const MCInst &MI, + raw_ostream &OS) const { switch (TSFlags & X86II::SegOvrMask) { default: assert(0 && "Invalid segment!"); - case 0: break; // No segment override! + case 0: + // No segment override, check for explicit one on memory operand. + if (MemOperand != -1) { // If the instruction has a memory operand. + switch (MI.getOperand(MemOperand+X86::AddrSegmentReg).getReg()) { + default: assert(0 && "Unknown segment register!"); + case 0: break; + case X86::CS: EmitByte(0x2E, CurByte, OS); break; + case X86::SS: EmitByte(0x36, CurByte, OS); break; + case X86::DS: EmitByte(0x3E, CurByte, OS); break; + case X86::ES: EmitByte(0x26, CurByte, OS); break; + case X86::FS: EmitByte(0x64, CurByte, OS); break; + case X86::GS: EmitByte(0x65, CurByte, OS); break; + } + } + break; case X86II::FS: EmitByte(0x64, CurByte, OS); break; @@ -450,19 +675,36 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(0x65, CurByte, OS); break; } - +} + +/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode. +/// +/// MemOperand is the operand # of the start of a memory operand if present. If +/// Not present, it is -1. +void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, + int MemOperand, const MCInst &MI, + const TargetInstrDesc &Desc, + raw_ostream &OS) const { + + // Emit the lock opcode prefix as needed. + if (TSFlags & X86II::LOCK) + EmitByte(0xF0, CurByte, OS); + + // Emit segment override opcode prefix as needed. + EmitSegmentOverridePrefix(TSFlags, CurByte, MemOperand, MI, OS); + // Emit the repeat opcode prefix as needed. if ((TSFlags & X86II::Op0Mask) == X86II::REP) EmitByte(0xF3, CurByte, OS); - + // Emit the operand size opcode prefix as needed. if (TSFlags & X86II::OpSize) EmitByte(0x66, CurByte, OS); - + // Emit the address size opcode prefix as needed. 
if (TSFlags & X86II::AdSize) EmitByte(0x67, CurByte, OS); - + bool Need0FPrefix = false; switch (TSFlags & X86II::Op0Mask) { default: assert(0 && "Invalid prefix!"); @@ -494,18 +736,18 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::DE: EmitByte(0xDE, CurByte, OS); break; case X86II::DF: EmitByte(0xDF, CurByte, OS); break; } - + // Handle REX prefix. // FIXME: Can this come before F2 etc to simplify emission? if (Is64BitMode) { if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc)) EmitByte(0x40 | REX, CurByte, OS); } - + // 0x0F escape code must be emitted just before the opcode. if (Need0FPrefix) EmitByte(0x0F, CurByte, OS); - + // FIXME: Pull this up into previous switch if REX can be moved earlier. switch (TSFlags & X86II::Op0Mask) { case X86II::TF: // F2 0F 38 @@ -516,8 +758,21 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(0x3A, CurByte, OS); break; } - +} + +void X86MCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups) const { + unsigned Opcode = MI.getOpcode(); + const TargetInstrDesc &Desc = TII.get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + + // Pseudo instructions don't get encoded. + if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return; + // If this is a two-address instruction, skip one of the register operands. + // FIXME: This should be handled during MCInst lowering. unsigned NumOps = Desc.getNumOperands(); unsigned CurOp = 0; if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1) @@ -525,56 +780,85 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) // Skip the last source operand that is tied_to the dest reg. e.g. LXADD32 --NumOps; - + + // Keep track of the current byte being emitted. + unsigned CurByte = 0; + + // Is this instruction encoded using the AVX VEX prefix? + bool HasVEXPrefix = false; + + // It uses the VEX.VVVV field? + bool HasVEX_4V = false; + + if (TSFlags & X86II::VEX) + HasVEXPrefix = true; + if (TSFlags & X86II::VEX_4V) + HasVEX_4V = true; + + // Determine where the memory operand starts, if present. + int MemoryOperand = X86II::getMemoryOperandNo(TSFlags); + if (MemoryOperand != -1) MemoryOperand += CurOp; + + if (!HasVEXPrefix) + EmitOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); + else + EmitVEXOpcodePrefix(TSFlags, CurByte, MemoryOperand, MI, Desc, OS); + unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); + unsigned SrcRegNum = 0; switch (TSFlags & X86II::FormMask) { case X86II::MRMInitReg: assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!"); default: errs() << "FORM: " << (TSFlags & X86II::FormMask) << "\n"; assert(0 && "Unknown FormMask value in X86MCCodeEmitter!"); - case X86II::Pseudo: return; // Pseudo instructions encode to nothing. 
+ case X86II::Pseudo: + assert(0 && "Pseudo instruction shouldn't be emitted"); case X86II::RawFrm: EmitByte(BaseOpcode, CurByte, OS); break; - + case X86II::AddRegFrm: EmitByte(BaseOpcode + GetX86RegNum(MI.getOperand(CurOp++)), CurByte, OS); break; - + case X86II::MRMDestReg: EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp), GetX86RegNum(MI.getOperand(CurOp+1)), CurByte, OS); CurOp += 2; break; - + case X86II::MRMDestMem: EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, - GetX86RegNum(MI.getOperand(CurOp + X86AddrNumOperands)), + GetX86RegNum(MI.getOperand(CurOp + X86::AddrNumOperands)), TSFlags, CurByte, OS, Fixups); - CurOp += X86AddrNumOperands + 1; + CurOp += X86::AddrNumOperands + 1; break; - + case X86II::MRMSrcReg: EmitByte(BaseOpcode, CurByte, OS); - EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)), - CurByte, OS); - CurOp += 2; + SrcRegNum = CurOp + 1; + + if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV) + SrcRegNum++; + + EmitRegModRMByte(MI.getOperand(SrcRegNum), + GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS); + CurOp = SrcRegNum + 1; break; - + case X86II::MRMSrcMem: { + int AddrOperands = X86::AddrNumOperands; + unsigned FirstMemOp = CurOp+1; + if (HasVEX_4V) { + ++AddrOperands; + ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV). + } + EmitByte(BaseOpcode, CurByte, OS); - // FIXME: Maybe lea should have its own form? This is a horrible hack. - int AddrOperands; - if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || - Opcode == X86::LEA16r || Opcode == X86::LEA32r) - AddrOperands = X86AddrNumOperands - 1; // No segment register - else - AddrOperands = X86AddrNumOperands; - - EmitMemModRMByte(MI, CurOp+1, GetX86RegNum(MI.getOperand(CurOp)), + EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)), TSFlags, CurByte, OS, Fixups); CurOp += AddrOperands + 1; break; @@ -584,6 +868,8 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, case X86II::MRM2r: case X86II::MRM3r: case X86II::MRM4r: case X86II::MRM5r: case X86II::MRM6r: case X86II::MRM7r: + if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV). + CurOp++; EmitByte(BaseOpcode, CurByte, OS); EmitRegModRMByte(MI.getOperand(CurOp++), (TSFlags & X86II::FormMask)-X86II::MRM0r, @@ -596,7 +882,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); EmitMemModRMByte(MI, CurOp, (TSFlags & X86II::FormMask)-X86II::MRM0m, TSFlags, CurByte, OS, Fixups); - CurOp += X86AddrNumOperands; + CurOp += X86::AddrNumOperands; break; case X86II::MRM_C1: EmitByte(BaseOpcode, CurByte, OS); @@ -639,14 +925,27 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(0xF9, CurByte, OS); break; } - + // If there is a remaining operand, it must be a trailing immediate. Emit it // according to the right size for the instruction. - if (CurOp != NumOps) - EmitImmediate(MI.getOperand(CurOp++), - X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), - CurByte, OS, Fixups); - + if (CurOp != NumOps) { + // The last source register of a 4 operand instruction in AVX is encoded + // in bits[7:4] of a immediate byte, and bits[3:0] are ignored. + if (TSFlags & X86II::VEX_I8IMM) { + const MCOperand &MO = MI.getOperand(CurOp++); + bool IsExtReg = + X86InstrInfo::isX86_64ExtendedReg(MO.getReg()); + unsigned RegNum = (IsExtReg ? 
(1 << 7) : 0); + RegNum |= GetX86RegNum(MO) << 4; + EmitImmediate(MCOperand::CreateImm(RegNum), 1, FK_Data_1, CurByte, OS, + Fixups); + } else + EmitImmediate(MI.getOperand(CurOp++), + X86II::getSizeOfImm(TSFlags), getImmFixupKind(TSFlags), + CurByte, OS, Fixups); + } + + #ifndef NDEBUG // FIXME: Verify. if (/*!Desc.isVariadic() &&*/ CurOp != NumOps) { diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 98975ea..5f31e00 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -127,21 +127,29 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7: return RegNo-X86::ST0; - case X86::XMM0: case X86::XMM8: case X86::MM0: + case X86::XMM0: case X86::XMM8: + case X86::YMM0: case X86::YMM8: case X86::MM0: return 0; - case X86::XMM1: case X86::XMM9: case X86::MM1: + case X86::XMM1: case X86::XMM9: + case X86::YMM1: case X86::YMM9: case X86::MM1: return 1; - case X86::XMM2: case X86::XMM10: case X86::MM2: + case X86::XMM2: case X86::XMM10: + case X86::YMM2: case X86::YMM10: case X86::MM2: return 2; - case X86::XMM3: case X86::XMM11: case X86::MM3: + case X86::XMM3: case X86::XMM11: + case X86::YMM3: case X86::YMM11: case X86::MM3: return 3; - case X86::XMM4: case X86::XMM12: case X86::MM4: + case X86::XMM4: case X86::XMM12: + case X86::YMM4: case X86::YMM12: case X86::MM4: return 4; - case X86::XMM5: case X86::XMM13: case X86::MM5: + case X86::XMM5: case X86::XMM13: + case X86::YMM5: case X86::YMM13: case X86::MM5: return 5; - case X86::XMM6: case X86::XMM14: case X86::MM6: + case X86::XMM6: case X86::XMM14: + case X86::YMM6: case X86::YMM14: case X86::MM6: return 6; - case X86::XMM7: case X86::XMM15: case X86::MM7: + case X86::XMM7: case X86::XMM15: + case X86::YMM7: case X86::YMM15: case X86::MM7: return 7; case X86::ES: @@ -157,6 +165,34 @@ unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) { case X86::GS: return 5; + case X86::CR0: + return 0; + case X86::CR1: + return 1; + case X86::CR2: + return 2; + case X86::CR3: + return 3; + case X86::CR4: + return 4; + + case X86::DR0: + return 0; + case X86::DR1: + return 1; + case X86::DR2: + return 2; + case X86::DR3: + return 3; + case X86::DR4: + return 4; + case X86::DR5: + return 5; + case X86::DR6: + return 6; + case X86::DR7: + return 7; + default: assert(isVirtualRegister(RegNo) && "Unknown physical register!"); llvm_unreachable("Register allocator hasn't allocated reg correctly yet!"); @@ -357,56 +393,6 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { } } -const TargetRegisterClass* const* -X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - bool callsEHReturn = false; - if (MF) - callsEHReturn = MF->getMMI().callsEHReturn(); - - static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = { - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = { - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, - &X86::GR32RegClass, &X86::GR32RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, 
&X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, 0 - }; - static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = { - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::GR64RegClass, &X86::GR64RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, - &X86::VR128RegClass, &X86::VR128RegClass, 0 - }; - - if (Is64Bit) { - if (IsWin64) - return CalleeSavedRegClassesWin64; - else - return (callsEHReturn ? - CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit); - } else { - return (callsEHReturn ? - CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit); - } -} - BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); // Set the stack-pointer register and its aliases as reserved. @@ -696,8 +682,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // } // [EBP] MFI->CreateFixedObject(-TailCallReturnAddrDelta, - (-1U*SlotSize)+TailCallReturnAddrDelta, - true, false); + (-1U*SlotSize)+TailCallReturnAddrDelta, true); } if (hasFP(MF)) { @@ -710,7 +695,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, -(int)SlotSize + TFI.getOffsetOfLocalArea() + TailCallReturnAddrDelta, - true, false); + true); assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); FrameIdx = 0; @@ -1240,8 +1225,8 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, if (CSSize) { unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r; MachineInstr *MI = - addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), - FramePtr, false, -CSSize); + addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr), + FramePtr, false, -CSSize); MBB.insert(MBBI, MI); } else { BuildMI(MBB, MBBI, DL, @@ -1301,9 +1286,11 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, for (unsigned i = 0; i != 5; ++i) MIB.addOperand(MBBI->getOperand(i)); } else if (RetOpcode == X86::TCRETURNri64) { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)). + addReg(JumpTarget.getReg(), RegState::Kill); } else { - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)). + addReg(JumpTarget.getReg(), RegState::Kill); } MachineInstr *NewMI = prior(MBBI); diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index d0b82e2..d852bcd 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -105,12 +105,6 @@ public: /// callee-save registers on this target. const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; - /// getCalleeSavedRegClasses - Return a null-terminated list of the preferred - /// register classes to spill each callee-saved register with. The order and - /// length of this list match the getCalleeSavedRegs() list. - const TargetRegisterClass* const* - getCalleeSavedRegClasses(const MachineFunction *MF = 0) const; - /// getReservedRegs - Returns a bitset indexed by physical register number /// indicating if a register is a special register that has particular uses and /// should be considered unavailable at all times, e.g. SP, RA. 
This is used by diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 91cfaa9..9f0382e 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -147,7 +147,7 @@ let Namespace = "X86" in { def MM5 : Register<"mm5">, DwarfRegNum<[46, 34, 34]>; def MM6 : Register<"mm6">, DwarfRegNum<[47, 35, 35]>; def MM7 : Register<"mm7">, DwarfRegNum<[48, 36, 36]>; - + // Pseudo Floating Point registers def FP0 : Register<"fp0">; def FP1 : Register<"fp1">; @@ -155,7 +155,7 @@ let Namespace = "X86" in { def FP3 : Register<"fp3">; def FP4 : Register<"fp4">; def FP5 : Register<"fp5">; - def FP6 : Register<"fp6">; + def FP6 : Register<"fp6">; // XMM Registers, used by the various SSE instruction set extensions. // The sub_ss and sub_sd subregs are the same registers with another regclass. @@ -357,7 +357,7 @@ def GR16 : RegisterClass<"X86", [i16], 16, }]; } -def GR32 : RegisterClass<"X86", [i32], 32, +def GR32 : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP, R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)]; @@ -412,7 +412,7 @@ def GR32 : RegisterClass<"X86", [i32], 32, // GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since // RIP isn't really a register and it can't be used anywhere except in an // address, but it doesn't cause trouble. -def GR64 : RegisterClass<"X86", [i64], 64, +def GR64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11, RBX, R14, R15, R12, R13, RBP, RSP, RIP]> { let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), @@ -446,7 +446,7 @@ def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]> { } // Debug registers. -def DEBUG_REG : RegisterClass<"X86", [i32], 32, +def DEBUG_REG : RegisterClass<"X86", [i32], 32, [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]> { } @@ -780,14 +780,14 @@ def RST : RegisterClass<"X86", [f80, f64, f32], 32, } // Generic vector registers: VR64 and VR128. -def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64, v2f32], 64, +def VR64 : RegisterClass<"X86", [v8i8, v4i16, v2i32, v1i64], 64, [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>; def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)]; - + let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; @@ -803,11 +803,27 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, } }]; } -def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, + +def VR256 : RegisterClass<"X86", [v8i32, v4i64, v8f32, v4f64], 256, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15]> { let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)]; + + let MethodProtos = [{ + iterator allocation_order_end(const MachineFunction &MF) const; + }]; + let MethodBodies = [{ + VR256Class::iterator + VR256Class::allocation_order_end(const MachineFunction &MF) const { + const TargetMachine &TM = MF.getTarget(); + const X86Subtarget &Subtarget = TM.getSubtarget(); + if (!Subtarget.is64Bit()) + return end()-8; // Only YMM0 to YMM7 are available in 32-bit mode. + else + return end(); + } + }]; } // Status flags registers. 
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 09a2685..4a10be5 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -53,9 +53,12 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { if (GV->hasDLLImportLinkage()) return X86II::MO_DLLIMPORT; - // Materializable GVs (in JIT lazy compilation mode) do not require an - // extra load from stub. - bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); + // Determine whether this is a reference to a definition or a declaration. + // Materializable GVs (in JIT lazy compilation mode) do not require an extra + // load from stub. + bool isDecl = GV->hasAvailableExternallyLinkage(); + if (GV->isDeclaration() && !GV->isMaterializable()) + isDecl = true; // X86-64 in PIC mode. if (isPICStyleRIPRel()) { @@ -293,12 +296,11 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , IsBTMemSlow(false) , IsUAMemFast(false) , HasVectorUAMem(false) - , DarwinVers(0) , stackAlignment(8) // FIXME: this is a known good value for Yonah. How about others? , MaxInlineSizeThreshold(128) - , Is64Bit(is64Bit) - , TargetType(isELF) { // Default to ELF unless otherwise specified. + , TargetTriple(TT) + , Is64Bit(is64Bit) { // default to hard float ABI if (FloatABIType == FloatABI::Default) @@ -328,47 +330,40 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, HasCMov = true; } - DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel << ", 3DNowLevel " << X863DNowLevel << ", 64bit " << HasX86_64 << "\n"); assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Set the boolean corresponding to the current target triple, or the default - // if one cannot be determined, to true. - if (TT.length() > 5) { - size_t Pos; - if ((Pos = TT.find("-darwin")) != std::string::npos) { - TargetType = isDarwin; - - // Compute the darwin version number. - if (isdigit(TT[Pos+7])) - DarwinVers = atoi(&TT[Pos+7]); - else - DarwinVers = 8; // Minimum supported darwin is Tiger. - } else if (TT.find("linux") != std::string::npos) { - // Linux doesn't imply ELF, but we don't currently support anything else. - TargetType = isELF; - } else if (TT.find("cygwin") != std::string::npos) { - TargetType = isCygwin; - } else if (TT.find("mingw") != std::string::npos) { - TargetType = isMingw; - } else if (TT.find("win32") != std::string::npos) { - TargetType = isWindows; - } else if (TT.find("windows") != std::string::npos) { - TargetType = isWindows; - } else if (TT.find("-cl") != std::string::npos) { - TargetType = isDarwin; - DarwinVers = 9; - } - } - // Stack alignment is 16 bytes on Darwin (both 32 and 64 bit) and for all 64 // bit targets. - if (TargetType == isDarwin || Is64Bit) + if (isTargetDarwin() || Is64Bit) stackAlignment = 16; if (StackAlignment) stackAlignment = StackAlignment; } + +/// IsCalleePop - Determines whether the callee is required to pop its +/// own arguments. Callee pop is necessary to support tail calls. 
+bool X86Subtarget::IsCalleePop(bool IsVarArg, + CallingConv::ID CallingConv) const { + if (IsVarArg) + return false; + + switch (CallingConv) { + default: + return false; + case CallingConv::X86_StdCall: + return !is64Bit(); + case CallingConv::X86_FastCall: + return !is64Bit(); + case CallingConv::X86_ThisCall: + return !is64Bit(); + case CallingConv::Fast: + return GuaranteedTailCallOpt; + case CallingConv::GHC: + return GuaranteedTailCallOpt; + } +} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 646af91..486dbc4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -14,7 +14,9 @@ #ifndef X86SUBTARGET_H #define X86SUBTARGET_H +#include "llvm/ADT/Triple.h" #include "llvm/Target/TargetSubtarget.h" +#include "llvm/CallingConv.h" #include namespace llvm { @@ -88,10 +90,6 @@ protected: /// operands. This may require setting a feature bit in the processor. bool HasVectorUAMem; - /// DarwinVers - Nonzero if this is a darwin platform: the numeric - /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. - unsigned char DarwinVers; // Is any darwin-x86 platform. - /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -99,6 +97,9 @@ protected: /// Max. memset / memcpy size that is turned into rep/movs, rep/stos ops. /// unsigned MaxInlineSizeThreshold; + + /// TargetTriple - What processor and OS we're targeting. + Triple TargetTriple; private: /// Is64Bit - True if the processor supports 64-bit instructions and @@ -106,9 +107,6 @@ private: bool Is64Bit; public: - enum { - isELF, isCygwin, isDarwin, isWindows, isMingw - } TargetType; /// This constructor initializes the data members to match that /// of the specified triple. @@ -157,24 +155,31 @@ public: bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } - bool isTargetDarwin() const { return TargetType == isDarwin; } - bool isTargetELF() const { return TargetType == isELF; } + bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } + + // ELF is a reasonably sane default and the only other X86 targets we + // support are Darwin and Windows. Just use "not those". + bool isTargetELF() const { + return !isTargetDarwin() && !isTargetWindows() && !isTargetCygMing(); + } + bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } - bool isTargetWindows() const { return TargetType == isWindows; } - bool isTargetMingw() const { return TargetType == isMingw; } - bool isTargetCygwin() const { return TargetType == isCygwin; } + bool isTargetWindows() const { return TargetTriple.getOS() == Triple::Win32; } + bool isTargetMingw() const { + return TargetTriple.getOS() == Triple::MinGW32 || + TargetTriple.getOS() == Triple::MinGW64; } + bool isTargetCygwin() const { return TargetTriple.getOS() == Triple::Cygwin; } bool isTargetCygMing() const { - return TargetType == isMingw || TargetType == isCygwin; + return isTargetMingw() || isTargetCygwin(); } - + /// isTargetCOFF - Return true if this is any COFF/Windows target variant. 
bool isTargetCOFF() const { - return TargetType == isMingw || TargetType == isCygwin || - TargetType == isWindows; + return isTargetMingw() || isTargetCygwin() || isTargetWindows(); } bool isTargetWin64() const { - return Is64Bit && (TargetType == isMingw || TargetType == isWindows); + return Is64Bit && (isTargetMingw() || isTargetWindows()); } std::string getDataLayout() const { @@ -208,7 +213,10 @@ public: /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, /// 10 = Snow Leopard, etc. - unsigned getDarwinVers() const { return DarwinVers; } + unsigned getDarwinVers() const { + if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber(); + return 0; + } /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel @@ -237,6 +245,9 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; + + /// IsCalleePop - Test whether a function should pop its own arguments. + bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index f2c5058..df00d3f 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -173,14 +173,18 @@ bool X86TargetMachine::addInstSelector(PassManagerBase &PM, // Install an instruction selector. PM.add(createX86ISelDag(*this, OptLevel)); - // Install a pass to insert x87 FP_REG_KILL instructions, as needed. - PM.add(createX87FPRegKillInserterPass()); + // For 32-bit, prepend instructions to set the "global base reg" for PIC. + if (!Subtarget.is64Bit()) + PM.add(createGlobalBaseRegPass()); return false; } bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + // Install a pass to insert x87 FP_REG_KILL instructions, as needed. + PM.add(createX87FPRegKillInserterPass()); + PM.add(createX86MaxStackAlignmentHeuristicPass()); return false; // -print-machineinstr shouldn't print after this. } diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index c100c59..6656bdc 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -138,7 +138,6 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // FALL THROUGH case GlobalValue::InternalLinkage: case GlobalValue::PrivateLinkage: - case GlobalValue::LinkerPrivateLinkage: break; case GlobalValue::DLLImportLinkage: llvm_unreachable("DLLImport linkage is not supported by this target!"); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index b230572..abe7b2f 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -245,7 +245,7 @@ SDValue XCoreTargetLowering:: LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = cast(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(), MVT::i32); // If it's a debug information descriptor, don't mess with it. 
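The X86Subtarget rewrite above retires the TargetType enum and the DarwinVers field: the raw triple string used to be scanned with std::string::find at construction time, whereas the stored llvm::Triple parses once and answers OS queries on demand, and getDarwinVers likewise becomes a lookup of the triple's Darwin major number. A minimal sketch of the queried style, assuming the Triple API this patch itself uses:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Triple.h"
    using namespace llvm;

    // Parse "x86_64-apple-darwin10" once instead of repeated find() calls.
    static bool looksLikeDarwin(StringRef TT) {
      return Triple(TT).getOS() == Triple::Darwin;
    }

    // 8 = Tiger, 9 = Leopard, 10 = Snow Leopard; 0 for non-Darwin triples.
    static unsigned darwinMajor(const Triple &T) {
      return T.getOS() == Triple::Darwin ? T.getDarwinMajorNumber() : 0;
    }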
if (DAG.isVerifiedDebugInfoDesc(Op)) return GA; @@ -269,7 +269,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const DebugLoc dl = Op.getDebugLoc(); // transform to label + getid() * size const GlobalValue *GV = cast(Op)->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32); + SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32); const GlobalVariable *GVar = dyn_cast(GV); if (!GVar) { // If GV is an alias then use the aliasee to determine size @@ -454,12 +454,12 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const if (LD->getAlignment() == 2) { int SVOffset = LD->getSrcValueOffset(); - SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain, + SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, MVT::i32, dl, Chain, BasePtr, LD->getSrcValue(), SVOffset, MVT::i16, LD->isVolatile(), LD->isNonTemporal(), 2); SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, DAG.getConstant(2, MVT::i32)); - SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain, + SDValue High = DAG.getExtLoad(ISD::EXTLOAD, MVT::i32, dl, Chain, HighAddr, LD->getSrcValue(), SVOffset + 2, MVT::i16, LD->isVolatile(), LD->isNonTemporal(), 2); @@ -812,6 +812,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -826,7 +827,7 @@ XCoreTargetLowering::LowerCall(SDValue Chain, SDValue Callee, case CallingConv::Fast: case CallingConv::C: return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); } } @@ -839,6 +840,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { @@ -866,7 +868,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - SDValue Arg = Outs[i].Val; + SDValue Arg = OutVals[i]; // Promote the value if needed. switch (VA.getLocInfo()) { @@ -919,7 +921,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. if (GlobalAddressSDNode *G = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); @@ -1072,7 +1074,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the frame index object for this incoming parameter... 
int FI = MFI->CreateFixedObject(ObjSize, LRSaveSize + VA.getLocMemOffset(), - true, false); + true); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -1097,7 +1099,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // address for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) { // Create a stack slot - int FI = MFI->CreateFixedObject(4, offset, true, false); + int FI = MFI->CreateFixedObject(4, offset, true); if (i == FirstVAReg) { XFI->setVarArgsFrameIndex(FI); } @@ -1120,7 +1122,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // This will point to the next argument passed via stack. XFI->setVarArgsFrameIndex( MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(), - true, false)); + true)); } } @@ -1133,19 +1135,19 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, bool XCoreTargetLowering:: CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &OutTys, - const SmallVectorImpl &ArgsFlags, - SelectionDAG &DAG) const { + const SmallVectorImpl &Outs, + LLVMContext &Context) const { SmallVector RVLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), - RVLocs, *DAG.getContext()); - return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_XCore); + RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_XCore); } SDValue XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of @@ -1175,7 +1177,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, assert(VA.isRegLoc() && "Can only return in registers!"); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), - Outs[i].Val, Flag); + OutVals[i], Flag); // guarantee that all emitted copies are // stuck together, avoiding something bad @@ -1221,23 +1223,22 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - BuildMI(BB, dl, TII.get(XCore::BRFT_lru6)) - .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); F->insert(It, copy0MBB); F->insert(It, sinkMBB); - // Update machine-CFG edges by first adding all successors of the current - // block to the new block which will contain the Phi node for the select. - for (MachineBasicBlock::succ_iterator I = BB->succ_begin(), - E = BB->succ_end(); I != E; ++I) - sinkMBB->addSuccessor(*I); - // Next, remove all successors of the current block, and add the true - // and fallthrough blocks as its successors. - while (!BB->succ_empty()) - BB->removeSuccessor(BB->succ_begin()); + + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); + BuildMI(BB, dl, TII.get(XCore::BRFT_lru6)) + .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); + // copy0MBB: // %FalseValue = ... // # fallthrough to sinkMBB @@ -1250,11 +1251,12 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... 
BB = sinkMBB; - BuildMI(BB, dl, TII.get(XCore::PHI), MI->getOperand(0).getReg()) + BuildMI(*BB, BB->begin(), dl, + TII.get(XCore::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -1379,7 +1381,6 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, SDValue Mul0, Mul1, Addend0, Addend1; if (N->getValueType(0) == MVT::i32 && isADDADDMUL(SDValue(N, 0), Mul0, Mul1, Addend0, Addend1, true)) { - SDValue Zero = DAG.getConstant(0, MVT::i32); SDValue Ignored = DAG.getNode(XCoreISD::LMUL, dl, DAG.getVTList(MVT::i32, MVT::i32), Mul0, Mul1, Addend0, Addend1); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index d8d2a3a..febc198 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -120,6 +120,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -178,6 +179,7 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; @@ -186,13 +188,13 @@ namespace llvm { LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &OutTys, - const SmallVectorImpl &ArgsFlags, - SelectionDAG &DAG) const; + const SmallVectorImpl &ArgsFlags, + LLVMContext &Context) const; }; } diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 5260258..dd90ea9 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -299,9 +299,8 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, unsigned XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond)const{ - // FIXME there should probably be a DebugLoc argument here - DebugLoc dl; + const SmallVectorImpl &Cond, + DebugLoc DL)const{ // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); assert((Cond.size() == 2 || Cond.size() == 0) && @@ -310,11 +309,11 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, if (FBB == 0) { // One way branch. if (Cond.empty()) { // Unconditional branch - BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(TBB); + BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(TBB); } else { // Conditional branch. unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm()); - BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg()) + BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()) .addMBB(TBB); } return 1; @@ -323,9 +322,9 @@ XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB, // Two-way Conditional branch. 
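The new XCore select lowering above leans on two MachineBasicBlock helpers instead of hand-copying successor lists: splice moves every instruction after the SELECT pseudo into sinkMBB as an O(1) list-node transfer, and transferSuccessorsAndUpdatePHIs rewires the CFG edges. Note the conditional branch is now built after the splice, so BuildMI appends it to what is genuinely the end of BB. std::list::splice models the instruction move (llvm::next in the diff is the pre-C++11 spelling of std::next); a runnable model, not LLVM code:

    #include <cassert>
    #include <list>

    int main() {
      std::list<int> BB;
      for (int i = 1; i <= 4; ++i) BB.push_back(i * 10);  // 20 = the pseudo
      std::list<int> Sink;
      std::list<int>::iterator MI = BB.begin();
      ++MI;                                   // points at the pseudo (20)
      std::list<int>::iterator AfterMI = MI;
      ++AfterMI;
      // Move the tail after the pseudo; nodes are relinked, not copied.
      Sink.splice(Sink.begin(), BB, AfterMI, BB.end());
      assert(BB.size() == 2 && Sink.size() == 2);
      assert(BB.back() == 20 && Sink.front() == 30);
      return 0;
    }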
assert(Cond.size() == 2 && "Unexpected number of components!"); unsigned Opc = GetCondBranchFromCond((XCore::CondCode)Cond[0].getImm()); - BuildMI(&MBB, dl, get(Opc)).addReg(Cond[1].getReg()) + BuildMI(&MBB, DL, get(Opc)).addReg(Cond[1].getReg()) .addMBB(TBB); - BuildMI(&MBB, dl, get(XCore::BRFU_lu6)).addMBB(FBB); + BuildMI(&MBB, DL, get(XCore::BRFU_lu6)).addMBB(FBB); return 2; } @@ -357,37 +356,31 @@ XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } -bool XCoreInstrInfo::copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const { - - if (DestRC == SrcRC) { - if (DestRC == XCore::GRRegsRegisterClass) { - BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg) - .addReg(SrcReg) - .addImm(0); - return true; - } else { - return false; - } +void XCoreInstrInfo::copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const { + bool GRDest = XCore::GRRegsRegClass.contains(DestReg); + bool GRSrc = XCore::GRRegsRegClass.contains(SrcReg); + + if (GRDest && GRSrc) { + BuildMI(MBB, I, DL, get(XCore::ADD_2rus), DestReg) + .addReg(SrcReg, getKillRegState(KillSrc)) + .addImm(0); + return; } - if (SrcRC == XCore::RRegsRegisterClass && SrcReg == XCore::SP && - DestRC == XCore::GRRegsRegisterClass) { - BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg) - .addImm(0); - return true; + if (GRDest && SrcReg == XCore::SP) { + BuildMI(MBB, I, DL, get(XCore::LDAWSP_ru6), DestReg).addImm(0); + return; } - if (DestRC == XCore::RRegsRegisterClass && DestReg == XCore::SP && - SrcRC == XCore::GRRegsRegisterClass) { + + if (DestReg == XCore::SP && GRSrc) { BuildMI(MBB, I, DL, get(XCore::SETSP_1r)) - .addReg(SrcReg); - return true; + .addReg(SrcReg, getKillRegState(KillSrc)); + return; } - return false; + llvm_unreachable("Impossible reg-to-reg copy"); } void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -438,8 +431,10 @@ bool XCoreInstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(it->getReg()); - storeRegToStackSlot(MBB, MI, it->getReg(), true, - it->getFrameIdx(), it->getRegClass(), &RI); + unsigned Reg = it->getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + storeRegToStackSlot(MBB, MI, Reg, true, + it->getFrameIdx(), RC, &RI); if (emitFrameMoves) { MCSymbol *SaveLabel = MF->getContext().CreateTempSymbol(); BuildMI(MBB, MI, DL, get(XCore::DBG_LABEL)).addSym(SaveLabel); @@ -460,10 +455,11 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, --BeforeI; for (std::vector::const_iterator it = CSI.begin(); it != CSI.end(); ++it) { - + unsigned Reg = it->getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); loadRegFromStackSlot(MBB, MI, it->getReg(), it->getFrameIdx(), - it->getRegClass(), &RI); + RC, &RI); assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. 
loadRegFromStackSlot can insert multiple diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h index 9035ea9..e5b0171 100644 --- a/lib/Target/XCore/XCoreInstrInfo.h +++ b/lib/Target/XCore/XCoreInstrInfo.h @@ -58,17 +58,16 @@ public: bool AllowModify) const; virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) const; + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SrcReg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DestReg, unsigned SrcReg, + bool KillSrc) const; virtual void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index dd3cbc1..19b9b1f 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -733,7 +733,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), // TODO setd, eet, eef, getts, setpt, outct, inct, chkct, outt, intt, out, // in, outshr, inshr, testct, testwct, tinitpc, tinitdp, tinitsp, tinitcp, // tsetmr, sext (reg), zext (reg) -let isTwoAddress = 1 in { +let Constraints = "$src1 = $dst" in { let neverHasSideEffects = 1 in def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), "sext $dst, $src2", diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 0cfb358..2a88342 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -82,18 +82,6 @@ const unsigned* XCoreRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) return CalleeSavedRegs; } -const TargetRegisterClass* const* -XCoreRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { - static const TargetRegisterClass * const CalleeSavedRegClasses[] = { - XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass, - XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass, - XCore::GRRegsRegisterClass, XCore::GRRegsRegisterClass, - XCore::GRRegsRegisterClass, XCore::RRegsRegisterClass, - 0 - }; - return CalleeSavedRegClasses; -} - BitVector XCoreRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(XCore::CP); @@ -320,7 +308,7 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx; if (! isVarArg) { // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. 
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true, false); + FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true); } else { FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false); diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 5bdd059..66132ba 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -44,9 +44,6 @@ public: const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - const TargetRegisterClass* const* getCalleeSavedRegClasses( - const MachineFunction *MF = 0) const; - BitVector getReservedRegs(const MachineFunction &MF) const; bool requiresRegisterScavenging(const MachineFunction &MF) const; diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp index 37d7a00..abfa514 100644 --- a/lib/Transforms/Hello/Hello.cpp +++ b/lib/Transforms/Hello/Hello.cpp @@ -28,7 +28,7 @@ namespace { Hello() : FunctionPass(&ID) {} virtual bool runOnFunction(Function &F) { - HelloCounter++; + ++HelloCounter; errs() << "Hello: "; errs().write_escaped(F.getName()) << '\n'; return false; @@ -46,7 +46,7 @@ namespace { Hello2() : FunctionPass(&ID) {} virtual bool runOnFunction(Function &F) { - HelloCounter++; + ++HelloCounter; errs() << "Hello: "; errs().write_escaped(F.getName()) << '\n'; return false; diff --git a/lib/Transforms/Hello/Hello.exports b/lib/Transforms/Hello/Hello.exports new file mode 100644 index 0000000..e69de29 diff --git a/lib/Transforms/Hello/Makefile b/lib/Transforms/Hello/Makefile index c5e75d4..f1e3148 100644 --- a/lib/Transforms/Hello/Makefile +++ b/lib/Transforms/Hello/Makefile @@ -12,5 +12,13 @@ LIBRARYNAME = LLVMHello LOADABLE_MODULE = 1 USEDLIBS = +# If we don't need RTTI or EH, there's no reason to export anything +# from the hello plugin. +ifneq ($(REQUIRES_RTTI), 1) +ifneq ($(REQUIRES_EH), 1) +EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/Hello.exports +endif +endif + include $(LEVEL)/Makefile.common diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 89f213e..28ea079 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -360,19 +360,20 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { IndicesVector Operands; for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end(); UI != E; ++UI) { + User *U = *UI; Operands.clear(); - if (LoadInst *LI = dyn_cast(*UI)) { + if (LoadInst *LI = dyn_cast(U)) { if (LI->isVolatile()) return false; // Don't hack volatile loads Loads.push_back(LI); // Direct loads are equivalent to a GEP with a zero index and then a load. Operands.push_back(0); - } else if (GetElementPtrInst *GEP = dyn_cast(*UI)) { + } else if (GetElementPtrInst *GEP = dyn_cast(U)) { if (GEP->use_empty()) { // Dead GEP's cause trouble later. Just remove them if we run into // them. getAnalysis().deleteValue(GEP); GEP->eraseFromParent(); - // TODO: This runs the above loop over and over again for dead GEPS + // TODO: This runs the above loop over and over again for dead GEPs // Couldn't we just do increment the UI iterator earlier and erase the // use? return isSafeToPromoteArgument(Arg, isByVal); @@ -452,12 +453,14 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const { // Now check every path from the entry block to the load for transparency. // To do this, we perform a depth first search on the inverse CFG from the // loading block. 
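The ArgumentPromotion hunk above introduces a pattern this patch applies throughout: hoist the use into a local (User *U = *UI;) and classify U, rather than dereferencing the use_iterator inside each cast. A sketch of the shape against this revision's headers; note that template arguments such as dyn_cast<LoadInst> appear without their angle brackets in the diff text above:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Accept a value only if every user is a load or a GEP.
    static bool onlyLoadedOrGEPed(Value *Arg) {
      for (Value::use_iterator UI = Arg->use_begin(), E = Arg->use_end();
           UI != E; ++UI) {
        User *U = *UI;                    // dereference the use once
        if (!isa<LoadInst>(U) && !isa<GetElementPtrInst>(U))
          return false;                   // some other kind of user
      }
      return true;
    }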
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + BasicBlock *P = *PI; for (idf_ext_iterator > - I = idf_ext_begin(*PI, TranspBlocks), - E = idf_ext_end(*PI, TranspBlocks); I != E; ++I) + I = idf_ext_begin(P, TranspBlocks), + E = idf_ext_end(P, TranspBlocks); I != E; ++I) if (AA.canBasicBlockModify(**I, Arg, LoadSize)) return false; + } } // If the path from the entry of the function to each load is free of diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 692e47d..475eee8 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -120,9 +120,14 @@ namespace { typedef SmallVector UseVector; + protected: + // DAH uses this to specify a different ID. + explicit DAE(void *ID) : ModulePass(ID) {} + public: static char ID; // Pass identification, replacement for typeid DAE() : ModulePass(&ID) {} + bool runOnModule(Module &M); virtual bool ShouldHackArguments() const { return false; } @@ -155,6 +160,8 @@ namespace { /// by bugpoint. struct DAH : public DAE { static char ID; + DAH() : DAE(&ID) {} + virtual bool ShouldHackArguments() const { return true; } }; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index b429213..735a1c4 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -160,13 +160,12 @@ static bool SafeToDestroyConstant(const Constant *C) { static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, SmallPtrSet &PHIUsers) { for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; - ++UI) - if (const ConstantExpr *CE = dyn_cast(*UI)) { + ++UI) { + const User *U = *UI; + if (const ConstantExpr *CE = dyn_cast(U)) { GS.HasNonInstructionUser = true; - if (AnalyzeGlobal(CE, GS, PHIUsers)) return true; - - } else if (const Instruction *I = dyn_cast(*UI)) { + } else if (const Instruction *I = dyn_cast(U)) { if (!GS.HasMultipleAccessingFunctions) { const Function *F = I->getParent()->getParent(); if (GS.AccessingFunction == 0) @@ -221,18 +220,21 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, if (AnalyzeGlobal(I, GS, PHIUsers)) return true; GS.HasPHIUser = true; } else if (isa(I)) { + // Nothing to analyse. } else if (isa(I)) { - if (I->getOperand(1) == V) + const MemTransferInst *MTI = cast(I); + if (MTI->getArgOperand(0) == V) GS.StoredType = GlobalStatus::isStored; - if (I->getOperand(2) == V) + if (MTI->getArgOperand(1) == V) GS.isLoaded = true; } else if (isa(I)) { - assert(I->getOperand(1) == V && "Memset only takes one pointer!"); + assert(cast(I)->getArgOperand(0) == V && + "Memset only takes one pointer!"); GS.StoredType = GlobalStatus::isStored; } else { return true; // Any other non-load instruction might take address! } - } else if (const Constant *C = dyn_cast(*UI)) { + } else if (const Constant *C = dyn_cast(U)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. if (!SafeToDestroyConstant(C)) @@ -242,6 +244,7 @@ static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS, // Otherwise must be some other user. 
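The AnalyzeGlobal change above also swaps positional operand access for argument access on the mem intrinsics. A call instruction's operand list carries the callee as well, so "operand 1" only happened to be "argument 0"; getArgOperand indexes the arguments themselves and survives the operand-layout change this LLVM revision made. A small sketch, assuming the era's headers:

    #include "llvm/IntrinsicInst.h"
    using namespace llvm;

    // llvm.memcpy/llvm.memmove arguments: 0 = dest, 1 = source, 2 = length.
    static bool writesTo(const MemTransferInst *MTI, const Value *Ptr) {
      return MTI->getArgOperand(0) == Ptr;  // previously getOperand(1)
    }
    static bool readsFrom(const MemTransferInst *MTI, const Value *Ptr) {
      return MTI->getArgOperand(1) == Ptr;  // previously getOperand(2)
    }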
return true; } + } return false; } @@ -1304,7 +1307,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, const Type *IntPtrTy = TD->getIntPtrType(CI->getContext()); Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy, ConstantInt::get(IntPtrTy, TypeSize), - NElems, + NElems, 0, CI->getName() + ".f" + Twine(FieldNo)); FieldMallocs.push_back(NMI); new StoreInst(NMI, NGV, CI); @@ -1323,8 +1326,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI, // if (F2) { free(F2); F2 = 0; } // } // The malloc can also fail if its argument is too large. - Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0); - Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1), + Constant *ConstantZero = ConstantInt::get(CI->getArgOperand(0)->getType(), 0); + Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getArgOperand(0), ConstantZero, "isneg"); for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) { Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i], @@ -1511,10 +1514,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, // If this is an allocation of a fixed size array of structs, analyze as a // variable size array. malloc [100 x struct],1 -> malloc struct, 100 - if (NElems == ConstantInt::get(CI->getOperand(1)->getType(), 1)) + if (NElems == ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) if (const ArrayType *AT = dyn_cast(AllocTy)) AllocTy = AT->getElementType(); - + const StructType *AllocSTy = dyn_cast(AllocTy); if (!AllocSTy) return false; @@ -1533,7 +1536,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV, Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements()); Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy, AllocSize, NumElements, - CI->getName()); + 0, CI->getName()); Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI); CI->replaceAllUsesWith(Cast); CI->eraseFromParent(); @@ -1597,13 +1600,15 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { GVElType->isFloatingPointTy() || GVElType->isPointerTy() || GVElType->isVectorTy()) return false; - + // Walk the use list of the global seeing if all the uses are load or store. // If there is anything else, bail out. - for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I) - if (!isa(I) && !isa(I)) + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E; ++I){ + User *U = *I; + if (!isa(U) && !isa(U)) return false; - + } + DEBUG(dbgs() << " *** SHRINKING TO BOOL: " << *GV); // Create the new global, initializing it to false. @@ -1641,7 +1646,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { // bool. Instruction *StoredVal = cast(SI->getOperand(0)); - // If we're already replaced the input, StoredVal will be a cast or + // If we've already replaced the input, StoredVal will be a cast or // select instruction. If not, it will be a load of the original // global. 
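The two CreateMalloc calls above gain a new 0 argument. Judging from these call sites, at this revision CallInst::CreateMalloc takes an optional Function* for a pre-resolved malloc declaration between the array-size and name parameters; passing null lets it find or insert the declaration itself. A hedged usage sketch, with the signature inferred from the updated calls:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Emit "malloc(AllocSize * NElems)" of type AllocTy* before InsertPt.
    static Instruction *emitMalloc(Instruction *InsertPt,
                                   const Type *IntPtrTy, const Type *AllocTy,
                                   Value *AllocSize, Value *NElems) {
      return CallInst::CreateMalloc(InsertPt, IntPtrTy, AllocTy, AllocSize,
                                    NElems, /*MallocF=*/0, "heap");
    }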
if (LoadInst *LI = dyn_cast(StoredVal)) { @@ -2260,8 +2265,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, getVal(Values, CI->getOperand(0)), CI->getType()); } else if (SelectInst *SI = dyn_cast(CurInst)) { - InstResult = - ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), + InstResult = ConstantExpr::getSelect(getVal(Values, SI->getOperand(0)), getVal(Values, SI->getOperand(1)), getVal(Values, SI->getOperand(2))); } else if (GetElementPtrInst *GEP = dyn_cast(CurInst)) { @@ -2302,7 +2306,8 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal, if (!Callee) return false; // Cannot resolve. SmallVector Formals; - for (User::op_iterator i = CI->op_begin() + 1, e = CI->op_end(); + CallSite CS(CI); + for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) Formals.push_back(getVal(Values, *i)); diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index df2456f..e4db235 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -85,15 +85,16 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { unsigned NumNonconstant = 0; for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E; ++UI) { + User *U = *UI; // Ignore blockaddress uses. - if (isa(*UI)) continue; + if (isa(U)) continue; // Used by a non-instruction, or not the callee of a function, do not // transform. - if (!isa(*UI) && !isa(*UI)) + if (!isa(U) && !isa(U)) return false; - CallSite CS = CallSite::get(cast(*UI)); + CallSite CS = CallSite::get(cast(U)); if (!CS.isCallee(UI)) return false; diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index b785bb0..027a220 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -468,7 +468,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) { // move a call site to a function in this SCC before the // 'FirstCallInSCC' barrier. if (SCC.isSingular()) { - std::swap(CallSites[CSi], CallSites.back()); + CallSites[CSi] = CallSites.back(); CallSites.pop_back(); } else { CallSites.erase(CallSites.begin()+CSi); diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index 4d61e83..76cfef8 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -42,6 +42,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/Support/InstVisitor.h" #include "llvm/Transforms/Utils/Local.h" @@ -262,8 +263,8 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst) // char*. It returns "void", so it doesn't need to replace any of // Inst's uses and doesn't get a name. 
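The Inliner change above swaps std::swap for a plain assignment: the element at CSi is about to be discarded anyway, so the classic unordered-erase idiom needs only a copy from the back. A standalone model:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Unordered erase: overwrite the doomed slot with the last element and
    // shrink. Order is not preserved, but no tail shuffling happens.
    template <class T>
    void eraseUnordered(std::vector<T> &V, std::size_t I) {
      V[I] = V.back();   // std::swap would also copy the dead value out
      V.pop_back();
    }

    int main() {
      std::vector<int> V;
      for (int i = 1; i <= 4; ++i) V.push_back(i);
      eraseUnordered(V, 1);            // drop the 2
      assert(V.size() == 3 && V[1] == 4);
      return 0;
    }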
CastInst* CI = - new BitCastInst(Inst->getOperand(1), SBPTy, "LJBuf", Inst); - Value *Args[] = { CI, Inst->getOperand(2) }; + new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst); + Value *Args[] = { CI, Inst->getArgOperand(1) }; CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst); SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()]; @@ -378,7 +379,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) const Type* SBPTy = Type::getInt8PtrTy(Inst->getContext()); CastInst* BufPtr = - new BitCastInst(Inst->getOperand(1), SBPTy, "SBJmpBuf", Inst); + new BitCastInst(Inst->getArgOperand(0), SBPTy, "SBJmpBuf", Inst); Value *Args[] = { GetSetJmpMap(Func), BufPtr, ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++) @@ -405,12 +406,14 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) // Loop over all of the uses of instruction. If any of them are after the // call, "spill" the value to the stack. for (Value::use_iterator UI = II->use_begin(), E = II->use_end(); - UI != E; ++UI) - if (cast(*UI)->getParent() != ABlock || - InstrsAfterCall.count(cast(*UI))) { + UI != E; ++UI) { + User *U = *UI; + if (cast(U)->getParent() != ABlock || + InstrsAfterCall.count(cast(U))) { DemoteRegToStack(*II); break; } + } InstrsAfterCall.clear(); // Change the setjmp call into a branch statement. We'll remove the @@ -473,7 +476,8 @@ void LowerSetJmp::visitCallInst(CallInst& CI) // Construct the new "invoke" instruction. TerminatorInst* Term = OldBB->getTerminator(); - std::vector Params(CI.op_begin() + 1, CI.op_end()); + CallSite CS(&CI); + std::vector Params(CS.arg_begin(), CS.arg_end()); InvokeInst* II = InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func], Params.begin(), Params.end(), CI.getName(), Term); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 622a9b5..55d5e2a 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -146,7 +146,7 @@ static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { switch(Ty1->getTypeID()) { default: llvm_unreachable("Unknown type!"); - // Fall through in Release-Asserts mode. + // Fall through in Release mode. case Type::IntegerTyID: case Type::OpaqueTyID: // Ty1 == Ty2 would have returned true earlier. @@ -535,6 +535,7 @@ static LinkageCategory categorize(const Function *F) { case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::ExternalWeakLinkage: + case GlobalValue::LinkerPrivateWeakLinkage: return ExternalWeak; case GlobalValue::ExternalLinkage: diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 07525ea..6b9814c 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -66,13 +66,13 @@ Function* PartialInliner::unswitchFunction(Function* F) { return 0; // Clone the function, so that we can hack away on it. 
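LowerSetJmp above now builds the invoke's parameter list through a CallSite instead of slicing the raw operand range op_begin()+1..op_end(), which depended on the callee living in operand 0. A sketch of the uniform accessor, assuming this revision's CallSite header:

    #include "llvm/Instructions.h"
    #include "llvm/Support/CallSite.h"
    #include <vector>
    using namespace llvm;

    // Collect the explicit arguments of a call, wherever the callee is
    // stored in the operand list.
    static std::vector<Value*> callArgs(CallInst *CI) {
      CallSite CS(CI);
      return std::vector<Value*>(CS.arg_begin(), CS.arg_end());
    }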
- DenseMap ValueMap; - Function* duplicateFunction = CloneFunction(F, ValueMap); + ValueMap VMap; + Function* duplicateFunction = CloneFunction(F, VMap); duplicateFunction->setLinkage(GlobalValue::InternalLinkage); F->getParent()->getFunctionList().push_back(duplicateFunction); - BasicBlock* newEntryBlock = cast(ValueMap[entryBlock]); - BasicBlock* newReturnBlock = cast(ValueMap[returnBlock]); - BasicBlock* newNonReturnBlock = cast(ValueMap[nonReturnBlock]); + BasicBlock* newEntryBlock = cast(VMap[entryBlock]); + BasicBlock* newReturnBlock = cast(VMap[returnBlock]); + BasicBlock* newNonReturnBlock = cast(VMap[nonReturnBlock]); // Go ahead and update all uses to the duplicate, so that we can just // use the inliner functionality when we're done hacking. diff --git a/lib/Transforms/IPO/PartialSpecialization.cpp b/lib/Transforms/IPO/PartialSpecialization.cpp index 084b94e..58e1448 100644 --- a/lib/Transforms/IPO/PartialSpecialization.cpp +++ b/lib/Transforms/IPO/PartialSpecialization.cpp @@ -32,6 +32,10 @@ using namespace llvm; STATISTIC(numSpecialized, "Number of specialized functions created"); +STATISTIC(numReplaced, "Number of callers replaced by specialization"); + +// Maximum number of arguments markable interested +static const int MaxInterests = 6; // Call must be used at least occasionally static const int CallsMin = 5; @@ -40,8 +44,9 @@ static const int CallsMin = 5; static const double ConstValPercent = .1; namespace { + typedef SmallVector InterestingArgVector; class PartSpec : public ModulePass { - void scanForInterest(Function&, SmallVector&); + void scanForInterest(Function&, InterestingArgVector&); int scanDistribution(Function&, int, std::map&); public : static char ID; // Pass identification, replacement for typeid @@ -59,13 +64,15 @@ X("partialspecialization", "Partial Specialization"); // a call to the specialized function. Returns the specialized function static Function* SpecializeFunction(Function* F, - DenseMap& replacements) { + ValueMap& replacements) { // arg numbers of deleted arguments - DenseSet deleted; - for (DenseMap::iterator + DenseMap deleted; + for (ValueMap::iterator repb = replacements.begin(), repe = replacements.end(); - repb != repe; ++repb) - deleted.insert(cast(repb->first)->getArgNo()); + repb != repe; ++repb) { + Argument const *arg = cast(repb->first); + deleted[arg->getArgNo()] = arg; + } Function* NF = CloneFunction(F, replacements); NF->setLinkage(GlobalValue::InternalLinkage); @@ -80,9 +87,23 @@ SpecializeFunction(Function* F, if (CS.getCalledFunction() == F) { SmallVector args; - for (unsigned x = 0; x < CS.arg_size(); ++x) - if (!deleted.count(x)) - args.push_back(CS.getArgument(x)); + // Assemble the non-specialized arguments for the updated callsite. + // In the process, make sure that the specialized arguments are + // constant and match the specialization. If that's not the case, + // this callsite needs to call the original or some other + // specialization; don't change it here. 
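The PartialSpecialization rework above changes both the map type (replacements is now a ValueMap keyed on const Value*, matching CloneFunction's updated signature) and the call-site rewrite: deleted maps each specialized argument position to its Argument, and a call is only redirected when every such position carries exactly the constant the specialization was built for. The walk just below implements this, skipping mismatched call sites via goto next_use. A standalone model of the matching logic:

    #include <cstddef>
    #include <map>
    #include <vector>

    // Deleted maps arg position -> required constant. On success, Remaining
    // holds the arguments the specialized function still takes.
    static bool matchesSpecialization(
        const std::map<std::size_t, int> &Deleted,
        const std::vector<int> &CallArgs,
        std::vector<int> &Remaining) {
      for (std::size_t i = 0; i != CallArgs.size(); ++i) {
        std::map<std::size_t, int>::const_iterator It = Deleted.find(i);
        if (It == Deleted.end())
          Remaining.push_back(CallArgs[i]);  // still passed at runtime
        else if (CallArgs[i] != It->second)
          return false;  // wrong constant: leave this call site alone
      }
      return true;
    }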
+ CallSite::arg_iterator as = CS.arg_begin(), ae = CS.arg_end(); + for (CallSite::arg_iterator ai = as; ai != ae; ++ai) { + DenseMap::iterator delit = deleted.find( + std::distance(as, ai)); + if (delit == deleted.end()) + args.push_back(cast(ai)); + else { + Constant *ci = dyn_cast(ai); + if (!(ci && ci == replacements[delit->second])) + goto next_use; + } + } Value* NCall; if (CallInst *CI = dyn_cast(i)) { NCall = CallInst::Create(NF, args.begin(), args.end(), @@ -99,8 +120,11 @@ SpecializeFunction(Function* F, } CS.getInstruction()->replaceAllUsesWith(NCall); CS.getInstruction()->eraseFromParent(); + ++numReplaced; } } + next_use: + ; } return NF; } @@ -111,7 +135,7 @@ bool PartSpec::runOnModule(Module &M) { for (Module::iterator I = M.begin(); I != M.end(); ++I) { Function &F = *I; if (F.isDeclaration() || F.mayBeOverridden()) continue; - SmallVector interestingArgs; + InterestingArgVector interestingArgs; scanForInterest(F, interestingArgs); // Find the first interesting Argument that we can specialize on @@ -126,7 +150,7 @@ bool PartSpec::runOnModule(Module &M) { ee = distribution.end(); ii != ee; ++ii) if (total > ii->second && ii->first && ii->second > total * ConstValPercent) { - DenseMap m; + ValueMap m; Function::arg_iterator arg = F.arg_begin(); for (int y = 0; y < interestingArgs[x]; ++y) ++arg; @@ -143,7 +167,7 @@ bool PartSpec::runOnModule(Module &M) { /// scanForInterest - This function decides which arguments would be worth /// specializing on. -void PartSpec::scanForInterest(Function& F, SmallVector& args) { +void PartSpec::scanForInterest(Function& F, InterestingArgVector& args) { for(Function::arg_iterator ii = F.arg_begin(), ee = F.arg_end(); ii != ee; ++ii) { for(Value::use_iterator ui = ii->use_begin(), ue = ii->use_end(); diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp index 6bc8e66..12e8db8 100644 --- a/lib/Transforms/IPO/StripSymbols.cpp +++ b/lib/Transforms/IPO/StripSymbols.cpp @@ -73,6 +73,19 @@ namespace { AU.setPreservesAll(); } }; + + class StripDeadDebugInfo : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + explicit StripDeadDebugInfo() + : ModulePass(&ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + } + }; } char StripSymbols::ID = 0; @@ -99,6 +112,14 @@ ModulePass *llvm::createStripDebugDeclarePass() { return new StripDebugDeclare(); } +char StripDeadDebugInfo::ID = 0; +static RegisterPass +A("strip-dead-debug-info", "Strip debug info for unused symbols"); + +ModulePass *llvm::createStripDeadDebugInfoPass() { + return new StripDeadDebugInfo(); +} + /// OnlyUsedBy - Return true if V is only used by Usr. 
static bool OnlyUsedBy(Value *V, Value *Usr) { for(Value::use_iterator I = V->use_begin(), E = V->use_end(); I != E; ++I) { @@ -223,27 +244,27 @@ static bool StripDebugInfo(Module &M) { Changed = true; } - NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv"); - if (NMD) { - Changed = true; - NMD->eraseFromParent(); - } - - NMD = M.getNamedMetadata("llvm.dbg.lv"); - if (NMD) { - Changed = true; - NMD->eraseFromParent(); + for (Module::named_metadata_iterator NMI = M.named_metadata_begin(), + NME = M.named_metadata_end(); NMI != NME;) { + NamedMDNode *NMD = NMI; + ++NMI; + if (NMD->getName().startswith("llvm.dbg.")) { + NMD->eraseFromParent(); + Changed = true; + } } - + unsigned MDDbgKind = M.getMDKindID("dbg"); - for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) + for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI) for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; - ++BI) + ++BI) { + Changed = true; // FIXME: Only set if there was debug metadata. BI->setMetadata(MDDbgKind, 0); + } - return true; + return Changed; } bool StripSymbols::runOnModule(Module &M) { @@ -266,8 +287,8 @@ bool StripDebugDeclare::runOnModule(Module &M) { if (Declare) { while (!Declare->use_empty()) { CallInst *CI = cast(Declare->use_back()); - Value *Arg1 = CI->getOperand(1); - Value *Arg2 = CI->getOperand(2); + Value *Arg1 = CI->getArgOperand(0); + Value *Arg2 = CI->getArgOperand(1); assert(CI->use_empty() && "llvm.dbg intrinsic should have void result"); CI->eraseFromParent(); if (Arg1->use_empty()) { @@ -295,3 +316,83 @@ bool StripDebugDeclare::runOnModule(Module &M) { return true; } + +/// getRealLinkageName - If the special LLVM prefix that tells the asm +/// printer not to emit the usual symbol prefix is present, then return the +/// linkage name with that special LLVM prefix stripped. +static StringRef getRealLinkageName(StringRef LinkageName) { + char One = '\1'; + if (LinkageName.startswith(StringRef(&One, 1))) + return LinkageName.substr(1); + return LinkageName; +} + +bool StripDeadDebugInfo::runOnModule(Module &M) { + bool Changed = false; + + // Debugging information is encoded in LLVM IR using metadata. This is designed + in such a way that debug info for symbols is preserved even if the symbols are + optimized away by the optimizer. This special pass removes debug info for + such symbols. + + // llvm.dbg.gv keeps track of debug info for global variables. + if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv")) { + SmallVector MDs; + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + if (DIGlobalVariable(NMD->getOperand(i)).Verify()) + MDs.push_back(NMD->getOperand(i)); + else + Changed = true; + NMD->eraseFromParent(); + NMD = NULL; + + for (SmallVector::iterator I = MDs.begin(), + E = MDs.end(); I != E; ++I) { + if (M.getGlobalVariable(DIGlobalVariable(*I).getGlobal()->getName(), + true)) { + if (!NMD) + NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); + NMD->addOperand(*I); + } + else + Changed = true; + } + } + + // llvm.dbg.sp keeps track of debug info for subprograms.
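StripDebugInfo above stops naming llvm.dbg.gv and llvm.dbg.lv individually and instead erases every named metadata node whose name starts with llvm.dbg., which also catches the per-function llvm.dbg.lv.* nodes. The iterator is advanced before the erase because eraseFromParent invalidates it. Condensed to its core, against this revision's Module API:

    #include "llvm/Module.h"
    using namespace llvm;

    static bool stripDebugNamedMD(Module &M) {
      bool Changed = false;
      for (Module::named_metadata_iterator I = M.named_metadata_begin(),
                                           E = M.named_metadata_end();
           I != E;) {
        NamedMDNode *NMD = I;      // implicit iterator-to-pointer (LLVM 2.8)
        ++I;                       // step past before erasing
        if (NMD->getName().startswith("llvm.dbg.")) {
          NMD->eraseFromParent();
          Changed = true;
        }
      }
      return Changed;
    }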
+ if (NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.sp")) { + SmallVector MDs; + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) + if (DISubprogram(NMD->getOperand(i)).Verify()) + MDs.push_back(NMD->getOperand(i)); + else + Changed = true; + NMD->eraseFromParent(); + NMD = NULL; + + for (SmallVector::iterator I = MDs.begin(), + E = MDs.end(); I != E; ++I) { + bool FnIsLive = false; + if (Function *F = DISubprogram(*I).getFunction()) + if (M.getFunction(F->getName())) + FnIsLive = true; + if (FnIsLive) { + if (!NMD) + NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); + NMD->addOperand(*I); + } else { + // Remove llvm.dbg.lv.fnname named mdnode which may have been used + // to hold debug info for dead function's local variables. + StringRef FName = DISubprogram(*I).getLinkageName(); + if (FName.empty()) + FName = DISubprogram(*I).getName(); + if (NamedMDNode *LVNMD = + M.getNamedMetadata(Twine("llvm.dbg.lv.", + getRealLinkageName(FName)))) + LVNMD->eraseFromParent(); + } + } + } + + return Changed; +} diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp index 473e83c..a74686f 100644 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ b/lib/Transforms/IPO/StructRetPromotion.cpp @@ -107,12 +107,12 @@ CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { // Check if it is ok to perform this promotion. if (isSafeToUpdateAllCallers(F) == false) { DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n"); - NumRejectedSRETUses++; + ++NumRejectedSRETUses; return 0; } DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n"); - NumSRET++; + ++NumSRET; // [1] Replace use of sret parameter AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", F->getEntryBlock().begin()); @@ -171,16 +171,16 @@ bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { // Check FirstArg's users. for (Value::use_iterator ArgI = FirstArg->use_begin(), ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) { - + User *U = *ArgI; // If FirstArg user is a CallInst that does not correspond to current // call site then this function F is not suitable for sret promotion. - if (CallInst *CI = dyn_cast(ArgI)) { + if (CallInst *CI = dyn_cast(U)) { if (CI != Call) return false; } // If FirstArg user is a GEP whose all users are not LoadInst then // this function F is not suitable for sret promotion. - else if (GetElementPtrInst *GEP = dyn_cast(ArgI)) { + else if (GetElementPtrInst *GEP = dyn_cast(U)) { // TODO : Use dom info and insert PHINodes to collect get results // from multiple call sites for this GEP. 
if (GEP->getParent() != Call->getParent()) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index c7b04a4..24e0528 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -178,7 +178,8 @@ public: Instruction *visitPHINode(PHINode &PN); Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP); Instruction *visitAllocaInst(AllocaInst &AI); - Instruction *visitFree(Instruction &FI); + Instruction *visitMalloc(Instruction &FI); + Instruction *visitFree(CallInst &FI); Instruction *visitLoadInst(LoadInst &LI); Instruction *visitStoreInst(StoreInst &SI); Instruction *visitBranchInst(BranchInst &BI); diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 8586054..3f4a857 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1584,6 +1584,19 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { if ((match(A, m_Not(m_Specific(B))) && match(D, m_Not(m_Specific(C))))) return BinaryOperator::CreateXor(C, B); + + // ((A|B)&1)|(B&-2) -> (A&1) | B + if (match(A, m_Or(m_Value(V1), m_Specific(B))) || + match(A, m_Or(m_Specific(B), m_Value(V1)))) { + Instruction *Ret = FoldOrWithConstants(I, Op1, V1, B, C); + if (Ret) return Ret; + } + // (B&-2)|((A|B)&1) -> (A&1) | B + if (match(B, m_Or(m_Specific(A), m_Value(V1))) || + match(B, m_Or(m_Value(V1), m_Specific(A)))) { + Instruction *Ret = FoldOrWithConstants(I, Op0, A, V1, D); + if (Ret) return Ret; + } } // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts. @@ -1599,19 +1612,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } - // ((A|B)&1)|(B&-2) -> (A&1) | B - if (match(Op0, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op0, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op1, A, B, C); - if (Ret) return Ret; - } - // (B&-2)|((A|B)&1) -> (A&1) | B - if (match(Op1, m_And(m_Or(m_Value(A), m_Value(B)), m_Value(C))) || - match(Op1, m_And(m_Value(C), m_Or(m_Value(A), m_Value(B))))) { - Instruction *Ret = FoldOrWithConstants(I, Op0, A, B, C); - if (Ret) return Ret; - } - // (~A | ~B) == (~(A & B)) - De Morgan's Law if (Value *Op0NotVal = dyn_castNotVal(Op0)) if (Value *Op1NotVal = dyn_castNotVal(Op1)) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 38e7b6e..85251a8 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -112,8 +112,8 @@ unsigned InstCombiner::GetOrEnforceKnownAlignment(Value *V, } Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { - unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getOperand(1)); - unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getOperand(2)); + unsigned DstAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(0)); + unsigned SrcAlign = GetOrEnforceKnownAlignment(MI->getArgOperand(1)); unsigned MinAlign = std::min(DstAlign, SrcAlign); unsigned CopyAlign = MI->getAlignment(); @@ -125,7 +125,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with // load/store. - ConstantInt *MemOpLength = dyn_cast(MI->getOperand(3)); + ConstantInt *MemOpLength = dyn_cast(MI->getArgOperand(2)); if (MemOpLength == 0) return 0; // Source and destination pointer types are always "i8*" for intrinsic. 
See @@ -140,9 +140,9 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // Use an integer load+store unless we can find something better. unsigned SrcAddrSp = - cast(MI->getOperand(2)->getType())->getAddressSpace(); + cast(MI->getArgOperand(1)->getType())->getAddressSpace(); unsigned DstAddrSp = - cast(MI->getOperand(1)->getType())->getAddressSpace(); + cast(MI->getArgOperand(0)->getType())->getAddressSpace(); const IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3); Type *NewSrcPtrTy = PointerType::get(IntType, SrcAddrSp); @@ -154,8 +154,8 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { // an i64 load+store, here because this improves the odds that the source or // dest address will be promotable. See if we can find a better type than the // integer datatype. - Value *StrippedDest = MI->getOperand(1)->stripPointerCasts(); - if (StrippedDest != MI->getOperand(1)) { + Value *StrippedDest = MI->getArgOperand(0)->stripPointerCasts(); + if (StrippedDest != MI->getArgOperand(0)) { const Type *SrcETy = cast(StrippedDest->getType()) ->getElementType(); if (TD && SrcETy->isSized() && TD->getTypeStoreSize(SrcETy) == Size) { @@ -189,15 +189,15 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) { SrcAlign = std::max(SrcAlign, CopyAlign); DstAlign = std::max(DstAlign, CopyAlign); - Value *Src = Builder->CreateBitCast(MI->getOperand(2), NewSrcPtrTy); - Value *Dest = Builder->CreateBitCast(MI->getOperand(1), NewDstPtrTy); + Value *Src = Builder->CreateBitCast(MI->getArgOperand(1), NewSrcPtrTy); + Value *Dest = Builder->CreateBitCast(MI->getArgOperand(0), NewDstPtrTy); Instruction *L = new LoadInst(Src, "tmp", MI->isVolatile(), SrcAlign); InsertNewInstBefore(L, *MI); InsertNewInstBefore(new StoreInst(L, Dest, MI->isVolatile(), DstAlign), *MI); // Set the size of the copy to 0, it will be deleted on the next iteration. - MI->setOperand(3, Constant::getNullValue(MemOpLength->getType())); + MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType())); return MI; } @@ -250,6 +250,8 @@ Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) { Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (isFreeCall(&CI)) return visitFree(CI); + if (isMalloc(&CI)) + return visitMalloc(CI); // If the caller function is nounwind, mark the call as nounwind, even if the // callee isn't. @@ -261,7 +263,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { IntrinsicInst *II = dyn_cast(&CI); if (!II) return visitCallSite(&CI); - + // Intrinsics cannot occur in an invoke, so handle them here instead of in // visitCallSite. 
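SimplifyMemTransfer above also shows the mutating side of the argument-operand API: rather than deleting the transfer on the spot, InstCombine zeroes its length argument and lets the next iteration sweep the now-dead memcpy away. A sketch, assuming the same headers:

    #include "llvm/Constant.h"
    #include "llvm/IntrinsicInst.h"
    using namespace llvm;

    // A zero-length memcpy/memmove is dead; later cleanup removes it.
    static void zapMemTransfer(MemIntrinsic *MI) {
      Value *Len = MI->getArgOperand(2);   // was operand 3 in the old layout
      MI->setArgOperand(2, Constant::getNullValue(Len->getType()));
    }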
if (MemIntrinsic *MI = dyn_cast(II)) { @@ -287,11 +289,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (GVSrc->isConstant()) { Module *M = CI.getParent()->getParent()->getParent(); Intrinsic::ID MemCpyID = Intrinsic::memcpy; - const Type *Tys[3] = { CI.getOperand(1)->getType(), - CI.getOperand(2)->getType(), - CI.getOperand(3)->getType() }; - CI.setCalledFunction( - Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); + const Type *Tys[3] = { CI.getArgOperand(0)->getType(), + CI.getArgOperand(1)->getType(), + CI.getArgOperand(2)->getType() }; + CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3)); Changed = true; } } @@ -311,7 +312,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Instruction *I = SimplifyMemSet(MSI)) return I; } - + if (Changed) return II; } @@ -322,10 +323,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (!TD) break; const Type *ReturnTy = CI.getType(); - bool Min = (cast(II->getOperand(2))->getZExtValue() == 1); + bool Min = (cast(II->getArgOperand(1))->getZExtValue() == 1); // Get to the real allocated thing and offset as fast as possible. - Value *Op1 = II->getOperand(1)->stripPointerCasts(); + Value *Op1 = II->getArgOperand(0)->stripPointerCasts(); // If we've stripped down to a single global variable that we // can know the size of then just return that. @@ -393,7 +394,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset); return ReplaceInstUsesWith(CI, RetVal); - } // Do not return "I don't know" here. Later optimization passes could @@ -402,45 +402,45 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } case Intrinsic::bswap: // bswap(bswap(x)) -> x - if (IntrinsicInst *Operand = dyn_cast(II->getOperand(1))) + if (IntrinsicInst *Operand = dyn_cast(II->getArgOperand(0))) if (Operand->getIntrinsicID() == Intrinsic::bswap) - return ReplaceInstUsesWith(CI, Operand->getOperand(1)); + return ReplaceInstUsesWith(CI, Operand->getArgOperand(0)); // bswap(trunc(bswap(x))) -> trunc(lshr(x, c)) - if (TruncInst *TI = dyn_cast(II->getOperand(1))) { + if (TruncInst *TI = dyn_cast(II->getArgOperand(0))) { if (IntrinsicInst *Operand = dyn_cast(TI->getOperand(0))) if (Operand->getIntrinsicID() == Intrinsic::bswap) { unsigned C = Operand->getType()->getPrimitiveSizeInBits() - TI->getType()->getPrimitiveSizeInBits(); Value *CV = ConstantInt::get(Operand->getType(), C); - Value *V = Builder->CreateLShr(Operand->getOperand(1), CV); + Value *V = Builder->CreateLShr(Operand->getArgOperand(0), CV); return new TruncInst(V, TI->getType()); } } break; case Intrinsic::powi: - if (ConstantInt *Power = dyn_cast(II->getOperand(2))) { + if (ConstantInt *Power = dyn_cast(II->getArgOperand(1))) { // powi(x, 0) -> 1.0 if (Power->isZero()) return ReplaceInstUsesWith(CI, ConstantFP::get(CI.getType(), 1.0)); // powi(x, 1) -> x if (Power->isOne()) - return ReplaceInstUsesWith(CI, II->getOperand(1)); + return ReplaceInstUsesWith(CI, II->getArgOperand(0)); // powi(x, -1) -> 1/x if (Power->isAllOnesValue()) return BinaryOperator::CreateFDiv(ConstantFP::get(CI.getType(), 1.0), - II->getOperand(1)); + II->getArgOperand(0)); } break; case Intrinsic::cttz: { // If all bits below the first known one are known zero, // this value is constant. 
-    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
-    ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
+    ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
                      KnownZero, KnownOne);
    unsigned TrailingZeros = KnownOne.countTrailingZeros();
    APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
@@ -453,11 +453,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  case Intrinsic::ctlz: {
    // If all bits above the first known one are known zero,
    // this value is constant.
-    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
-    ComputeMaskedBits(II->getOperand(1), APInt::getAllOnesValue(BitWidth),
+    ComputeMaskedBits(II->getArgOperand(0), APInt::getAllOnesValue(BitWidth),
                      KnownZero, KnownOne);
    unsigned LeadingZeros = KnownOne.countLeadingZeros();
    APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
@@ -468,8 +468,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
  }
  break;
  case Intrinsic::uadd_with_overflow: {
-    Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
-    const IntegerType *IT = cast<IntegerType>(II->getOperand(1)->getType());
+    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+    const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
    uint32_t BitWidth = IT->getBitWidth();
    APInt Mask = APInt::getSignBit(BitWidth);
    APInt LHSKnownZero(BitWidth, 0);
@@ -513,19 +513,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
    // FALL THROUGH uadd into sadd
  case Intrinsic::sadd_with_overflow:
    // Canonicalize constants into the RHS.
-    if (isa<Constant>(II->getOperand(1)) &&
-        !isa<Constant>(II->getOperand(2))) {
-      Value *LHS = II->getOperand(1);
-      II->setOperand(1, II->getOperand(2));
-      II->setOperand(2, LHS);
+    if (isa<Constant>(II->getArgOperand(0)) &&
+        !isa<Constant>(II->getArgOperand(1))) {
+      Value *LHS = II->getArgOperand(0);
+      II->setArgOperand(0, II->getArgOperand(1));
+      II->setArgOperand(1, LHS);
       return II;
     }
 
     // X + undef -> undef
-    if (isa<UndefValue>(II->getOperand(2)))
+    if (isa<UndefValue>(II->getArgOperand(1)))
       return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
 
-    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // X + 0 -> {X, false}
       if (RHS->isZero()) {
         Constant *V[] = {
@@ -533,7 +533,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
           ConstantInt::getFalse(II->getContext())
         };
         Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
-        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
     break;
@@ -541,38 +541,38 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::ssub_with_overflow:
     // undef - X -> undef
     // X - undef -> undef
-    if (isa<UndefValue>(II->getOperand(1)) ||
-        isa<UndefValue>(II->getOperand(2)))
+    if (isa<UndefValue>(II->getArgOperand(0)) ||
+        isa<UndefValue>(II->getArgOperand(1)))
       return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
 
-    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getOperand(2))) {
+    if (ConstantInt *RHS = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // X - 0 -> {X, false}
       if (RHS->isZero()) {
         Constant *V[] = {
-          UndefValue::get(II->getOperand(1)->getType()),
+          UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
         Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
-        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
     break;
   case Intrinsic::umul_with_overflow:
   case Intrinsic::smul_with_overflow:
     // Canonicalize constants into the RHS.
-    if (isa<Constant>(II->getOperand(1)) &&
-        !isa<Constant>(II->getOperand(2))) {
-      Value *LHS = II->getOperand(1);
-      II->setOperand(1, II->getOperand(2));
-      II->setOperand(2, LHS);
+    if (isa<Constant>(II->getArgOperand(0)) &&
+        !isa<Constant>(II->getArgOperand(1))) {
+      Value *LHS = II->getArgOperand(0);
+      II->setArgOperand(0, II->getArgOperand(1));
+      II->setArgOperand(1, LHS);
       return II;
     }
 
     // X * undef -> undef
-    if (isa<UndefValue>(II->getOperand(2)))
+    if (isa<UndefValue>(II->getArgOperand(1)))
       return ReplaceInstUsesWith(CI, UndefValue::get(II->getType()));
 
-    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getOperand(2))) {
+    if (ConstantInt *RHSI = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
       // X*0 -> {0, false}
       if (RHSI->isZero())
         return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType()));
@@ -580,11 +580,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       // X * 1 -> {X, false}
       if (RHSI->equalsInt(1)) {
         Constant *V[] = {
-          UndefValue::get(II->getOperand(1)->getType()),
+          UndefValue::get(II->getArgOperand(0)->getType()),
           ConstantInt::getFalse(II->getContext())
         };
         Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
-        return InsertValueInst::Create(Struct, II->getOperand(1), 0);
+        return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
       }
     }
     break;
@@ -595,8 +595,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::x86_sse2_loadu_dq:
     // Turn PPC lvx     -> load if the pointer is known aligned.
     // Turn X86 loadups -> load if the pointer is known aligned.
-    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
-      Value *Ptr = Builder->CreateBitCast(II->getOperand(1),
+    if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
+      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
                                          PointerType::getUnqual(II->getType()));
       return new LoadInst(Ptr);
     }
@@ -604,22 +604,22 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::ppc_altivec_stvx:
   case Intrinsic::ppc_altivec_stvxl:
     // Turn stvx -> store if the pointer is known aligned.
-    if (GetOrEnforceKnownAlignment(II->getOperand(2), 16) >= 16) {
+    if (GetOrEnforceKnownAlignment(II->getArgOperand(1), 16) >= 16) {
       const Type *OpPtrTy =
-        PointerType::getUnqual(II->getOperand(1)->getType());
-      Value *Ptr = Builder->CreateBitCast(II->getOperand(2), OpPtrTy);
-      return new StoreInst(II->getOperand(1), Ptr);
+        PointerType::getUnqual(II->getArgOperand(0)->getType());
+      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
+      return new StoreInst(II->getArgOperand(0), Ptr);
     }
     break;
   case Intrinsic::x86_sse_storeu_ps:
   case Intrinsic::x86_sse2_storeu_pd:
   case Intrinsic::x86_sse2_storeu_dq:
     // Turn X86 storeu -> store if the pointer is known aligned.
-    if (GetOrEnforceKnownAlignment(II->getOperand(1), 16) >= 16) {
+    if (GetOrEnforceKnownAlignment(II->getArgOperand(0), 16) >= 16) {
       const Type *OpPtrTy =
-        PointerType::getUnqual(II->getOperand(2)->getType());
-      Value *Ptr = Builder->CreateBitCast(II->getOperand(1), OpPtrTy);
-      return new StoreInst(II->getOperand(2), Ptr);
+        PointerType::getUnqual(II->getArgOperand(1)->getType());
+      Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), OpPtrTy);
+      return new StoreInst(II->getArgOperand(1), Ptr);
     }
     break;
 
@@ -627,12 +627,12 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     // These intrinsics only demands the 0th element of its input vector. If
     // we can simplify the input based on that, do so now.
     unsigned VWidth =
-      cast<VectorType>(II->getOperand(1)->getType())->getNumElements();
+      cast<VectorType>(II->getArgOperand(0)->getType())->getNumElements();
     APInt DemandedElts(VWidth, 1);
     APInt UndefElts(VWidth, 0);
-    if (Value *V = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+    if (Value *V = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                               UndefElts)) {
-      II->setOperand(1, V);
+      II->setArgOperand(0, V);
       return II;
     }
     break;
@@ -640,7 +640,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
 
   case Intrinsic::ppc_altivec_vperm:
     // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant.
-    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getOperand(3))) {
+    if (ConstantVector *Mask = dyn_cast<ConstantVector>(II->getArgOperand(2))) {
       assert(Mask->getNumOperands() == 16 && "Bad type for intrinsic!");
 
       // Check that all of the elements are integer constants or undefs.
@@ -655,8 +655,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
 
       if (AllEltsOk) {
         // Cast the input vectors to byte vectors.
-        Value *Op0 = Builder->CreateBitCast(II->getOperand(1), Mask->getType());
-        Value *Op1 = Builder->CreateBitCast(II->getOperand(2), Mask->getType());
+        Value *Op0 = Builder->CreateBitCast(II->getArgOperand(0), Mask->getType());
+        Value *Op1 = Builder->CreateBitCast(II->getArgOperand(1), Mask->getType());
         Value *Result = UndefValue::get(Op0->getType());
 
         // Only extract each element once.
@@ -689,7 +689,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   case Intrinsic::stackrestore: {
     // If the save is right next to the restore, remove the restore.  This can
     // happen when variable allocas are DCE'd.
-    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getOperand(1))) {
+    if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
       if (SS->getIntrinsicID() == Intrinsic::stacksave) {
         BasicBlock::iterator BI = SS;
         if (&*++BI == II)
@@ -772,13 +772,13 @@ protected:
     NewInstruction = IC->ReplaceInstUsesWith(*CI, With);
   }
   bool isFoldable(unsigned SizeCIOp, unsigned SizeArgOp, bool isString) const {
-    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp))) {
+    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp - CallInst::ArgOffset))) {
      if (SizeCI->isAllOnesValue())
        return true;
      if (isString)
        return SizeCI->getZExtValue() >=
-               GetStringLength(CI->getOperand(SizeArgOp));
-      if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getOperand(SizeArgOp)))
+               GetStringLength(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset));
+      if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getArgOperand(SizeArgOp - CallInst::ArgOffset)))
        return SizeCI->getZExtValue() >= Arg->getZExtValue();
     }
     return false;
@@ -846,7 +846,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
             UndefValue::get(Type::getInt1PtrTy(Callee->getContext())),
                             CS.getInstruction());
 
-    // If CS dues not return void then replaceAllUsesWith undef.
+    // If CS does not return void then replaceAllUsesWith undef.
     // This allows ValueHandlers and custom metadata to adjust itself.
     if (!CS.getInstruction()->getType()->isVoidTy())
       CS.getInstruction()->
@@ -1140,7 +1140,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
   IntrinsicInst *Tramp =
     cast<IntrinsicInst>(cast<BitCastInst>(Callee)->getOperand(0));
 
-  Function *NestF = cast<Function>(Tramp->getOperand(2)->stripPointerCasts());
+  Function *NestF = cast<Function>(Tramp->getArgOperand(1)->stripPointerCasts());
   const PointerType *NestFPTy = cast<PointerType>(NestF->getType());
   const FunctionType *NestFTy = cast<FunctionType>(NestFPTy->getElementType());
 
@@ -1181,7 +1181,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
       do {
         if (Idx == NestIdx) {
           // Add the chain argument and attributes.
-          Value *NestVal = Tramp->getOperand(3);
+          Value *NestVal = Tramp->getArgOperand(2);
           if (NestVal->getType() != NestTy)
             NestVal = new BitCastInst(NestVal, NestTy, "nest", Caller);
           NewArgs.push_back(NestVal);
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index b0137c4..505a0bf 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -22,19 +22,18 @@ using namespace PatternMatch;
 /// X*Scale+Offset.
 ///
 static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
-                                        int &Offset) {
-  assert(Val->getType()->isIntegerTy(32) && "Unexpected allocation size type!");
+                                        uint64_t &Offset) {
   if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
     Offset = CI->getZExtValue();
     Scale  = 0;
-    return ConstantInt::get(Type::getInt32Ty(Val->getContext()), 0);
+    return ConstantInt::get(Val->getType(), 0);
   }
 
   if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
     if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
       if (I->getOpcode() == Instruction::Shl) {
         // This is a value scaled by '1 << the shift amt'.
-        Scale = 1U << RHS->getZExtValue();
+        Scale = UINT64_C(1) << RHS->getZExtValue();
         Offset = 0;
         return I->getOperand(0);
       }
@@ -100,7 +99,7 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
   // See if we can satisfy the modulus by pulling a scale out of the array
   // size argument.
   unsigned ArraySizeScale;
-  int ArrayOffset;
+  uint64_t ArrayOffset;
   Value *NumElements = // See if the array size is a decomposable linear expr.
                      DecomposeSimpleLinearExpr(AI.getOperand(0), ArraySizeScale, ArrayOffset);
@@ -114,13 +113,13 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
     if (Scale == 1) {
       Amt = NumElements;
     } else {
-      Amt = ConstantInt::get(Type::getInt32Ty(CI.getContext()), Scale);
+      Amt = ConstantInt::get(AI.getArraySize()->getType(), Scale);
       // Insert before the alloca, not before the cast.
       Amt = AllocaBuilder.CreateMul(Amt, NumElements, "tmp");
     }
 
-    if (int Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
-      Value *Off = ConstantInt::get(Type::getInt32Ty(CI.getContext()),
+    if (uint64_t Offset = (AllocElTySize*ArrayOffset)/CastElTySize) {
+      Value *Off = ConstantInt::get(AI.getArraySize()->getType(),
                                     Offset, true);
       Amt = AllocaBuilder.CreateAdd(Amt, Off, "tmp");
     }
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 861cf92..6c00586 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1423,7 +1423,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
     switch (II->getIntrinsicID()) {
     case Intrinsic::bswap:
       Worklist.Add(II);
-      ICI.setOperand(0, II->getOperand(1));
+      ICI.setOperand(0, II->getArgOperand(0));
       ICI.setOperand(1, ConstantInt::get(II->getContext(), RHSV.byteSwap()));
       return &ICI;
     case Intrinsic::ctlz:
@@ -1431,7 +1431,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       // ctz(A) == bitwidth(a)  ->  A == 0 and likewise for !=
       if (RHSV == RHS->getType()->getBitWidth()) {
         Worklist.Add(II);
-        ICI.setOperand(0, II->getOperand(1));
+        ICI.setOperand(0, II->getArgOperand(0));
         ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
         return &ICI;
       }
@@ -1440,13 +1440,13 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
       // popcount(A) == 0  ->  A == 0 and likewise for !=
       if (RHS->isZero()) {
         Worklist.Add(II);
-        ICI.setOperand(0, II->getOperand(1));
+        ICI.setOperand(0, II->getArgOperand(0));
         ICI.setOperand(1, RHS);
         return &ICI;
       }
       break;
     default:
-      break; 
+      break;
     }
   }
 }
@@ -1924,35 +1924,6 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
       }
       break;
     }
-    case Instruction::Call:
-      // If we have (malloc != null), and if the malloc has a single use, we
-      // can assume it is successful and remove the malloc.
-      if (isMalloc(LHSI) && LHSI->hasOneUse() &&
-          isa<ConstantPointerNull>(RHSC)) {
-        // Need to explicitly erase malloc call here, instead of adding it to
-        // Worklist, because it won't get DCE'd from the Worklist since
-        // isInstructionTriviallyDead() returns false for function calls.
-        // It is OK to replace LHSI/MallocCall with Undef because the
-        // instruction that uses it will be erased via Worklist.
-        if (extractMallocCall(LHSI)) {
-          LHSI->replaceAllUsesWith(UndefValue::get(LHSI->getType()));
-          EraseInstFromFunction(*LHSI);
-          return ReplaceInstUsesWith(I,
-                                     ConstantInt::get(Type::getInt1Ty(I.getContext()),
-                                                      !I.isTrueWhenEqual()));
-        }
-        if (CallInst* MallocCall = extractMallocCallFromBitCast(LHSI))
-          if (MallocCall->hasOneUse()) {
-            MallocCall->replaceAllUsesWith(
-                                UndefValue::get(MallocCall->getType()));
-            EraseInstFromFunction(*MallocCall);
-            Worklist.Add(LHSI); // The malloc's bitcast use.
-            return ReplaceInstUsesWith(I,
-                                       ConstantInt::get(Type::getInt1Ty(I.getContext()),
-                                                        !I.isTrueWhenEqual()));
-          }
-      }
-      break;
     case Instruction::IntToPtr:
       // icmp pred inttoptr(X), null -> icmp pred X, 0
       if (RHSC->isNullValue() && TD &&
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 0f2a24f..8933a0b 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -13,6 +13,7 @@
 
 #include "InstCombine.h"
 #include "llvm/IntrinsicInst.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -22,6 +23,18 @@ using namespace llvm;
 STATISTIC(NumDeadStore, "Number of dead stores eliminated");
 
 Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
+  // Ensure that the alloca array size argument has type intptr_t, so that
+  // any casting is exposed early.
+  if (TD) {
+    const Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
+    if (AI.getArraySize()->getType() != IntPtrTy) {
+      Value *V = Builder->CreateIntCast(AI.getArraySize(),
+                                        IntPtrTy, false);
+      AI.setOperand(0, V);
+      return &AI;
+    }
+  }
+
   // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
   if (AI.isArrayAllocation()) {  // Check C != 1
     if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
@@ -352,10 +365,11 @@ DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) {
     return 0;
   for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
        UI != E; ++UI) {
-    if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI))
+    User *U = *UI;
+    if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U))
       return DI;
-    if (isa<BitCastInst>(UI) && UI->hasOneUse()) {
-      if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(UI->use_begin()))
+    if (isa<BitCastInst>(U) && U->hasOneUse()) {
+      if (DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(U->use_begin()))
         return DI;
     }
   }
@@ -511,17 +525,20 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   // Determine whether Dest has exactly two predecessors and, if so, compute
   // the other predecessor.
   pred_iterator PI = pred_begin(DestBB);
+  BasicBlock *P = *PI;
   BasicBlock *OtherBB = 0;
-  if (*PI != StoreBB)
-    OtherBB = *PI;
-  ++PI;
-  if (PI == pred_end(DestBB))
+
+  if (P != StoreBB)
+    OtherBB = P;
+
+  if (++PI == pred_end(DestBB))
     return false;
-  if (*PI != StoreBB) {
+
+  P = *PI;
+  if (P != StoreBB) {
     if (OtherBB)
       return false;
-    OtherBB = *PI;
+    OtherBB = P;
   }
   if (++PI != pred_end(DestBB))
     return false;
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 65f0393..f7fc62f 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -230,8 +230,9 @@ static bool isSafeAndProfitableToSinkLoad(LoadInst *L) {
     bool isAddressTaken = false;
     for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
          UI != E; ++UI) {
-      if (isa<LoadInst>(UI)) continue;
-      if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+      User *U = *UI;
+      if (isa<LoadInst>(U)) continue;
+      if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
         // If storing TO the alloca, then the address isn't taken.
         if (SI->getOperand(1) == AI) continue;
       }
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index c958cde..c44fe9d 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -329,6 +329,37 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
     }
   }
 
+  // Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
+  // and       (X <s  0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
+  // FIXME: Type and constness constraints could be lifted, but we have to
+  //        watch code size carefully. We should consider xor instead of
+  //        sub/add when we decide to do that.
+  if (const IntegerType *Ty = dyn_cast<IntegerType>(CmpLHS->getType())) {
+    if (TrueVal->getType() == Ty) {
+      if (ConstantInt *Cmp = dyn_cast<ConstantInt>(CmpRHS)) {
+        ConstantInt *C1 = NULL, *C2 = NULL;
+        if (Pred == ICmpInst::ICMP_SGT && Cmp->isAllOnesValue()) {
+          C1 = dyn_cast<ConstantInt>(TrueVal);
+          C2 = dyn_cast<ConstantInt>(FalseVal);
+        } else if (Pred == ICmpInst::ICMP_SLT && Cmp->isNullValue()) {
+          C1 = dyn_cast<ConstantInt>(FalseVal);
+          C2 = dyn_cast<ConstantInt>(TrueVal);
+        }
+        if (C1 && C2) {
+          // This shift results in either -1 or 0.
+          Value *AShr = Builder->CreateAShr(CmpLHS, Ty->getBitWidth()-1);
+
+          // Check if we can express the operation with a single or.
+          if (C2->isAllOnesValue())
+            return ReplaceInstUsesWith(SI, Builder->CreateOr(AShr, C1));
+
+          Value *And = Builder->CreateAnd(AShr, C2->getValue()-C1->getValue());
+          return ReplaceInstUsesWith(SI, Builder->CreateAdd(And, C1));
+        }
+      }
+    }
+  }
+
   if (CmpLHS == TrueVal && CmpRHS == FalseVal) {
     // Transform (X == Y) ? X : Y  -> Y
     if (Pred == ICmpInst::ICMP_EQ)
diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 836bda3..e5ce8a6 100644
--- a/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -404,7 +404,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
         isPowerOf2_32(BitWidth) && Log2_32(BitWidth) == Op1C->getZExtValue()){
       bool isCtPop = II->getIntrinsicID() == Intrinsic::ctpop;
       Constant *RHS = ConstantInt::getSigned(Op0->getType(), isCtPop ? -1:0);
-      Value *Cmp = Builder->CreateICmpEQ(II->getOperand(1), RHS);
+      Value *Cmp = Builder->CreateICmpEQ(II->getArgOperand(0), RHS);
       return new ZExtInst(Cmp, II->getType());
     }
   }
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index cd41844..adf7a76 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -732,10 +732,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
       // the right place.
       Instruction *NewVal;
       if (InputBit > ResultBit)
-        NewVal = BinaryOperator::CreateLShr(I->getOperand(1),
+        NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
                 ConstantInt::get(I->getType(), InputBit-ResultBit));
       else
-        NewVal = BinaryOperator::CreateShl(I->getOperand(1),
+        NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
                 ConstantInt::get(I->getType(), ResultBit-InputBit));
       NewVal->takeName(I);
       return InsertNewInstBefore(NewVal, *I);
@@ -1052,12 +1052,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     case Intrinsic::x86_sse2_mul_sd:
     case Intrinsic::x86_sse2_min_sd:
     case Intrinsic::x86_sse2_max_sd:
-      TmpV = SimplifyDemandedVectorElts(II->getOperand(1), DemandedElts,
+      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                         UndefElts, Depth+1);
-      if (TmpV) { II->setOperand(1, TmpV); MadeChange = true; }
-      TmpV = SimplifyDemandedVectorElts(II->getOperand(2), DemandedElts,
+      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
                                         UndefElts2, Depth+1);
-      if (TmpV) { II->setOperand(2, TmpV); MadeChange = true; }
+      if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
 
       // If only the low elt is demanded and this is a scalarizable intrinsic,
       // scalarize it now.
@@ -1069,8 +1069,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
         case Intrinsic::x86_sse2_sub_sd:
         case Intrinsic::x86_sse2_mul_sd:
           // TODO: Lower MIN/MAX/ABS/etc
-          Value *LHS = II->getOperand(1);
-          Value *RHS = II->getOperand(2);
+          Value *LHS = II->getArgOperand(0);
+          Value *RHS = II->getArgOperand(1);
           // Extract the element as scalars.
           LHS = InsertNewInstBefore(ExtractElementInst::Create(LHS,
             ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index af9ec5c..af2958f 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -710,8 +710,55 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   return 0;
 }
 
-Instruction *InstCombiner::visitFree(Instruction &FI) {
-  Value *Op = FI.getOperand(1);
+
+
+static bool IsOnlyNullComparedAndFreed(const Value &V) {
+  for (Value::const_use_iterator UI = V.use_begin(), UE = V.use_end();
+       UI != UE; ++UI) {
+    const User *U = *UI;
+    if (isFreeCall(U))
+      continue;
+    if (const ICmpInst *ICI = dyn_cast<ICmpInst>(U))
+      if (ICI->isEquality() && isa<ConstantPointerNull>(ICI->getOperand(1)))
+        continue;
+    return false;
+  }
+  return true;
+}
+
+Instruction *InstCombiner::visitMalloc(Instruction &MI) {
+  // If we have a malloc call which is only used in any amount of comparisons
+  // to null and free calls, delete the calls and replace the comparisons with
+  // true or false as appropriate.
+  if (IsOnlyNullComparedAndFreed(MI)) {
+    for (Value::use_iterator UI = MI.use_begin(), UE = MI.use_end();
+         UI != UE;) {
+      // We can assume that every remaining use is a free call or an icmp eq/ne
+      // to null, so the cast is safe.
+      Instruction *I = cast<Instruction>(*UI);
+
+      // Early increment here, as we're about to get rid of the user.
+      ++UI;
+
+      if (isFreeCall(I)) {
+        EraseInstFromFunction(*cast<CallInst>(I));
+        continue;
+      }
+      // Again, the cast is safe.
+      ICmpInst *C = cast<ICmpInst>(I);
+      ReplaceInstUsesWith(*C, ConstantInt::get(Type::getInt1Ty(C->getContext()),
+                                               C->isFalseWhenEqual()));
+      EraseInstFromFunction(*C);
+    }
+    return EraseInstFromFunction(MI);
+  }
+  return 0;
+}
+
+
+
+Instruction *InstCombiner::visitFree(CallInst &FI) {
+  Value *Op = FI.getArgOperand(0);
 
   // free undef -> unreachable.
   if (isa<UndefValue>(Op)) {
@@ -726,23 +773,6 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
   if (isa<ConstantPointerNull>(Op))
     return EraseInstFromFunction(FI);
 
-  // If we have a malloc call whose only use is a free call, delete both.
-  if (isMalloc(Op)) {
-    if (CallInst* CI = extractMallocCallFromBitCast(Op)) {
-      if (Op->hasOneUse() && CI->hasOneUse()) {
-        EraseInstFromFunction(FI);
-        EraseInstFromFunction(*CI);
-        return EraseInstFromFunction(*cast<Instruction>(Op));
-      }
-    } else {
-      // Op is a call to malloc
-      if (Op->hasOneUse()) {
-        EraseInstFromFunction(FI);
-        return EraseInstFromFunction(*cast<Instruction>(Op));
-      }
-    }
-  }
-
   return 0;
 }
 
@@ -896,7 +926,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
     // We're extracting from an intrinsic, see if we're the only user, which
     // allows us to simplify multiple result intrinsics to simpler things that
-    // just get one value..
+    // just get one value.
     if (II->hasOneUse()) {
       // Check if we're grabbing the overflow bit or the result of a 'with
       // overflow' intrinsic. If it's the latter we can remove the intrinsic
@@ -905,7 +935,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       case Intrinsic::uadd_with_overflow:
       case Intrinsic::sadd_with_overflow:
         if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
           II->replaceAllUsesWith(UndefValue::get(II->getType()));
           EraseInstFromFunction(*II);
           return BinaryOperator::CreateAdd(LHS, RHS);
@@ -914,7 +944,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       case Intrinsic::usub_with_overflow:
       case Intrinsic::ssub_with_overflow:
         if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
           II->replaceAllUsesWith(UndefValue::get(II->getType()));
           EraseInstFromFunction(*II);
           return BinaryOperator::CreateSub(LHS, RHS);
@@ -923,7 +953,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
       case Intrinsic::umul_with_overflow:
      case Intrinsic::smul_with_overflow:
        if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
           II->replaceAllUsesWith(UndefValue::get(II->getType()));
           EraseInstFromFunction(*II);
           return BinaryOperator::CreateMul(LHS, RHS);
diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
index 5650150..41e3a39 100644
--- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
+++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
@@ -143,7 +143,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
   ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry);
   if (!std::binary_search(MST.begin(), MST.end(), edge)) {
     printEdgeCounter(edge,entry,i);
-    IncrementCounterInBlock(entry, i, Counters); NumEdgesInserted++;
+    IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted;
     Initializer[i++] = (Zero);
   } else{
     Initializer[i++] = (Uncounted);
@@ -166,7 +166,7 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
     ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0);
     if (!std::binary_search(MST.begin(), MST.end(), edge)) {
       printEdgeCounter(edge,BB,i);
-      IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+      IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
       Initializer[i++] = (Zero);
     } else{
       Initializer[i++] = (Uncounted);
@@ -189,11 +189,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) {
       if (TI->getNumSuccessors() == 1) {
         // Insert counter at the start of the block
         printEdgeCounter(edge,BB,i);
-        IncrementCounterInBlock(BB, i, Counters); NumEdgesInserted++;
+        IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted;
       } else {
         // Insert counter at the start of the block
         printEdgeCounter(edge,Succ,i);
-        IncrementCounterInBlock(Succ, i, Counters); NumEdgesInserted++;
+        IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted;
       }
       Initializer[i++] = (Zero);
     } else {
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 8662a82..1a30e9b 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -61,8 +61,8 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
   }
   Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
 
-  Instruction *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
-                                           "newargc", InsertPos);
+  CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
+                                        "newargc", InsertPos);
 
   // If argc or argv are not available in main, just pass null values in.
   Function::arg_iterator AI;
@@ -73,10 +73,10 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
     if (AI->getType() != ArgVTy) {
       Instruction::CastOps opcode = CastInst::getCastOpcode(AI, false, ArgVTy,
                                                             false);
-      InitCall->setOperand(2,
+      InitCall->setArgOperand(1,
         CastInst::Create(opcode, AI, ArgVTy, "argv.cast", InitCall));
     } else {
-      InitCall->setOperand(2, AI);
+      InitCall->setArgOperand(1, AI);
     }
     /* FALL THROUGH */
@@ -93,12 +93,12 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
       }
       opcode = CastInst::getCastOpcode(AI, true,
                                        Type::getInt32Ty(Context), true);
-      InitCall->setOperand(1,
+      InitCall->setArgOperand(0,
         CastInst::Create(opcode, AI, Type::getInt32Ty(Context),
                          "argc.cast", InitCall));
     } else {
       AI->replaceAllUsesWith(InitCall);
-      InitCall->setOperand(1, AI);
+      InitCall->setArgOperand(0, AI);
     }
   case 0: break;
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index 6135992..dcf14a6 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -230,7 +230,7 @@ class ABCD : public FunctionPass {
       DenseMapIterator begin = map.begin();
       DenseMapIterator end = map.end();
       for (; begin != end; ++begin) {
-        begin->second.clear(); 
+        begin->second.clear();
       }
       map.clear();
     }
@@ -396,8 +396,8 @@ class ABCD : public FunctionPass {
   /// this case the method returns true, otherwise false. It also obtains the
   /// Instruction and ConstantInt from the BinaryOperator and returns it.
   bool createBinaryOperatorInfo(BinaryOperator *BO, Instruction **I1,
-                                 Instruction **I2, ConstantInt **C1,
-                                 ConstantInt **C2);
+                                Instruction **I2, ConstantInt **C1,
+                                ConstantInt **C2);
 
   /// This method creates a constraint between a Sigma and an Instruction.
   /// These constraints are created as soon as we find a comparator that uses a
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 5a49841..2d19467 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -83,7 +83,7 @@ bool ADCE::runOnFunction(Function& F) {
   for (SmallVector::iterator I = worklist.begin(),
        E = worklist.end(); I != E; ++I) {
-    NumRemoved++;
+    ++NumRemoved;
     (*I)->eraseFromParent();
   }
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 93e9bfb..272066c 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -548,7 +548,8 @@ protected:
     CI->eraseFromParent();
   }
   bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
-    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(SizeCIOp)))
+    if (ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp
+                                                    - CallInst::ArgOffset)))
       return SizeCI->isAllOnesValue();
     return false;
   }
@@ -559,7 +560,7 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
   // Lower all uses of llvm.objectsize.*
   IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
   if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
-    bool Min = (cast<ConstantInt>(II->getOperand(2))->getZExtValue() == 1);
+    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
     const Type *ReturnTy = CI->getType();
     Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
     CI->replaceAllUsesWith(RetVal);
@@ -759,8 +760,7 @@ bool CodeGenPrepare::OptimizeInlineAsmInst(Instruction *I, CallSite CS,
     }
 
     // Compute the constraint code and ConstraintType to use.
-    TLI->ComputeConstraintToUse(OpInfo, SDValue(),
-                             OpInfo.ConstraintType == TargetLowering::C_Memory);
+    TLI->ComputeConstraintToUse(OpInfo, SDValue());
 
     if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
         OpInfo.isIndirect) {
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 09c01d3..e047e4f 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -56,7 +56,8 @@ namespace {
     }
 
     bool runOnBasicBlock(BasicBlock &BB);
-    bool handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep);
+    bool handleFreeWithNonTrivialDependency(const CallInst *F,
+                                            MemDepResult Dep);
     bool handleEndBlock(BasicBlock &BB);
     bool RemoveUndeadPointers(Value *Ptr, uint64_t killPointerSize,
                               BasicBlock::iterator &BBI,
@@ -73,7 +74,6 @@ namespace {
       AU.addRequired();
      AU.addRequired();
      AU.addPreserved();
-      AU.addPreserved();
      AU.addPreserved();
    }
@@ -123,14 +123,15 @@ static Value *getPointerOperand(Instruction *I) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return SI->getPointerOperand();
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
-    return MI->getOperand(1);
-
-  switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+    return MI->getArgOperand(0);
+
+  IntrinsicInst *II = cast<IntrinsicInst>(I);
+  switch (II->getIntrinsicID()) {
   default: assert(false && "Unexpected intrinsic!");
   case Intrinsic::init_trampoline:
-    return I->getOperand(1);
+    return II->getArgOperand(0);
   case Intrinsic::lifetime_end:
-    return I->getOperand(2);
+    return II->getArgOperand(1);
   }
 }
 
@@ -147,12 +148,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
     Len = MI->getLength();
   } else {
-    switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+    IntrinsicInst *II = cast<IntrinsicInst>(I);
+    switch (II->getIntrinsicID()) {
     default: assert(false && "Unexpected intrinsic!");
    case Intrinsic::init_trampoline:
      return -1u;
    case Intrinsic::lifetime_end:
-      Len = I->getOperand(1);
+      Len = II->getArgOperand(0);
      break;
    }
  }
@@ -201,8 +203,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
     if (InstDep.isNonLocal()) continue;
 
     // Handle frees whose dependencies are non-trivial.
-    if (isFreeCall(Inst)) {
-      MadeChange |= handleFreeWithNonTrivialDependency(Inst, InstDep);
+    if (const CallInst *F = isFreeCall(Inst)) {
+      MadeChange |= handleFreeWithNonTrivialDependency(F, InstDep);
       continue;
     }
 
@@ -218,7 +220,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
           isElidable(DepStore)) {
         // Delete the store and now-dead instructions that feed it.
         DeleteDeadInstruction(DepStore);
-        NumFastStores++;
+        ++NumFastStores;
         MadeChange = true;
 
         // DeleteDeadInstruction can delete the current instruction in loop
@@ -249,7 +251,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
           BBI = BB.begin();
         else if (BBI != BB.begin())  // Revisit this instruction if possible.
           --BBI;
-        NumFastStores++;
+        ++NumFastStores;
         MadeChange = true;
         continue;
       }
@@ -270,7 +272,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
         BBI = BB.begin();
       else if (BBI != BB.begin())  // Revisit this instruction if possible.
         --BBI;
-      NumFastStores++;
+      ++NumFastStores;
      MadeChange = true;
      continue;
    }
@@ -287,7 +289,8 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
 
 /// handleFreeWithNonTrivialDependency - Handle frees of entire structures whose
 /// dependency is a store to a field of that structure.
-bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
+bool DSE::handleFreeWithNonTrivialDependency(const CallInst *F,
+                                             MemDepResult Dep) {
   AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
 
   Instruction *Dependency = Dep.getInst();
@@ -297,13 +300,13 @@ bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
   Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
 
   // Check for aliasing.
-  if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
+  if (AA.alias(F->getArgOperand(0), 1, DepPointer, 1) !=
          AliasAnalysis::MustAlias)
     return false;
 
   // DCE instructions only used to calculate that store
   DeleteDeadInstruction(Dependency);
-  NumFastStores++;
+  ++NumFastStores;
   return true;
 }
 
@@ -349,9 +352,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
       if (deadPointers.count(pointerOperand)) {
         // DCE instructions only used to calculate that store.
         Instruction *Dead = BBI;
-        BBI++;
+        ++BBI;
         DeleteDeadInstruction(Dead, &deadPointers);
-        NumFastStores++;
+        ++NumFastStores;
         MadeChange = true;
         continue;
       }
@@ -371,9 +374,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
       // However, if this load is unused and not volatile, we can go ahead and
      // remove it, and not have to worry about it making our pointer undead!
      if (L->use_empty() && !L->isVolatile()) {
-        BBI++;
+        ++BBI;
        DeleteDeadInstruction(L, &deadPointers);
-        NumFastOther++;
+        ++NumFastOther;
        MadeChange = true;
        continue;
      }
@@ -391,9 +394,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
 
       // Dead alloca's can be DCE'd when we reach them
       if (A->use_empty()) {
-        BBI++;
+        ++BBI;
         DeleteDeadInstruction(A, &deadPointers);
-        NumFastOther++;
+        ++NumFastOther;
         MadeChange = true;
       }
 
@@ -426,9 +429,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
                                  getPointerSize(*I));
 
         if (A == AliasAnalysis::ModRef)
-          modRef++;
+          ++modRef;
         else
-          other++;
+          ++other;
 
         if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref)
           dead.push_back(*I);
@@ -442,9 +445,9 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
     } else if (isInstructionTriviallyDead(BBI)) {
       // For any non-memory-affecting non-terminators, DCE them as we reach them
       Instruction *Inst = BBI;
-      BBI++;
+      ++BBI;
       DeleteDeadInstruction(Inst, &deadPointers);
-      NumFastOther++;
+      ++NumFastOther;
       MadeChange = true;
       continue;
     }
@@ -497,7 +500,7 @@ bool DSE::RemoveUndeadPointers(Value *killPointer, uint64_t killPointerSize,
       // Remove it!
       ++BBI;
       DeleteDeadInstruction(S, &deadPointers);
-      NumFastStores++;
+      ++NumFastStores;
       MadeChange = true;
 
       continue;
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index ca8ab49..88b6776 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -35,6 +35,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/PHITransAddr.h"
@@ -271,7 +272,8 @@ Expression ValueTable::create_expression(CallInst* C) {
   e.function = C->getCalledFunction();
   e.opcode = Expression::CALL;
 
-  for (CallInst::op_iterator I = C->op_begin()+1, E = C->op_end();
+  CallSite CS(C);
+  for (CallInst::op_iterator I = CS.arg_begin(), E = CS.arg_end();
        I != E; ++I)
     e.varargs.push_back(lookup_or_add(*I));
 
@@ -447,14 +449,14 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
     if (local_dep.isDef()) {
       CallInst* local_cdep = cast<CallInst>(local_dep.getInst());
 
-      if (local_cdep->getNumOperands() != C->getNumOperands()) {
+      if (local_cdep->getNumArgOperands() != C->getNumArgOperands()) {
         valueNumbering[C] = nextValueNumber;
         return nextValueNumber++;
       }
-      for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-        uint32_t c_vn = lookup_or_add(C->getOperand(i));
-        uint32_t cd_vn = lookup_or_add(local_cdep->getOperand(i));
+      for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+        uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+        uint32_t cd_vn = lookup_or_add(local_cdep->getArgOperand(i));
         if (c_vn != cd_vn) {
           valueNumbering[C] = nextValueNumber;
           return nextValueNumber++;
@@ -504,13 +506,13 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
       return nextValueNumber++;
     }
 
-    if (cdep->getNumOperands() != C->getNumOperands()) {
+    if (cdep->getNumArgOperands() != C->getNumArgOperands()) {
       valueNumbering[C] = nextValueNumber;
       return nextValueNumber++;
     }
-    for (unsigned i = 1; i < C->getNumOperands(); ++i) {
-      uint32_t c_vn = lookup_or_add(C->getOperand(i));
-      uint32_t cd_vn = lookup_or_add(cdep->getOperand(i));
+    for (unsigned i = 0, e = C->getNumArgOperands(); i < e; ++i) {
+      uint32_t c_vn = lookup_or_add(C->getArgOperand(i));
+      uint32_t cd_vn = lookup_or_add(cdep->getArgOperand(i));
       if (c_vn != cd_vn) {
         valueNumbering[C] = nextValueNumber;
         return nextValueNumber++;
@@ -1500,7 +1502,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       MD->invalidateCachedPointerInfo(V);
     VN.erase(LI);
     toErase.push_back(LI);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
 
@@ -1723,7 +1725,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
       MD->invalidateCachedPointerInfo(V);
     VN.erase(LI);
     toErase.push_back(LI);
-    NumPRELoad++;
+    ++NumPRELoad;
     return true;
   }
 
@@ -1784,7 +1786,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
         MD->invalidateCachedPointerInfo(AvailVal);
       VN.erase(L);
       toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
       return true;
     }
 
@@ -1830,7 +1832,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
        MD->invalidateCachedPointerInfo(StoredVal);
      VN.erase(L);
      toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
      return true;
    }
 
@@ -1860,7 +1862,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
        MD->invalidateCachedPointerInfo(DepLI);
      VN.erase(L);
      toErase.push_back(L);
-      NumGVNLoad++;
+      ++NumGVNLoad;
      return true;
    }
 
@@ -1871,7 +1873,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     L->replaceAllUsesWith(UndefValue::get(L->getType()));
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
 
@@ -1882,7 +1884,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     L->replaceAllUsesWith(UndefValue::get(L->getType()));
     VN.erase(L);
     toErase.push_back(L);
-    NumGVNLoad++;
+    ++NumGVNLoad;
     return true;
   }
 }
@@ -2014,7 +2016,7 @@ bool GVN::runOnFunction(Function& F) {
     BasicBlock *BB = FI;
     ++FI;
     bool removedBlock = MergeBlockIntoPredecessor(BB, this);
-    if (removedBlock) NumGVNBlocks++;
+    if (removedBlock) ++NumGVNBlocks;
 
     Changed |= removedBlock;
   }
@@ -2126,27 +2128,28 @@ bool GVN::performPRE(Function &F) {
       for (pred_iterator PI = pred_begin(CurrentBlock),
            PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+        BasicBlock *P = *PI;
         // We're not interested in PRE where the block is its
         // own predecessor, or in blocks with predecessors
         // that are not reachable.
-        if (*PI == CurrentBlock) {
+        if (P == CurrentBlock) {
           NumWithout = 2;
           break;
-        } else if (!localAvail.count(*PI)) {
+        } else if (!localAvail.count(P)) {
           NumWithout = 2;
           break;
         }
 
         DenseMap::iterator predV =
-          localAvail[*PI]->table.find(ValNo);
-        if (predV == localAvail[*PI]->table.end()) {
-          PREPred = *PI;
-          NumWithout++;
+          localAvail[P]->table.find(ValNo);
+        if (predV == localAvail[P]->table.end()) {
+          PREPred = P;
+          ++NumWithout;
         } else if (predV->second == CurInst) {
           NumWithout = 2;
         } else {
-          predMap[*PI] = predV->second;
-          NumWith++;
+          predMap[P] = predV->second;
+          ++NumWith;
         }
       }
@@ -2201,7 +2204,7 @@ bool GVN::performPRE(Function &F) {
       PREInstr->setName(CurInst->getName() + ".pre");
       predMap[PREPred] = PREInstr;
       VN.add(PREInstr, ValNo);
-      NumGVNPRE++;
+      ++NumGVNPRE;
 
       // Update the availability map to include the new instruction.
       localAvail[PREPred]->table.insert(std::make_pair(ValNo, PREInstr));
@@ -2211,8 +2214,10 @@ bool GVN::performPRE(Function &F) {
                                      CurInst->getName() + ".pre-phi",
                                      CurrentBlock->begin());
       for (pred_iterator PI = pred_begin(CurrentBlock),
-           PE = pred_end(CurrentBlock); PI != PE; ++PI)
-        Phi->addIncoming(predMap[*PI], *PI);
+           PE = pred_end(CurrentBlock); PI != PE; ++PI) {
+        BasicBlock *P = *PI;
+        Phi->addIncoming(predMap[P], P);
+      }
 
       VN.add(Phi, ValNo);
       localAvail[CurrentBlock]->table[ValNo] = Phi;
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 36bea67..b5c9dd8 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -467,6 +467,17 @@ void IndVarSimplify::EliminateIVRemainders() {
 }
 
 bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
+  // If LoopSimplify form is not available, stay out of trouble. Some notes:
+  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
+  //    canonicalization can be a pessimization without LSR to "clean up"
+  //    afterwards.
+  //  - We depend on having a preheader; in particular,
+  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
+  //    and we're in trouble if we can't find the induction variable even when
+  //    we've manually inserted one.
+  if (!L->isLoopSimplifyForm())
+    return false;
+
   IU = &getAnalysis<IVUsers>();
   LI = &getAnalysis<LoopInfo>();
   SE = &getAnalysis<ScalarEvolution>();
@@ -760,8 +771,9 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
     bool UsedInLoop = false;
     for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
          UI != UE; ++UI) {
-      BasicBlock *UseBB = cast<Instruction>(UI)->getParent();
-      if (PHINode *P = dyn_cast<PHINode>(UI)) {
+      User *U = *UI;
+      BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+      if (PHINode *P = dyn_cast<PHINode>(U)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
        UseBB = P->getIncomingBlock(i);
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index df05b71..edce14c 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/Loads.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -288,14 +289,15 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
     // Perhaps getConstantOnEdge should be smart enough to do this?
     for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);
          PI != E; ++PI) {
+      BasicBlock *P = *PI;
       // If the value is known by LazyValueInfo to be a constant in a
       // predecessor, use that information to try to thread this block.
-      Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB);
+      Constant *PredCst = LVI->getConstantOnEdge(V, P, BB);
       if (PredCst == 0 ||
           (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
         continue;
 
-      Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI));
+      Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), P));
     }
 
     return !Result.empty();
@@ -345,8 +347,19 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
     }
     for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
       if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0) {
-        Result.push_back(RHSVals[i]);
-        Result.back().first = InterestingVal;
+        // If we already inferred a value for this block on the LHS, don't
+        // re-add it.
+        bool HasValue = false;
+        for (unsigned r = 0, e = Result.size(); r != e; ++r)
+          if (Result[r].second == RHSVals[i].second) {
+            HasValue = true;
+            break;
+          }
+
+        if (!HasValue) {
+          Result.push_back(RHSVals[i]);
+          Result.back().first = InterestingVal;
+        }
       }
     return !Result.empty();
   }
@@ -409,20 +422,21 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
         (!isa<Instruction>(Cmp->getOperand(0)) ||
          cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
       Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
-      
+
       for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);
            PI != E; ++PI) {
+        BasicBlock *P = *PI;
        // If the value is known by LazyValueInfo to be a constant in a
        // predecessor, use that information to try to thread this block.
         LazyValueInfo::Tristate
           Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
-                                        RHSCst, *PI, BB);
+                                        RHSCst, P, BB);
         if (Res == LazyValueInfo::Unknown)
           continue;
 
         Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
-        Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+        Result.push_back(std::make_pair(cast<ConstantInt>(ResC), P));
       }
-      
+
       return !Result.empty();
     }
   }
@@ -538,18 +552,22 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
       (CondInst == 0 || CondInst->getParent() != BB)) {
 
     // Non-local definition.
     pred_iterator PI = pred_begin(BB), E = pred_end(BB);
     if (isa<BranchInst>(BB->getTerminator())) {
-      for (; PI != E; ++PI)
-        if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+      for (; PI != E; ++PI) {
+        BasicBlock *P = *PI;
+        if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
           if (PBI->isConditional() && PBI->getCondition() == Condition &&
-              ProcessBranchOnDuplicateCond(*PI, BB))
+              ProcessBranchOnDuplicateCond(P, BB))
             return true;
+      }
     } else {
       assert(isa<SwitchInst>(BB->getTerminator()) && "Unknown jump terminator");
-      for (; PI != E; ++PI)
-        if (SwitchInst *PSI = dyn_cast<SwitchInst>((*PI)->getTerminator()))
+      for (; PI != E; ++PI) {
+        BasicBlock *P = *PI;
+        if (SwitchInst *PSI = dyn_cast<SwitchInst>(P->getTerminator()))
           if (PSI->getCondition() == Condition &&
-              ProcessSwitchOnDuplicateCond(*PI, BB))
+              ProcessSwitchOnDuplicateCond(P, BB))
             return true;
+      }
     }
   }
 
@@ -569,19 +587,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
       // If we have a comparison, loop over the predecessors to see if there is
       // a condition with a lexically identical value.
       pred_iterator PI = pred_begin(BB), E = pred_end(BB);
-      for (; PI != E; ++PI)
-        if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
-          if (PBI->isConditional() && *PI != BB) {
+      for (; PI != E; ++PI) {
+        BasicBlock *P = *PI;
+        if (BranchInst *PBI = dyn_cast<BranchInst>(P->getTerminator()))
+          if (PBI->isConditional() && P != BB) {
             if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
               if (CI->getOperand(0) == CondCmp->getOperand(0) &&
                   CI->getOperand(1) == CondCmp->getOperand(1) &&
                   CI->getPredicate() == CondCmp->getPredicate()) {
                 // TODO: Could handle things like (x != 4) --> (x == 17)
-                if (ProcessBranchOnDuplicateCond(*PI, BB))
+                if (ProcessBranchOnDuplicateCond(P, BB))
                   return true;
               }
             }
           }
+      }
     }
   }
 
@@ -869,9 +889,15 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
 
   // Add all the unavailable predecessors to the PredsToSplit list.
   for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
-       PI != PE; ++PI)
-    if (!AvailablePredSet.count(*PI))
-      PredsToSplit.push_back(*PI);
+       PI != PE; ++PI) {
+    BasicBlock *P = *PI;
+    // If the predecessor is an indirect goto, we can't split the edge.
+    if (isa<IndirectBrInst>(P->getTerminator()))
+      return false;
+
+    if (!AvailablePredSet.count(P))
+      PredsToSplit.push_back(P);
+  }
 
   // Split them out to their own block.
   UnavailablePred =
@@ -903,11 +929,12 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
   // have multiple entries here.
   for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E;
        ++PI) {
+    BasicBlock *P = *PI;
     AvailablePredsTy::iterator I =
       std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(),
-                       std::make_pair(*PI, (Value*)0));
+                       std::make_pair(P, (Value*)0));
 
-    assert(I != AvailablePreds.end() && I->first == *PI &&
+    assert(I != AvailablePreds.end() && I->first == P &&
            "Didn't find entry for predecessor!");
 
     PN->addIncoming(I->second, I->first);
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 48817ab..e4894e9 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -83,7 +83,7 @@ bool LoopDeletion::IsLoopDead(Loop* L,
       if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator()))
         return false;
 
-      BI++;
+      ++BI;
     }
 
     // Make sure that no instructions in the block have potential side-effects.
@@ -176,7 +176,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   BasicBlock::iterator BI = exitBlock->begin();
   while (PHINode* P = dyn_cast<PHINode>(BI)) {
     P->replaceUsesOfWith(exitingBlock, preheader);
-    BI++;
+    ++BI;
   }
 
   // Update the dominator tree and remove the instructions and blocks that will
@@ -226,7 +226,7 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
   LPM.deleteLoopFromQueue(L);
   Changed = true;
-  NumDeleted++;
+  ++NumDeleted;
 
   return Changed;
 }
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 101ff5b..31058e5 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -649,7 +649,7 @@ bool LoopIndexSplit::updateLoopIterationSpace() {
       }
     }
   }
-  NumRestrictBounds++;
+  ++NumRestrictBounds;
   return true;
 }
 
@@ -958,11 +958,11 @@ bool LoopIndexSplit::splitLoop() {
       continue;
 
     for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
-        BI != BE; ++BI) {
+         BI != BE; ++BI) {
       Instruction *Inst = BI;
 
       if (!Inst->isSafeToSpeculativelyExecute() && !isa<PHINode>(Inst)
-          && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
+          && !isa<BranchInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst))
         return false;
     }
   }
@@ -1016,13 +1016,13 @@ bool LoopIndexSplit::splitLoop() {
   BSV = getMax(BSV, IVStartValue, Sign, PHTerm);
 
   // [*] Clone Loop
-  DenseMap<const Value *, Value *> ValueMap;
-  Loop *BLoop = CloneLoop(L, LPM, LI, ValueMap, this);
+  ValueMap<const Value *, Value *> VMap;
+  Loop *BLoop = CloneLoop(L, LPM, LI, VMap, this);
   Loop *ALoop = L;
 
   // [*] ALoop's exiting edge enters BLoop's header.
   //     ALoop's original exit block becomes BLoop's exit block.
-  PHINode *B_IndVar = cast<PHINode>(ValueMap[IndVar]);
+  PHINode *B_IndVar = cast<PHINode>(VMap[IndVar]);
   BasicBlock *A_ExitingBlock = ExitCondition->getParent();
   BranchInst *A_ExitInsn =
     dyn_cast<BranchInst>(A_ExitingBlock->getTerminator());
@@ -1047,7 +1047,7 @@ bool LoopIndexSplit::splitLoop() {
   for (BasicBlock::iterator BI = ALoop->getHeader()->begin(),
        BE = ALoop->getHeader()->end(); BI != BE; ++BI) {
     if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      PHINode *PNClone = cast<PHINode>(ValueMap[PN]);
+      PHINode *PNClone = cast<PHINode>(VMap[PN]);
       InverseMap[PNClone] = PN;
     } else
       break;
@@ -1085,11 +1085,11 @@ bool LoopIndexSplit::splitLoop() {
   //    block. Remove incoming PHINode values from ALoop's exiting block.
   //    Add new incoming values from BLoop's incoming exiting value.
   //    Update BLoop exit block's dominator info..
-  BasicBlock *B_ExitingBlock = cast<BasicBlock>(ValueMap[A_ExitingBlock]);
+  BasicBlock *B_ExitingBlock = cast<BasicBlock>(VMap[A_ExitingBlock]);
   for (BasicBlock::iterator BI = B_ExitBlock->begin(), BE = B_ExitBlock->end();
        BI != BE; ++BI) {
     if (PHINode *PN = dyn_cast<PHINode>(BI)) {
-      PN->addIncoming(ValueMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
+      PN->addIncoming(VMap[PN->getIncomingValueForBlock(A_ExitingBlock)],
                       B_ExitingBlock);
       PN->removeIncomingValue(A_ExitingBlock);
     } else
@@ -1131,7 +1131,7 @@ bool LoopIndexSplit::splitLoop() {
   removeBlocks(A_InactiveBranch, L, A_ActiveBranch);
 
   //[*] Eliminate split condition's inactive branch in from BLoop.
-  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(ValueMap[A_SplitCondBlock]);
+  BasicBlock *B_SplitCondBlock = cast<BasicBlock>(VMap[A_SplitCondBlock]);
   BranchInst *B_BR = cast<BranchInst>(B_SplitCondBlock->getTerminator());
   BasicBlock *B_InactiveBranch = NULL;
   BasicBlock *B_ActiveBranch = NULL;
@@ -1146,9 +1146,9 @@ bool LoopIndexSplit::splitLoop() {
 
   //[*] Move exit condition into split condition block to avoid
   //    executing dead loop iteration.
-  ICmpInst *B_ExitCondition = cast<ICmpInst>(ValueMap[ExitCondition]);
-  Instruction *B_IndVarIncrement = cast<Instruction>(ValueMap[IVIncrement]);
-  ICmpInst *B_SplitCondition = cast<ICmpInst>(ValueMap[SplitCondition]);
+  ICmpInst *B_ExitCondition = cast<ICmpInst>(VMap[ExitCondition]);
+  Instruction *B_IndVarIncrement = cast<Instruction>(VMap[IVIncrement]);
+  ICmpInst *B_SplitCondition = cast<ICmpInst>(VMap[SplitCondition]);
 
   moveExitCondition(A_SplitCondBlock, A_ActiveBranch, A_ExitBlock, ExitCondition,
                     cast<ICmpInst>(SplitCondition), IndVar, IVIncrement,
@@ -1159,7 +1159,7 @@ bool LoopIndexSplit::splitLoop() {
                     B_SplitCondition, B_IndVar, B_IndVarIncrement,
                     BLoop, EVOpNum);
 
-  NumIndexSplit++;
+  ++NumIndexSplit;
   return true;
 }
 
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 5004483..16c4a15 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -147,7 +147,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
       continue;   // PHI nodes don't count.
     if (isa<DbgInfoIntrinsic>(OI))
       continue;  // Debug intrinsics don't count as size.
-    Size++;
+    ++Size;
   }
 
   if (Size > MAX_HEADER_SIZE)
@@ -263,7 +263,7 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
 
   preserveCanonicalLoopForm(LPM);
 
-  NumRotated++;
+  ++NumRotated;
   return true;
 }
 
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 86ea3eb..a250a88 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -392,12 +392,13 @@ static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
   return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
 }
 
-/// isMulSExtable - Return true if the given add can be sign-extended
+/// isMulSExtable - Return true if the given mul can be sign-extended
 /// without changing its value.
-static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
   const Type *WideTy =
-    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
-  return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+    IntegerType::get(SE.getContext(),
+                     SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
+  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
 }
 
 /// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
@@ -413,20 +414,28 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
   if (LHS == RHS)
     return SE.getConstant(LHS->getType(), 1);
 
-  // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do some
-  // folding.
-  if (RHS->isAllOnesValue())
-    return SE.getMulExpr(LHS, RHS);
+  // Handle a few RHS special cases.
+  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
+  if (RC) {
+    const APInt &RA = RC->getValue()->getValue();
+    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
+    // some folding.
+    if (RA.isAllOnesValue())
+      return SE.getMulExpr(LHS, RC);
+    // Handle x /s 1 as x.
+    if (RA == 1)
+      return LHS;
+  }
 
   // Check for a division of a constant by a constant.
   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
-    const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
     if (!RC)
       return 0;
-    if (C->getValue()->getValue().srem(RC->getValue()->getValue()) != 0)
+    const APInt &LA = C->getValue()->getValue();
+    const APInt &RA = RC->getValue()->getValue();
+    if (LA.srem(RA) != 0)
       return 0;
-    return SE.getConstant(C->getValue()->getValue()
-               .sdiv(RC->getValue()->getValue()));
+    return SE.getConstant(LA.sdiv(RA));
   }
 
   // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
@@ -440,6 +449,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
       if (!Step) return 0;
       return SE.getAddRecExpr(Start, Step, AR->getLoop());
     }
+    return 0;
   }
 
   // Distribute the sdiv over add operands, if the add doesn't overflow.
@@ -455,10 +465,11 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
       }
       return SE.getAddExpr(Ops);
     }
+    return 0;
   }
 
   // Check for a multiply operand that we can pull RHS out of.
-  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
+  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
     if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
       SmallVector<const SCEV *, 4> Ops;
       bool Found = false;
@@ -475,6 +486,8 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
       }
       return Found ? SE.getMulExpr(Ops) : 0;
     }
+    return 0;
+  }
 
   // Otherwise we don't know.
   return 0;
@@ -546,7 +559,7 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
       case Intrinsic::x86_sse2_storeu_pd:
       case Intrinsic::x86_sse2_storeu_dq:
       case Intrinsic::x86_sse2_storel_dq:
-        if (II->getOperand(1) == OperandVal)
+        if (II->getArgOperand(0) == OperandVal)
           isAddress = true;
         break;
       }
@@ -568,7 +581,7 @@ static const Type *getAccessType(const Instruction *Inst) {
     case Intrinsic::x86_sse2_storeu_pd:
     case Intrinsic::x86_sse2_storeu_dq:
     case Intrinsic::x86_sse2_storel_dq:
-      AccessTy = II->getArgOperand(0)->getType();
+      AccessTy = II->getArgOperand(0)->getType();
       break;
     }
   }
@@ -976,6 +989,8 @@ public:
   void dump() const;
 };
 
+}
+
 /// HasFormula - Test whether this use as a formula which has the same
 /// registers as the given formula.
 bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
@@ -1203,6 +1218,32 @@ static bool isAlwaysFoldable(const SCEV *S,
   return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
 }
 
+namespace {
+
+/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding
+/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind.
+struct UseMapDenseMapInfo {
+  static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() {
+    return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic);
+  }
+
+  static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() {
+    return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic);
+  }
+
+  static unsigned
+  getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) {
+    unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first);
+    Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second));
+    return Result;
+  }
+
+  static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS,
+                      const std::pair<const SCEV *, LSRUse::KindType> &RHS) {
+    return LHS == RHS;
+  }
+};
+
 /// FormulaSorter - This class implements an ordering for formulae which sorts
 /// the by their standalone cost.
 class FormulaSorter {
@@ -1275,7 +1316,9 @@ class LSRInstance {
   }
 
   // Support for sharing of LSRUses between LSRFixups.
-  typedef DenseMap<const SCEV *, size_t> UseMapTy;
+  typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>,
+                   size_t,
+                   UseMapDenseMapInfo> UseMapTy;
   UseMapTy UseMap;
 
   bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
@@ -1613,8 +1656,11 @@ ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
     NewRHS = Sel->getOperand(1);
   else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
     NewRHS = Sel->getOperand(2);
+  else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
+    NewRHS = SU->getValue();
   else
-    llvm_unreachable("Max doesn't match expected pattern!");
+    // Max doesn't match expected pattern.
+    return Cond;
 
   // Determine the new comparison opcode. It may be signed or unsigned,
   // and the original comparison may be either equality or inequality.
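// [Editorial sketch, not part of the patch] The UseMapDenseMapInfo struct
// added above follows the usual recipe for keying a DenseMap by a pair:
// reserve two impossible sentinel keys for the empty and tombstone buckets,
// and mix the hashes of both halves so keys differing only in Kind hash
// apart. A self-contained version of the same pattern, with hypothetical
// names and no LLVM dependencies:
#include <cstdint>
#include <utility>

struct PairKeyInfo {
  typedef std::pair<const void *, unsigned> Key;
  // -1 and -2 are never valid object addresses, so they are safe sentinels.
  static Key getEmptyKey()     { return Key(reinterpret_cast<const void *>(-1), 0); }
  static Key getTombstoneKey() { return Key(reinterpret_cast<const void *>(-2), 0); }
  static unsigned getHashValue(const Key &V) {
    unsigned H = unsigned(reinterpret_cast<uintptr_t>(V.first) >> 4);
    return H ^ (V.second * 37u);  // combine both halves of the pair
  }
  static bool isEqual(const Key &LHS, const Key &RHS) { return LHS == RHS; }
};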
@@ -1805,6 +1851,8 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, NewMaxOffset = NewOffset; } // Check for a mismatched access type, and fall back conservatively as needed. + // TODO: Be less conservative when the type is similar and can use the same + // addressing modes. if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) NewAccessTy = Type::getVoidTy(AccessTy->getContext()); @@ -1833,7 +1881,7 @@ LSRInstance::getUse(const SCEV *&Expr, } std::pair P = - UseMap.insert(std::make_pair(Expr, 0)); + UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0)); if (!P.second) { // A use already existed with this base. size_t LUIdx = P.first->second; @@ -1919,7 +1967,7 @@ void LSRInstance::CollectInterestingTypesAndFactors() { Strides.insert(AR->getStepRecurrence(SE)); Worklist.push_back(AR->getStart()); } else if (const SCEVAddExpr *Add = dyn_cast(S)) { - Worklist.insert(Worklist.end(), Add->op_begin(), Add->op_end()); + Worklist.append(Add->op_begin(), Add->op_end()); } } while (!Worklist.empty()); } @@ -2086,7 +2134,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { const SCEV *S = Worklist.pop_back_val(); if (const SCEVNAryExpr *N = dyn_cast(S)) - Worklist.insert(Worklist.end(), N->op_begin(), N->op_end()); + Worklist.append(N->op_begin(), N->op_end()); else if (const SCEVCastExpr *C = dyn_cast(S)) Worklist.push_back(C->getOperand()); else if (const SCEVUDivExpr *D = dyn_cast(S)) { @@ -2095,8 +2143,12 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { } else if (const SCEVUnknown *U = dyn_cast(S)) { if (!Inserted.insert(U)) continue; const Value *V = U->getValue(); - if (const Instruction *Inst = dyn_cast(V)) + if (const Instruction *Inst = dyn_cast(V)) { + // Look for instructions defined outside the loop. if (L->contains(Inst)) continue; + } else if (isa(V)) + // Undef doesn't have a live range, so it doesn't matter. + continue; for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); UI != UE; ++UI) { const Instruction *UserInst = dyn_cast(*UI); @@ -2155,20 +2207,23 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() { /// separate registers. If C is non-null, multiply each subexpression by C. static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, SmallVectorImpl &Ops, + SmallVectorImpl &UninterestingOps, + const Loop *L, ScalarEvolution &SE) { if (const SCEVAddExpr *Add = dyn_cast(S)) { // Break out add operands. for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); I != E; ++I) - CollectSubexprs(*I, C, Ops, SE); + CollectSubexprs(*I, C, Ops, UninterestingOps, L, SE); return; } else if (const SCEVAddRecExpr *AR = dyn_cast(S)) { // Split a non-zero base out of an addrec. if (!AR->getStart()->isZero()) { CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), C, Ops, SE); - CollectSubexprs(AR->getStart(), C, Ops, SE); + AR->getLoop()), + C, Ops, UninterestingOps, L, SE); + CollectSubexprs(AR->getStart(), C, Ops, UninterestingOps, L, SE); return; } } else if (const SCEVMulExpr *Mul = dyn_cast(S)) { @@ -2178,13 +2233,17 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, dyn_cast(Mul->getOperand(0))) { CollectSubexprs(Mul->getOperand(1), C ? cast(SE.getMulExpr(C, Op0)) : Op0, - Ops, SE); + Ops, UninterestingOps, L, SE); return; } } - // Otherwise use the value itself. - Ops.push_back(C ? SE.getMulExpr(C, S) : S); + // Otherwise use the value itself. 
Loop-variant "unknown" values are + // uninteresting; we won't be able to do anything meaningful with them. + if (!C && isa(S) && !S->isLoopInvariant(L)) + UninterestingOps.push_back(S); + else + Ops.push_back(C ? SE.getMulExpr(C, S) : S); } /// GenerateReassociations - Split out subexpressions from adds and the bases of @@ -2198,8 +2257,15 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) { const SCEV *BaseReg = Base.BaseRegs[i]; - SmallVector AddOps; - CollectSubexprs(BaseReg, 0, AddOps, SE); + SmallVector AddOps, UninterestingAddOps; + CollectSubexprs(BaseReg, 0, AddOps, UninterestingAddOps, L, SE); + + // Add any uninteresting values as one register, as we won't be able to + // form any interesting reassociation opportunities with them. They'll + // just have to be added inside the loop no matter what we do. + if (!UninterestingAddOps.empty()) + AddOps.push_back(SE.getAddExpr(UninterestingAddOps)); + if (AddOps.size() == 1) continue; for (SmallVectorImpl::const_iterator J = AddOps.begin(), @@ -2212,11 +2278,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, continue; // Collect all operands except *J. - SmallVector InnerAddOps; - for (SmallVectorImpl::const_iterator K = AddOps.begin(), - KE = AddOps.end(); K != KE; ++K) - if (K != J) - InnerAddOps.push_back(*K); + SmallVector InnerAddOps + ( ((const SmallVector &)AddOps).begin(), J); + InnerAddOps.append + (next(J), ((const SmallVector &)AddOps).end()); // Don't leave just a constant behind in a register if the constant could // be folded into an immediate field. @@ -2350,13 +2415,12 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, for (SmallSetVector::const_iterator I = Factors.begin(), E = Factors.end(); I != E; ++I) { int64_t Factor = *I; - Formula F = Base; // Check that the multiplication doesn't overflow. - if (F.AM.BaseOffs == INT64_MIN && Factor == -1) + if (Base.AM.BaseOffs == INT64_MIN && Factor == -1) continue; - F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; - if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs) + int64_t NewBaseOffs = (uint64_t)Base.AM.BaseOffs * Factor; + if (NewBaseOffs / Factor != Base.AM.BaseOffs) continue; // Check that multiplying with the use offset doesn't overflow. @@ -2367,6 +2431,9 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, if (Offset / Factor != LU.MinOffset) continue; + Formula F = Base; + F.AM.BaseOffs = NewBaseOffs; + // Check that this scale is legal. if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI)) continue; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index ae7bf40..0c900ff 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -445,7 +445,7 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { // This is a very ad-hoc heuristic. if (Metrics.NumInsts > Threshold || Metrics.NumBlocks * 5 > Threshold || - Metrics.NeverInline) { + Metrics.containsIndirectBr || Metrics.isRecursive) { DEBUG(dbgs() << "NOT unswitching loop %" << currentLoop->getHeader()->getName() << ", cost too high: " << currentLoop->getBlocks().size() << "\n"); @@ -457,21 +457,21 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val) { } // RemapInstruction - Convert the instruction operands from referencing the -// current values into those specified by ValueMap. +// current values into those specified by VMap. 
// static inline void RemapInstruction(Instruction *I, - DenseMap &ValueMap) { + ValueMap &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); - DenseMap::iterator It = ValueMap.find(Op); - if (It != ValueMap.end()) Op = It->second; + ValueMap::iterator It = VMap.find(Op); + if (It != VMap.end()) Op = It->second; I->setOperand(op, Op); } } /// CloneLoop - Recursively clone the specified loop and all of its children, /// mapping the blocks with the specified map. -static Loop *CloneLoop(Loop *L, Loop *PL, DenseMap &VM, +static Loop *CloneLoop(Loop *L, Loop *PL, ValueMap &VM, LoopInfo *LI, LPPassManager *LPM) { Loop *New = new Loop(); LPM->insertLoop(New, PL); @@ -615,11 +615,11 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, // the loop preheader and exit blocks), keeping track of the mapping between // the instructions and blocks. NewBlocks.reserve(LoopBlocks.size()); - DenseMap ValueMap; + ValueMap VMap; for (unsigned i = 0, e = LoopBlocks.size(); i != e; ++i) { - BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], ValueMap, ".us", F); + BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[i], VMap, ".us", F); NewBlocks.push_back(NewBB); - ValueMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. + VMap[LoopBlocks[i]] = NewBB; // Keep the BB mapping. LPM->cloneBasicBlockSimpleAnalysis(LoopBlocks[i], NewBB, L); } @@ -629,7 +629,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, NewBlocks[0], F->end()); // Now we create the new Loop object for the versioned loop. - Loop *NewLoop = CloneLoop(L, L->getParentLoop(), ValueMap, LI, LPM); + Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM); Loop *ParentLoop = L->getParentLoop(); if (ParentLoop) { // Make sure to add the cloned preheader and exit blocks to the parent loop @@ -638,7 +638,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, } for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) { - BasicBlock *NewExit = cast(ValueMap[ExitBlocks[i]]); + BasicBlock *NewExit = cast(VMap[ExitBlocks[i]]); // The new exit block should be in the same loop as the old one. if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[i])) ExitBBLoop->addBasicBlockToLoop(NewExit, LI->getBase()); @@ -653,8 +653,8 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (BasicBlock::iterator I = ExitSucc->begin(); isa(I); ++I) { PN = cast(I); Value *V = PN->getIncomingValueForBlock(ExitBlocks[i]); - DenseMap::iterator It = ValueMap.find(V); - if (It != ValueMap.end()) V = It->second; + ValueMap::iterator It = VMap.find(V); + if (It != VMap.end()) V = It->second; PN->addIncoming(V, NewExit); } } @@ -663,7 +663,7 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val, for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) for (BasicBlock::iterator I = NewBlocks[i]->begin(), E = NewBlocks[i]->end(); I != E; ++I) - RemapInstruction(I, ValueMap); + RemapInstruction(I, VMap); // Rewrite the original preheader to select between versions of the loop. 
BranchInst *OldBR = cast(loopPreheader->getTerminator()); diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 3611b8e..0e566c5 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -632,7 +632,7 @@ bool MemCpyOpt::performCallSlotOptzn(MemCpyInst *cpy, CallInst *C) { // Remove the memcpy MD.removeInstruction(cpy); cpy->eraseFromParent(); - NumMemCpyInstr++; + ++NumMemCpyInstr; return true; } @@ -710,7 +710,7 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { if (MD.getDependency(C) == dep) { MD.removeInstruction(M); M->eraseFromParent(); - NumMemCpyInstr++; + ++NumMemCpyInstr; return true; } diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 5aca9cdc..98452f5 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -407,13 +407,14 @@ static Value *NegateValue(Value *V, Instruction *BI) { // Okay, we need to materialize a negated version of V with an instruction. // Scan the use lists of V to see if we have one already. for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){ - if (!BinaryOperator::isNeg(*UI)) continue; + User *U = *UI; + if (!BinaryOperator::isNeg(U)) continue; // We found one! Now we have to make sure that the definition dominates // this use. We do this by moving it to the entry block (if it is a // non-instruction value) or right after the definition. These negates will // be zapped by reassociate later, so we don't need much finesse here. - BinaryOperator *TheNeg = cast(*UI); + BinaryOperator *TheNeg = cast(U); // Verify that the negate is in this function, V might be a constant expr. if (TheNeg->getParent()->getParent() != BI->getParent()->getParent()) diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 5ca9ce3..dd445f6 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -926,7 +926,7 @@ void SROA::DoScalarReplacement(AllocaInst *AI, DeleteDeadInstructions(); AI->eraseFromParent(); - NumReplaced++; + ++NumReplaced; } /// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, @@ -965,11 +965,11 @@ void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, isSafeGEP(GEPI, AI, GEPOffset, Info); if (!Info.isUnsafe) isSafeForScalarRepl(GEPI, AI, GEPOffset, Info); - } else if (MemIntrinsic *MI = dyn_cast(UI)) { + } else if (MemIntrinsic *MI = dyn_cast(User)) { ConstantInt *Length = dyn_cast(MI->getLength()); if (Length) isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0, - UI.getOperandNo() == 1, Info); + UI.getOperandNo() == CallInst::ArgOffset, Info); else MarkUnsafe(Info); } else if (LoadInst *LI = dyn_cast(User)) { @@ -1272,6 +1272,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // If there is an other pointer, we want to convert it to the same pointer // type as AI has, so we can GEP through it safely. if (OtherPtr) { + unsigned AddrSpace = + cast(OtherPtr->getType())->getAddressSpace(); // Remove bitcasts and all-zero GEPs from OtherPtr. This is an // optimization, but it's also required to detect the corner case where @@ -1279,20 +1281,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // OtherPtr may be a bitcast or GEP that currently being rewritten. 
(This // function is only called for mem intrinsics that access the whole // aggregate, so non-zero GEPs are not an issue here.) - while (1) { - if (BitCastInst *BC = dyn_cast(OtherPtr)) { - OtherPtr = BC->getOperand(0); - continue; - } - if (GetElementPtrInst *GEP = dyn_cast(OtherPtr)) { - // All zero GEPs are effectively bitcasts. - if (GEP->hasAllZeroIndices()) { - OtherPtr = GEP->getOperand(0); - continue; - } - } - break; - } + OtherPtr = OtherPtr->stripPointerCasts(); + // Copying the alloca to itself is a no-op: just delete it. if (OtherPtr == AI || OtherPtr == NewElts[0]) { // This code will run twice for a no-op memcpy -- once for each operand. @@ -1304,15 +1294,13 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, return; } - if (ConstantExpr *BCE = dyn_cast(OtherPtr)) - if (BCE->getOpcode() == Instruction::BitCast) - OtherPtr = BCE->getOperand(0); - // If the pointer is not the right type, insert a bitcast to the right // type. - if (OtherPtr->getType() != AI->getType()) - OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(), - MI); + const Type *NewTy = + PointerType::get(AI->getType()->getElementType(), AddrSpace); + + if (OtherPtr->getType() != NewTy) + OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI); } // Process each element of the aggregate. @@ -1373,7 +1361,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, // If the stored element is zero (common case), just store a null // constant. Constant *StoreVal; - if (ConstantInt *CI = dyn_cast(MI->getOperand(2))) { + if (ConstantInt *CI = dyn_cast(MI->getArgOperand(1))) { if (CI->isZero()) { StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> } else { @@ -1436,7 +1424,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, Value *Ops[] = { SROADest ? EltPtr : OtherElt, // Dest ptr SROADest ? OtherElt : EltPtr, // Src ptr - ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size // Align ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign), MI->getVolatileCst() @@ -1451,8 +1439,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } else { assert(isa(MI)); Value *Ops[] = { - EltPtr, MI->getOperand(2), // Dest, Value, - ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size + EltPtr, MI->getArgOperand(1), // Dest, Value, + ConstantInt::get(MI->getArgOperand(2)->getType(), EltSize), // Size Zero, // Align ConstantInt::get(Type::getInt1Ty(MI->getContext()), 0) // isVolatile }; @@ -1655,7 +1643,12 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); } - ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); + // Don't create an 'or x, 0' on the first iteration. + if (!isa(ResultVal) || + !cast(ResultVal)->isNullValue()) + ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); + else + ResultVal = SrcField; } // Handle tail padding by truncating the result @@ -1794,7 +1787,7 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, if (isOffset) return false; // If the memintrinsic isn't using the alloca as the dest, reject it. - if (UI.getOperandNo() != 1) return false; + if (UI.getOperandNo() != CallInst::ArgOffset) return false; // If the source of the memcpy/move is not a constant global, reject it. 
if (!PointsToConstantGlobal(MI->getSource())) diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 9744100..49d93a2 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -137,6 +137,9 @@ static bool MarkAliveBlocks(BasicBlock *BB, // they should be changed to unreachable by passes that can't modify the // CFG. if (StoreInst *SI = dyn_cast(BBI)) { + // Don't touch volatile stores. + if (SI->isVolatile()) continue; + Value *Ptr = SI->getOperand(1); if (isa(Ptr) || diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 7414be7..b1c6191 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -66,6 +66,11 @@ public: this->TD = TD; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); + + // We never change the calling convention. + if (CI->getCallingConv() != llvm::CallingConv::C) + return NULL; + return CallOptimizer(CI->getCalledFunction(), CI, B); } }; @@ -92,6 +97,20 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { return true; } +/// IsOnlyUsedInEqualityComparison - Return true if it is only used in equality +/// comparisons with With. +static bool IsOnlyUsedInEqualityComparison(Value *V, Value *With) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast(*UI)) + if (IC->isEquality() && IC->getOperand(1) == With) + continue; + // Unknown instruction. + return false; + } + return true; +} + //===----------------------------------------------------------------------===// // String and Memory LibCall Optimizations //===----------------------------------------------------------------------===// @@ -110,8 +129,8 @@ struct StrCatOpt : public LibCallOptimization { return 0; // Extract some information from the instruction - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); // See if we can get the length of the input string. uint64_t Len = GetStringLength(Src); @@ -162,12 +181,12 @@ struct StrNCatOpt : public StrCatOpt { return 0; // Extract some information from the instruction - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); uint64_t Len; // We don't do anything if length is not constant - if (ConstantInt *LengthArg = dyn_cast(CI->getOperand(3))) + if (ConstantInt *LengthArg = dyn_cast(CI->getArgOperand(2))) Len = LengthArg->getZExtValue(); else return 0; @@ -207,11 +226,11 @@ struct StrChrOpt : public LibCallOptimization { FT->getParamType(0) != FT->getReturnType()) return 0; - Value *SrcStr = CI->getOperand(1); + Value *SrcStr = CI->getArgOperand(0); // If the second operand is non-constant, see if we can compute the length // of the input string and turn this into memchr. - ConstantInt *CharC = dyn_cast(CI->getOperand(2)); + ConstantInt *CharC = dyn_cast(CI->getArgOperand(1)); if (CharC == 0) { // These optimizations require TargetData. if (!TD) return 0; @@ -220,7 +239,7 @@ struct StrChrOpt : public LibCallOptimization { if (Len == 0 || !FT->getParamType(1)->isIntegerTy(32))// memchr needs i32. return 0; - return EmitMemChr(SrcStr, CI->getOperand(2), // include nul. + return EmitMemChr(SrcStr, CI->getArgOperand(1), // include nul. 
ConstantInt::get(TD->getIntPtrType(*Context), Len), B, TD); } @@ -260,12 +279,12 @@ struct StrCmpOpt : public LibCallOptimization { // Verify the "strcmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 2 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strcmp(x,x) -> 0 return ConstantInt::get(CI->getType(), 0); @@ -308,19 +327,19 @@ struct StrNCmpOpt : public LibCallOptimization { // Verify the "strncmp" function prototype. const FunctionType *FT = Callee->getFunctionType(); if (FT->getNumParams() != 3 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || FT->getParamType(0) != FT->getParamType(1) || FT->getParamType(0) != Type::getInt8PtrTy(*Context) || !FT->getParamType(2)->isIntegerTy()) return 0; - Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2); + Value *Str1P = CI->getArgOperand(0), *Str2P = CI->getArgOperand(1); if (Str1P == Str2P) // strncmp(x,x,n) -> 0 return ConstantInt::get(CI->getType(), 0); // Get the length argument if it is constant. uint64_t Length; - if (ConstantInt *LengthArg = dyn_cast(CI->getOperand(3))) + if (ConstantInt *LengthArg = dyn_cast(CI->getArgOperand(2))) Length = LengthArg->getZExtValue(); else return 0; @@ -328,6 +347,9 @@ struct StrNCmpOpt : public LibCallOptimization { if (Length == 0) // strncmp(x,y,0) -> 0 return ConstantInt::get(CI->getType(), 0); + if (TD && Length == 1) // strncmp(x,y,1) -> memcmp(x,y,1) + return EmitMemCmp(Str1P, Str2P, CI->getArgOperand(2), B, TD); + std::string Str1, Str2; bool HasStr1 = GetConstantStringInfo(Str1P, Str1); bool HasStr2 = GetConstantStringInfo(Str2P, Str2); @@ -365,7 +387,7 @@ struct StrCpyOpt : public LibCallOptimization { FT->getParamType(0) != Type::getInt8PtrTy(*Context)) return 0; - Value *Dst = CI->getOperand(1), *Src = CI->getOperand(2); + Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1); if (Dst == Src) // strcpy(x,x) -> x return Src; @@ -381,7 +403,7 @@ struct StrCpyOpt : public LibCallOptimization { if (OptChkCall) EmitMemCpyChk(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), - CI->getOperand(3), B, TD); + CI->getArgOperand(2), B, TD); else EmitMemCpy(Dst, Src, ConstantInt::get(TD->getIntPtrType(*Context), Len), @@ -402,9 +424,9 @@ struct StrNCpyOpt : public LibCallOptimization { !FT->getParamType(2)->isIntegerTy()) return 0; - Value *Dst = CI->getOperand(1); - Value *Src = CI->getOperand(2); - Value *LenOp = CI->getOperand(3); + Value *Dst = CI->getArgOperand(0); + Value *Src = CI->getArgOperand(1); + Value *LenOp = CI->getArgOperand(2); // See if we can get the length of the input string. 
uint64_t SrcLen = GetStringLength(Src); @@ -452,7 +474,7 @@ struct StrLenOpt : public LibCallOptimization { !FT->getReturnType()->isIntegerTy()) return 0; - Value *Src = CI->getOperand(1); + Value *Src = CI->getArgOperand(0); // Constant folding: strlen("xyz") -> 3 if (uint64_t Len = GetStringLength(Src)) @@ -477,7 +499,7 @@ struct StrToOpt : public LibCallOptimization { !FT->getParamType(1)->isPointerTy()) return 0; - Value *EndPtr = CI->getOperand(2); + Value *EndPtr = CI->getArgOperand(1); if (isa(EndPtr)) { CI->setOnlyReadsMemory(); CI->addAttribute(1, Attribute::NoCapture); @@ -500,17 +522,34 @@ struct StrStrOpt : public LibCallOptimization { return 0; // fold strstr(x, x) -> x. - if (CI->getOperand(1) == CI->getOperand(2)) - return B.CreateBitCast(CI->getOperand(1), CI->getType()); + if (CI->getArgOperand(0) == CI->getArgOperand(1)) + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); + + // fold strstr(a, b) == a -> strncmp(a, b, strlen(b)) == 0 + if (TD && IsOnlyUsedInEqualityComparison(CI, CI->getArgOperand(0))) { + Value *StrLen = EmitStrLen(CI->getArgOperand(1), B, TD); + Value *StrNCmp = EmitStrNCmp(CI->getArgOperand(0), CI->getArgOperand(1), + StrLen, B, TD); + for (Value::use_iterator UI = CI->use_begin(), UE = CI->use_end(); + UI != UE; ) { + ICmpInst *Old = cast(UI++); + Value *Cmp = B.CreateICmp(Old->getPredicate(), StrNCmp, + ConstantInt::getNullValue(StrNCmp->getType()), + "cmp"); + Old->replaceAllUsesWith(Cmp); + Old->eraseFromParent(); + } + return CI; + } // See if either input string is a constant string. std::string SearchStr, ToFindStr; - bool HasStr1 = GetConstantStringInfo(CI->getOperand(1), SearchStr); - bool HasStr2 = GetConstantStringInfo(CI->getOperand(2), ToFindStr); + bool HasStr1 = GetConstantStringInfo(CI->getArgOperand(0), SearchStr); + bool HasStr2 = GetConstantStringInfo(CI->getArgOperand(1), ToFindStr); // fold strstr(x, "") -> x. if (HasStr2 && ToFindStr.empty()) - return B.CreateBitCast(CI->getOperand(1), CI->getType()); + return B.CreateBitCast(CI->getArgOperand(0), CI->getType()); // If both strings are known, constant fold it. if (HasStr1 && HasStr2) { @@ -520,14 +559,14 @@ struct StrStrOpt : public LibCallOptimization { return Constant::getNullValue(CI->getType()); // strstr("abcd", "bc") -> gep((char*)"abcd", 1) - Value *Result = CastToCStr(CI->getOperand(1), B); + Value *Result = CastToCStr(CI->getArgOperand(0), B); Result = B.CreateConstInBoundsGEP1_64(Result, Offset, "strstr"); return B.CreateBitCast(Result, CI->getType()); } // fold strstr(x, "y") -> strchr(x, 'y'). if (HasStr2 && ToFindStr.size() == 1) - return B.CreateBitCast(EmitStrChr(CI->getOperand(1), ToFindStr[0], B, TD), + return B.CreateBitCast(EmitStrChr(CI->getArgOperand(0), ToFindStr[0], B, TD), CI->getType()); return 0; } @@ -545,13 +584,13 @@ struct MemCmpOpt : public LibCallOptimization { !FT->getReturnType()->isIntegerTy(32)) return 0; - Value *LHS = CI->getOperand(1), *RHS = CI->getOperand(2); + Value *LHS = CI->getArgOperand(0), *RHS = CI->getArgOperand(1); if (LHS == RHS) // memcmp(s,s,x) -> 0 return Constant::getNullValue(CI->getType()); // Make sure we have a constant length. 
- ConstantInt *LenC = dyn_cast(CI->getOperand(3)); + ConstantInt *LenC = dyn_cast(CI->getArgOperand(2)); if (!LenC) return 0; uint64_t Len = LenC->getZExtValue(); @@ -598,9 +637,9 @@ struct MemCpyOpt : public LibCallOptimization { return 0; // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, false, B, TD); - return CI->getOperand(1); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); + return CI->getArgOperand(0); } }; @@ -620,9 +659,9 @@ struct MemMoveOpt : public LibCallOptimization { return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - EmitMemMove(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), 1, false, B, TD); - return CI->getOperand(1); + EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), 1, false, B, TD); + return CI->getArgOperand(0); } }; @@ -642,10 +681,10 @@ struct MemSetOpt : public LibCallOptimization { return 0; // memset(p, v, n) -> llvm.memset(p, v, n, 1) - Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), + Value *Val = B.CreateIntCast(CI->getArgOperand(1), Type::getInt8Ty(*Context), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD); - return CI->getOperand(1); + EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); + return CI->getArgOperand(0); } }; @@ -666,7 +705,7 @@ struct PowOpt : public LibCallOptimization { !FT->getParamType(0)->isFloatingPointTy()) return 0; - Value *Op1 = CI->getOperand(1), *Op2 = CI->getOperand(2); + Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1); if (ConstantFP *Op1C = dyn_cast(Op1)) { if (Op1C->isExactlyValue(1.0)) // pow(1.0, x) -> 1.0 return Op1C; @@ -720,18 +759,18 @@ struct Exp2Opt : public LibCallOptimization { !FT->getParamType(0)->isFloatingPointTy()) return 0; - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); // Turn exp2(sitofp(x)) -> ldexp(1.0, sext(x)) if sizeof(x) <= 32 // Turn exp2(uitofp(x)) -> ldexp(1.0, zext(x)) if sizeof(x) < 32 Value *LdExpArg = 0; if (SIToFPInst *OpC = dyn_cast(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() <= 32) LdExpArg = B.CreateSExt(OpC->getOperand(0), - Type::getInt32Ty(*Context), "tmp"); + Type::getInt32Ty(*Context), "tmp"); } else if (UIToFPInst *OpC = dyn_cast(Op)) { if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32) LdExpArg = B.CreateZExt(OpC->getOperand(0), - Type::getInt32Ty(*Context), "tmp"); + Type::getInt32Ty(*Context), "tmp"); } if (LdExpArg) { @@ -772,7 +811,7 @@ struct UnaryDoubleFPOpt : public LibCallOptimization { return 0; // If this is something like 'floor((double)floatval)', convert to floorf. - FPExtInst *Cast = dyn_cast(CI->getOperand(1)); + FPExtInst *Cast = dyn_cast(CI->getArgOperand(0)); if (Cast == 0 || !Cast->getOperand(0)->getType()->isFloatTy()) return 0; @@ -797,11 +836,11 @@ struct FFSOpt : public LibCallOptimization { // Just make sure this has 2 arguments of the same FP type, which match the // result type. if (FT->getNumParams() != 1 || - !FT->getReturnType()->isIntegerTy(32) || + !FT->getReturnType()->isIntegerTy(32) || !FT->getParamType(0)->isIntegerTy()) return 0; - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); // Constant fold. 
if (ConstantInt *CI = dyn_cast(Op)) { @@ -821,7 +860,7 @@ struct FFSOpt : public LibCallOptimization { Value *Cond = B.CreateICmpNE(Op, Constant::getNullValue(ArgType), "tmp"); return B.CreateSelect(Cond, V, - ConstantInt::get(Type::getInt32Ty(*Context), 0)); + ConstantInt::get(Type::getInt32Ty(*Context), 0)); } }; @@ -837,7 +876,7 @@ struct IsDigitOpt : public LibCallOptimization { return 0; // isdigit(c) -> (c-'0') getOperand(1); + Value *Op = CI->getArgOperand(0); Op = B.CreateSub(Op, ConstantInt::get(Type::getInt32Ty(*Context), '0'), "isdigittmp"); Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 10), @@ -858,7 +897,7 @@ struct IsAsciiOpt : public LibCallOptimization { return 0; // isascii(c) -> c getOperand(1); + Value *Op = CI->getArgOperand(0); Op = B.CreateICmpULT(Op, ConstantInt::get(Type::getInt32Ty(*Context), 128), "isascii"); return B.CreateZExt(Op, CI->getType()); @@ -877,7 +916,7 @@ struct AbsOpt : public LibCallOptimization { return 0; // abs(x) -> x >s -1 ? x : -x - Value *Op = CI->getOperand(1); + Value *Op = CI->getArgOperand(0); Value *Pos = B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), "ispos"); @@ -899,7 +938,7 @@ struct ToAsciiOpt : public LibCallOptimization { return 0; // isascii(c) -> c & 0x7f - return B.CreateAnd(CI->getOperand(1), + return B.CreateAnd(CI->getArgOperand(0), ConstantInt::get(CI->getType(),0x7F)); } }; @@ -922,7 +961,7 @@ struct PrintFOpt : public LibCallOptimization { // Check for a fixed format string. std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(1), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr)) return 0; // Empty format string -> noop. @@ -954,20 +993,20 @@ struct PrintFOpt : public LibCallOptimization { } // Optimize specific format strings. - // printf("%c", chr) --> putchar(*(i8*)dst) - if (FormatStr == "%c" && CI->getNumOperands() > 2 && - CI->getOperand(2)->getType()->isIntegerTy()) { - Value *Res = EmitPutChar(CI->getOperand(2), B, TD); + // printf("%c", chr) --> putchar(chr) + if (FormatStr == "%c" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isIntegerTy()) { + Value *Res = EmitPutChar(CI->getArgOperand(1), B, TD); if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); } // printf("%s\n", str) --> puts(str) - if (FormatStr == "%s\n" && CI->getNumOperands() > 2 && - CI->getOperand(2)->getType()->isPointerTy() && + if (FormatStr == "%s\n" && CI->getNumArgOperands() > 1 && + CI->getArgOperand(1)->getType()->isPointerTy() && CI->use_empty()) { - EmitPutS(CI->getOperand(2), B, TD); + EmitPutS(CI->getArgOperand(1), B, TD); return CI; } return 0; @@ -988,11 +1027,11 @@ struct SPrintFOpt : public LibCallOptimization { // Check for a fixed format string. std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; // If we just have a format string (nothing else crazy) transform it. - if (CI->getNumOperands() == 3) { + if (CI->getNumArgOperands() == 2) { // Make sure there's no % in the constant array. We could try to handle // %% -> % in the future if we cared. for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) @@ -1003,7 +1042,7 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), // Copy the nul byte. + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), // Copy the nul byte. 
ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()+1), 1, false, B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); @@ -1011,16 +1050,17 @@ struct SPrintFOpt : public LibCallOptimization { // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) return 0; // Decode the second character of the format string. if (FormatStr[1] == 'c') { // sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 - if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; - Value *V = B.CreateTrunc(CI->getOperand(3), + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + Value *V = B.CreateTrunc(CI->getArgOperand(2), Type::getInt8Ty(*Context), "char"); - Value *Ptr = CastToCStr(CI->getOperand(1), B); + Value *Ptr = CastToCStr(CI->getArgOperand(0), B); B.CreateStore(V, Ptr); Ptr = B.CreateGEP(Ptr, ConstantInt::get(Type::getInt32Ty(*Context), 1), "nul"); @@ -1034,13 +1074,13 @@ struct SPrintFOpt : public LibCallOptimization { if (!TD) return 0; // sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1) - if (!CI->getOperand(3)->getType()->isPointerTy()) return 0; + if (!CI->getArgOperand(2)->getType()->isPointerTy()) return 0; - Value *Len = EmitStrLen(CI->getOperand(3), B, TD); + Value *Len = EmitStrLen(CI->getArgOperand(2), B, TD); Value *IncLen = B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); - EmitMemCpy(CI->getOperand(1), CI->getOperand(3), IncLen, 1, false, B, TD); + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1, false, B, TD); // The sprintf result is the unincremented number of bytes in the string. return B.CreateIntCast(Len, CI->getType(), false); @@ -1064,8 +1104,8 @@ struct FWriteOpt : public LibCallOptimization { return 0; // Get the element size and count. - ConstantInt *SizeC = dyn_cast(CI->getOperand(2)); - ConstantInt *CountC = dyn_cast(CI->getOperand(3)); + ConstantInt *SizeC = dyn_cast(CI->getArgOperand(1)); + ConstantInt *CountC = dyn_cast(CI->getArgOperand(2)); if (!SizeC || !CountC) return 0; uint64_t Bytes = SizeC->getZExtValue()*CountC->getZExtValue(); @@ -1075,8 +1115,8 @@ struct FWriteOpt : public LibCallOptimization { // If this is writing one byte, turn it into fputc. if (Bytes == 1) { // fwrite(S,1,1,F) -> fputc(S[0],F) - Value *Char = B.CreateLoad(CastToCStr(CI->getOperand(1), B), "char"); - EmitFPutC(Char, CI->getOperand(4), B, TD); + Value *Char = B.CreateLoad(CastToCStr(CI->getArgOperand(0), B), "char"); + EmitFPutC(Char, CI->getArgOperand(3), B, TD); return ConstantInt::get(CI->getType(), 1); } @@ -1100,11 +1140,11 @@ struct FPutsOpt : public LibCallOptimization { return 0; // fputs(s,F) --> fwrite(s,1,strlen(s),F) - uint64_t Len = GetStringLength(CI->getOperand(1)); + uint64_t Len = GetStringLength(CI->getArgOperand(0)); if (!Len) return 0; - EmitFWrite(CI->getOperand(1), + EmitFWrite(CI->getArgOperand(0), ConstantInt::get(TD->getIntPtrType(*Context), Len-1), - CI->getOperand(2), B, TD); + CI->getArgOperand(1), B, TD); return CI; // Known to have no uses (see above). } }; @@ -1123,11 +1163,11 @@ struct FPrintFOpt : public LibCallOptimization { // All the optimizations depend on the format string. 
std::string FormatStr; - if (!GetConstantStringInfo(CI->getOperand(2), FormatStr)) + if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) return 0; // fprintf(F, "foo") --> fwrite("foo", 3, 1, F) - if (CI->getNumOperands() == 3) { + if (CI->getNumArgOperands() == 2) { for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) if (FormatStr[i] == '%') // Could handle %% -> % if we cared. return 0; // We found a format specifier. @@ -1135,31 +1175,32 @@ struct FPrintFOpt : public LibCallOptimization { // These optimizations require TargetData. if (!TD) return 0; - EmitFWrite(CI->getOperand(2), + EmitFWrite(CI->getArgOperand(1), ConstantInt::get(TD->getIntPtrType(*Context), FormatStr.size()), - CI->getOperand(1), B, TD); + CI->getArgOperand(0), B, TD); return ConstantInt::get(CI->getType(), FormatStr.size()); } // The remaining optimizations require the format string to be "%s" or "%c" // and have an extra operand. - if (FormatStr.size() != 2 || FormatStr[0] != '%' || CI->getNumOperands() <4) + if (FormatStr.size() != 2 || FormatStr[0] != '%' || + CI->getNumArgOperands() < 3) return 0; // Decode the second character of the format string. if (FormatStr[1] == 'c') { - // fprintf(F, "%c", chr) --> *(i8*)dst = chr - if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0; - EmitFPutC(CI->getOperand(3), CI->getOperand(1), B, TD); + // fprintf(F, "%c", chr) --> fputc(chr, F) + if (!CI->getArgOperand(2)->getType()->isIntegerTy()) return 0; + EmitFPutC(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); return ConstantInt::get(CI->getType(), 1); } if (FormatStr[1] == 's') { - // fprintf(F, "%s", str) -> fputs(str, F) - if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty()) + // fprintf(F, "%s", str) --> fputs(str, F) + if (!CI->getArgOperand(2)->getType()->isPointerTy() || !CI->use_empty()) return 0; - EmitFPutS(CI->getOperand(3), CI->getOperand(1), B, TD); + EmitFPutS(CI->getArgOperand(2), CI->getArgOperand(0), B, TD); return CI; } return 0; diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp index 2306a77..9208238 100644 --- a/lib/Transforms/Scalar/TailDuplication.cpp +++ b/lib/Transforms/Scalar/TailDuplication.cpp @@ -206,12 +206,13 @@ static BasicBlock *FindObviousSharedDomOf(BasicBlock *SrcBlock, // there is only one other pred, get it, otherwise we can't handle it. PI = pred_begin(DstBlock); PE = pred_end(DstBlock); BasicBlock *DstOtherPred = 0; - if (*PI == SrcBlock) { + BasicBlock *P = *PI; + if (P == SrcBlock) { if (++PI == PE) return 0; DstOtherPred = *PI; if (++PI != PE) return 0; } else { - DstOtherPred = *PI; + DstOtherPred = P; if (++PI == PE || *PI != SrcBlock || ++PI != PE) return 0; } diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 5ad5de2..01c8e5d 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -16,9 +16,9 @@ // transformation from taking place, though currently the analysis cannot // support moving any really useful instructions (only dead ones). // 2. This pass transforms functions that are prevented from being tail -// recursive by an associative expression to use an accumulator variable, -// thus compiling the typical naive factorial or 'fib' implementation into -// efficient code. 
+// recursive by an associative and commutative expression to use an +// accumulator variable, thus compiling the typical naive factorial or +// 'fib' implementation into efficient code. // 3. TRE is performed if the function returns void, if the return // returns the result returned by the call, or if the function returns a // run-time constant on all exits from the function. It is possible, though @@ -60,6 +60,7 @@ #include "llvm/Pass.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InlineCost.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/CFG.h" #include "llvm/ADT/Statistic.h" @@ -252,7 +253,7 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { // If we are passing this argument into call as the corresponding // argument operand, then the argument is dynamically constant. // Otherwise, we cannot transform this function safely. - if (CI->getOperand(ArgNo+1) == Arg) + if (CI->getArgOperand(ArgNo) == Arg) return true; } @@ -269,16 +270,16 @@ static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) { } // getCommonReturnValue - Check to see if the function containing the specified -// return instruction and tail call consistently returns the same -// runtime-constant value at all exit points. If so, return the returned value. +// tail call consistently returns the same runtime-constant value at all exit +// points except for IgnoreRI. If so, return the returned value. // -static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { - Function *F = TheRI->getParent()->getParent(); +static Value *getCommonReturnValue(ReturnInst *IgnoreRI, CallInst *CI) { + Function *F = CI->getParent()->getParent(); Value *ReturnedValue = 0; for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI) if (ReturnInst *RI = dyn_cast(BBI->getTerminator())) - if (RI != TheRI) { + if (RI != IgnoreRI) { Value *RetOp = RI->getOperand(0); // We can only perform this transformation if the value returned is @@ -301,9 +302,9 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) { /// Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI) { - if (!I->isAssociative()) return 0; + if (!I->isAssociative() || !I->isCommutative()) return 0; assert(I->getNumOperands() == 2 && - "Associative operations should have 2 args!"); + "Associative/commutative operations should have 2 args!"); // Exactly one operand should be the result of the call instruction... if ((I->getOperand(0) == CI && I->getOperand(1) == CI) || @@ -368,11 +369,16 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, return false; } - // If we are introducing accumulator recursion to eliminate associative - // operations after the call instruction, this variable contains the initial - // value for the accumulator. If this value is set, we actually perform - // accumulator recursion elimination instead of simple tail recursion - // elimination. + // If we are introducing accumulator recursion to eliminate operations after + // the call instruction that are both associative and commutative, the initial + // value for the accumulator is placed in this variable. If this value is set + // then we actually perform accumulator recursion elimination instead of + // simple tail recursion elimination. If the operation is an LLVM instruction + // (eg: "add") then it is recorded in AccumulatorRecursionInstr. 
If not, then + // we are handling the case when the return instruction returns a constant C + // which is different to the constant returned by other return instructions + // (which is recorded in AccumulatorRecursionEliminationInitVal). This is a + // special case of accumulator recursion, the operation being "return C". Value *AccumulatorRecursionEliminationInitVal = 0; Instruction *AccumulatorRecursionInstr = 0; @@ -383,9 +389,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, for (BBI = CI, ++BBI; &*BBI != Ret; ++BBI) if (!CanMoveAboveCall(BBI, CI)) { // If we can't move the instruction above the call, it might be because it - // is an associative operation that could be tranformed using accumulator - // recursion elimination. Check to see if this is the case, and if so, - // remember the initial accumulator value for later. + // is an associative and commutative operation that could be tranformed + // using accumulator recursion elimination. Check to see if this is the + // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = CanTransformAccumulatorRecursion(BBI, CI))) { // Yes, this is accumulator recursion. Remember which instruction @@ -403,8 +409,18 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, if (Ret->getNumOperands() == 1 && Ret->getReturnValue() != CI && !isa(Ret->getReturnValue()) && AccumulatorRecursionEliminationInitVal == 0 && - !getCommonReturnValue(Ret, CI)) - return false; + !getCommonReturnValue(0, CI)) { + // One case remains that we are able to handle: the current return + // instruction returns a constant, and all other return instructions + // return a different constant. + if (!isDynamicConstant(Ret->getReturnValue(), CI, Ret)) + return false; // Current return instruction does not return a constant. + // Check that all other return instructions return a common constant. If + // so, record it in AccumulatorRecursionEliminationInitVal. + AccumulatorRecursionEliminationInitVal = getCommonReturnValue(Ret, CI); + if (!AccumulatorRecursionEliminationInitVal) + return false; + } // OK! We can transform this tail call. If this is the first one found, // create the new entry block, allowing us to branch back to the old entry. @@ -453,8 +469,8 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. - for (unsigned i = 0, e = CI->getNumOperands()-1; i != e; ++i) - ArgumentPHIs[i]->addIncoming(CI->getOperand(i+1), BB); + for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) + ArgumentPHIs[i]->addIncoming(CI->getArgOperand(i), BB); // If we are introducing an accumulator variable to eliminate the recursion, // do so now. Note that we _know_ that no subsequent tail recursion @@ -464,8 +480,9 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. - PHINode *AccPN = PHINode::Create(AccRecInstr->getType(), "accumulator.tr", - OldEntry->begin()); + PHINode *AccPN = + PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), + "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. 
For the // real entry into the function we seed the PHI with the initial value, @@ -475,20 +492,27 @@ bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry, // it will not show up as a predecessor. for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry); PI != PE; ++PI) { - if (*PI == &F->getEntryBlock()) - AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, *PI); + BasicBlock *P = *PI; + if (P == &F->getEntryBlock()) + AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); else - AccPN->addIncoming(AccPN, *PI); + AccPN->addIncoming(AccPN, P); } - // Add an incoming argument for the current block, which is computed by our - // associative accumulator instruction. - AccPN->addIncoming(AccRecInstr, BB); - - // Next, rewrite the accumulator recursion instruction so that it does not - // use the result of the call anymore, instead, use the PHI node we just - // inserted. - AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); + if (AccRecInstr) { + // Add an incoming argument for the current block, which is computed by + // our associative and commutative accumulator instruction. + AccPN->addIncoming(AccRecInstr, BB); + + // Next, rewrite the accumulator recursion instruction so that it does not + // use the result of the call anymore, instead, use the PHI node we just + // inserted. + AccRecInstr->setOperand(AccRecInstr->getOperand(0) != CI, AccPN); + } else { + // Add an incoming argument for the current block, which is just the + // constant returned by the current return instruction. + AccPN->addIncoming(Ret->getReturnValue(), BB); + } // Finally, rewrite any return instructions in the program to return the PHI // node instead of the "initval" that they do currently. This loop will diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp index ea9d1c1..4d64c85 100644 --- a/lib/Transforms/Utils/AddrModeMatcher.cpp +++ b/lib/Transforms/Utils/AddrModeMatcher.cpp @@ -381,29 +381,28 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI) { std::vector Constraints = IA->ParseConstraints(); - - unsigned ArgNo = 1; // ArgNo - The operand of the CallInst. + + unsigned ArgNo = 0; // The argument of the CallInst. for (unsigned i = 0, e = Constraints.size(); i != e; ++i) { TargetLowering::AsmOperandInfo OpInfo(Constraints[i]); - + // Compute the value type for each operand. switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.isIndirect) - OpInfo.CallOperandVal = CI->getOperand(ArgNo++); + OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++); break; case InlineAsm::isInput: - OpInfo.CallOperandVal = CI->getOperand(ArgNo++); + OpInfo.CallOperandVal = CI->getArgOperand(ArgNo++); break; case InlineAsm::isClobber: // Nothing to do. break; } - + // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue(), - OpInfo.ConstraintType == TargetLowering::C_Memory); - + TLI.ComputeConstraintToUse(OpInfo, SDValue()); + // If this asm operand is our Value*, and if it isn't an indirect memory // operand, we can't fold it! 
if (OpInfo.CallOperandVal == OpVal && @@ -411,7 +410,7 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, !OpInfo.isIndirect)) return false; } - + return true; } @@ -450,7 +449,7 @@ static bool FindAllMemoryUses(Instruction *I, if (CallInst *CI = dyn_cast(U)) { InlineAsm *IA = dyn_cast(CI->getCalledValue()); - if (IA == 0) return true; + if (!IA) return true; // If this is a memory operand, we're cool, otherwise bail out. if (!IsOperandAMemoryOperand(CI, IA, I, TLI)) diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 2f1ae00..ec625b4 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -558,121 +558,3 @@ void llvm::FindFunctionBackedges(const Function &F, } - - - -/// AreEquivalentAddressValues - Test if A and B will obviously have the same -/// value. This includes recognizing that %t0 and %t1 will have the same -/// value in code like this: -/// %t0 = getelementptr \@a, 0, 3 -/// store i32 0, i32* %t0 -/// %t1 = getelementptr \@a, 0, 3 -/// %t2 = load i32* %t1 -/// -static bool AreEquivalentAddressValues(const Value *A, const Value *B) { - // Test if the values are trivially equivalent. - if (A == B) return true; - - // Test if the values come from identical arithmetic instructions. - // Use isIdenticalToWhenDefined instead of isIdenticalTo because - // this function is only used when one address use dominates the - // other, which means that they'll always either have the same - // value or one of them will have an undefined value. - if (isa(A) || isa(A) || - isa(A) || isa(A)) - if (const Instruction *BI = dyn_cast(B)) - if (cast(A)->isIdenticalToWhenDefined(BI)) - return true; - - // Otherwise they may not be equivalent. - return false; -} - -/// FindAvailableLoadedValue - Scan the ScanBB block backwards (starting at the -/// instruction before ScanFrom) checking to see if we have the value at the -/// memory address *Ptr locally available within a small number of instructions. -/// If the value is available, return it. -/// -/// If not, return the iterator for the last validated instruction that the -/// value would be live through. If we scanned the entire block and didn't find -/// something that invalidates *Ptr or provides it, ScanFrom would be left at -/// begin() and this returns null. ScanFrom could also be left -/// -/// MaxInstsToScan specifies the maximum instructions to scan in the block. If -/// it is set to 0, it will scan the whole block. You can also optionally -/// specify an alias analysis implementation, which makes this more precise. -Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB, - BasicBlock::iterator &ScanFrom, - unsigned MaxInstsToScan, - AliasAnalysis *AA) { - if (MaxInstsToScan == 0) MaxInstsToScan = ~0U; - - // If we're using alias analysis to disambiguate get the size of *Ptr. - unsigned AccessSize = 0; - if (AA) { - const Type *AccessTy = cast(Ptr->getType())->getElementType(); - AccessSize = AA->getTypeStoreSize(AccessTy); - } - - while (ScanFrom != ScanBB->begin()) { - // We must ignore debug info directives when counting (otherwise they - // would affect codegen). - Instruction *Inst = --ScanFrom; - if (isa(Inst)) - continue; - - // Restore ScanFrom to expected value in case next test succeeds - ScanFrom++; - - // Don't scan huge blocks. - if (MaxInstsToScan-- == 0) return 0; - - --ScanFrom; - // If this is a load of Ptr, the loaded value is available. 
- if (LoadInst *LI = dyn_cast(Inst)) - if (AreEquivalentAddressValues(LI->getOperand(0), Ptr)) - return LI; - - if (StoreInst *SI = dyn_cast(Inst)) { - // If this is a store through Ptr, the value is available! - if (AreEquivalentAddressValues(SI->getOperand(1), Ptr)) - return SI->getOperand(0); - - // If Ptr is an alloca and this is a store to a different alloca, ignore - // the store. This is a trivial form of alias analysis that is important - // for reg2mem'd code. - if ((isa(Ptr) || isa(Ptr)) && - (isa(SI->getOperand(1)) || - isa(SI->getOperand(1)))) - continue; - - // If we have alias analysis and it says the store won't modify the loaded - // value, ignore the store. - if (AA && - (AA->getModRefInfo(SI, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) - continue; - - // Otherwise the store that may or may not alias the pointer, bail out. - ++ScanFrom; - return 0; - } - - // If this is some other instruction that may clobber Ptr, bail out. - if (Inst->mayWriteToMemory()) { - // If alias analysis claims that it really won't modify the load, - // ignore it. - if (AA && - (AA->getModRefInfo(Inst, Ptr, AccessSize) & AliasAnalysis::Mod) == 0) - continue; - - // May modify the pointer, bail out. - ++ScanFrom; - return 0; - } - } - - // Got to the start of the block, we didn't find it, but are done for this - // block. - return 0; -} - diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 8c25ad1..26f53c0 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -106,11 +106,12 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum, // If AllowIdenticalEdges is true, then we allow this edge to be considered // non-critical iff all preds come from TI's block. while (I != E) { - if (*I != FirstPred) + const BasicBlock *P = *I; + if (P != FirstPred) return true; // Note: leave this as is until no one ever compiles with either gcc 4.0.1 // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207 - E = pred_end(*I); + E = pred_end(P); ++I; } return false; @@ -277,11 +278,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, OtherPreds.push_back(PN->getIncomingBlock(i)); } else { for (pred_iterator I = pred_begin(DestBB), E = pred_end(DestBB); - I != E; ++I) - if (*I != NewBB) - OtherPreds.push_back(*I); + I != E; ++I) { + BasicBlock *P = *I; + if (P != NewBB) + OtherPreds.push_back(P); + } } - + bool NewBBDominatesDestBB = true; // Should we update DominatorTree information? @@ -400,11 +403,13 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum, bool HasPredOutsideOfLoop = false; BasicBlock *Exit = ExitBlocks[i]; for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); - I != E; ++I) - if (TIL->contains(*I)) - Preds.push_back(*I); + I != E; ++I) { + BasicBlock *P = *I; + if (TIL->contains(P)) + Preds.push_back(P); else HasPredOutsideOfLoop = true; + } // If there are any preds not in the loop, we'll need to split // the edges. The Preds.empty() check is needed because a block // may appear multiple times in the list. We can't use diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index 767fa3a..7a9d007 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -69,6 +69,31 @@ Value *llvm::EmitStrChr(Value *Ptr, char C, IRBuilder<> &B, return CI; } +/// EmitStrNCmp - Emit a call to the strncmp function to the builder. 
+Value *llvm::EmitStrNCmp(Value *Ptr1, Value *Ptr2, Value *Len, + IRBuilder<> &B, const TargetData *TD) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + AttributeWithIndex AWI[3]; + AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture); + AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture); + AWI[2] = AttributeWithIndex::get(~0u, Attribute::ReadOnly | + Attribute::NoUnwind); + + LLVMContext &Context = B.GetInsertBlock()->getContext(); + Value *StrNCmp = M->getOrInsertFunction("strncmp", AttrListPtr::get(AWI, 3), + B.getInt32Ty(), + B.getInt8PtrTy(), + B.getInt8PtrTy(), + TD->getIntPtrType(Context), NULL); + CallInst *CI = B.CreateCall3(StrNCmp, CastToCStr(Ptr1, B), + CastToCStr(Ptr2, B), Len, "strncmp"); + + if (const Function *F = dyn_cast(StrNCmp->stripPointerCasts())) + CI->setCallingConv(F->getCallingConv()); + + return CI; +} + /// EmitStrCpy - Emit a call to the strcpy function to the builder, for the /// specified pointer arguments. Value *llvm::EmitStrCpy(Value *Dst, Value *Src, IRBuilder<> &B, @@ -112,10 +137,10 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len, Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, bool isVolatile, IRBuilder<> &B, const TargetData *TD) { Module *M = B.GetInsertBlock()->getParent()->getParent(); - const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() }; - Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3); Dst = CastToCStr(Dst, B); Src = CastToCStr(Src, B); + const Type *ArgTys[3] = { Dst->getType(), Src->getType(), Len->getType() }; + Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, ArgTys, 3); return B.CreateCall5(MemCpy, Dst, Src, Len, ConstantInt::get(B.getInt32Ty(), Align), ConstantInt::get(B.getInt1Ty(), isVolatile)); @@ -395,11 +420,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(2) != TD->getIntPtrType(Context) || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + EmitMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1, false, B, TD); - replaceCall(CI->getOperand(1)); + replaceCall(CI->getArgOperand(0)); return true; } return false; @@ -418,11 +443,11 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(2) != TD->getIntPtrType(Context) || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + EmitMemMove(CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2), 1, false, B, TD); - replaceCall(CI->getOperand(1)); + replaceCall(CI->getArgOperand(0)); return true; } return false; @@ -436,12 +461,12 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { FT->getParamType(2) != TD->getIntPtrType(Context) || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - Value *Val = B.CreateIntCast(CI->getOperand(2), B.getInt8Ty(), + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false); - EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), false, B, TD); - 
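
// For illustration, driving the new EmitStrNCmp helper above; B, TD,
// Context and the i8* values S1/S2 are assumed in scope, and N is a
// hypothetical compare length:
Value *Cmp = EmitStrNCmp(S1, S2,
                         ConstantInt::get(TD->getIntPtrType(Context), N),
                         B, TD);
Value *IsEq = B.CreateICmpEQ(Cmp, ConstantInt::get(B.getInt32Ty(), 0),
                             "strncmp.eq");
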
replaceCall(CI->getOperand(1)); + EmitMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), false, B, TD); + replaceCall(CI->getArgOperand(0)); return true; } return false; @@ -462,8 +487,8 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { // st[rp]cpy_chk call which may fail at runtime if the size is too long. // TODO: It might be nice to get a maximum length out of the possible // string lengths for varying. - if (isFoldable(3, 2, true)) { - Value *Ret = EmitStrCpy(CI->getOperand(1), CI->getOperand(2), B, TD, + if (isFoldable(2 + CallInst::ArgOffset, 1 + CallInst::ArgOffset, true)) { + Value *Ret = EmitStrCpy(CI->getArgOperand(0), CI->getArgOperand(1), B, TD, Name.substr(2, 6)); replaceCall(Ret); return true; @@ -479,10 +504,10 @@ bool SimplifyFortifiedLibCalls::fold(CallInst *CI, const TargetData *TD) { !FT->getParamType(2)->isIntegerTy() || FT->getParamType(3) != TD->getIntPtrType(Context)) return false; - - if (isFoldable(4, 3, false)) { - Value *Ret = EmitStrNCpy(CI->getOperand(1), CI->getOperand(2), - CI->getOperand(3), B, TD, Name.substr(2, 7)); + + if (isFoldable(3 + CallInst::ArgOffset, 2 + CallInst::ArgOffset, false)) { + Value *Ret = EmitStrNCpy(CI->getArgOperand(0), CI->getArgOperand(1), + CI->getArgOperand(2), B, TD, Name.substr(2, 7)); replaceCall(Ret); return true; } diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 6d4fe4b..1dcfd57 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -32,7 +32,7 @@ using namespace llvm; // CloneBasicBlock - See comments in Cloning.h BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, - DenseMap &ValueMap, + ValueToValueMapTy &VMap, const Twine &NameSuffix, Function *F, ClonedCodeInfo *CodeInfo) { BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); @@ -47,7 +47,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - ValueMap[II] = NewInst; // Add instruction map to value. + VMap[II] = NewInst; // Add instruction map to value. hasCalls |= (isa(II) && !isa(II)); if (const AllocaInst *AI = dyn_cast(II)) { @@ -72,7 +72,7 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, // ArgMap values. // void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, - DenseMap &ValueMap, + ValueToValueMapTy &VMap, SmallVectorImpl &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo) { assert(NameSuffix && "NameSuffix cannot be null!"); @@ -80,17 +80,17 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, #ifndef NDEBUG for (Function::const_arg_iterator I = OldFunc->arg_begin(), E = OldFunc->arg_end(); I != E; ++I) - assert(ValueMap.count(I) && "No mapping from source argument specified!"); + assert(VMap.count(I) && "No mapping from source argument specified!"); #endif // Clone any attributes. if (NewFunc->arg_size() == OldFunc->arg_size()) NewFunc->copyAttributesFrom(OldFunc); else { - //Some arguments were deleted with the ValueMap. Copy arguments one by one + //Some arguments were deleted with the VMap. 
Copy arguments one by one for (Function::const_arg_iterator I = OldFunc->arg_begin(), E = OldFunc->arg_end(); I != E; ++I) - if (Argument* Anew = dyn_cast(ValueMap[I])) + if (Argument* Anew = dyn_cast(VMap[I])) Anew->addAttr( OldFunc->getAttributes() .getParamAttributes(I->getArgNo() + 1)); NewFunc->setAttributes(NewFunc->getAttributes() @@ -111,43 +111,43 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, const BasicBlock &BB = *BI; // Create a new basic block and copy instructions into it! - BasicBlock *CBB = CloneBasicBlock(&BB, ValueMap, NameSuffix, NewFunc, + BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); - ValueMap[&BB] = CBB; // Add basic block mapping. + VMap[&BB] = CBB; // Add basic block mapping. if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) Returns.push_back(RI); } // Loop over all of the instructions in the function, fixing up operand - // references as we go. This uses ValueMap to do all the hard work. + // references as we go. This uses VMap to do all the hard work. // - for (Function::iterator BB = cast(ValueMap[OldFunc->begin()]), + for (Function::iterator BB = cast(VMap[OldFunc->begin()]), BE = NewFunc->end(); BB != BE; ++BB) // Loop over all instructions, fixing each one as we find it... for (BasicBlock::iterator II = BB->begin(); II != BB->end(); ++II) - RemapInstruction(II, ValueMap); + RemapInstruction(II, VMap); } /// CloneFunction - Return a copy of the specified function, but without /// embedding the function into another module. Also, any references specified -/// in the ValueMap are changed to refer to their mapped value instead of the -/// original one. If any of the arguments to the function are in the ValueMap, -/// the arguments are deleted from the resultant function. The ValueMap is +/// in the VMap are changed to refer to their mapped value instead of the +/// original one. If any of the arguments to the function are in the VMap, +/// the arguments are deleted from the resultant function. The VMap is /// updated to include mappings from all of the instructions and basicblocks in /// the function from their old to new values. /// Function *llvm::CloneFunction(const Function *F, - DenseMap &ValueMap, + ValueToValueMapTy &VMap, ClonedCodeInfo *CodeInfo) { std::vector ArgTypes; // The user might be deleting arguments to the function by specifying them in - // the ValueMap. If so, we need to not add the arguments to the arg ty vector + // the VMap. If so, we need to not add the arguments to the arg ty vector // for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (ValueMap.count(I) == 0) // Haven't mapped the argument to anything yet? + if (VMap.count(I) == 0) // Haven't mapped the argument to anything yet? ArgTypes.push_back(I->getType()); // Create a new function type... @@ -161,13 +161,13 @@ Function *llvm::CloneFunction(const Function *F, Function::arg_iterator DestI = NewF->arg_begin(); for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) - if (ValueMap.count(I) == 0) { // Is this argument preserved? + if (VMap.count(I) == 0) { // Is this argument preserved? DestI->setName(I->getName()); // Copy the name over... - ValueMap[I] = DestI++; // Add mapping to ValueMap + VMap[I] = DestI++; // Add mapping to VMap } SmallVector Returns; // Ignore returns cloned. 
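
// For illustration of the doc comment above: pre-seeding the map is how a
// caller deletes arguments from the clone.  Sketch, with F a hypothetical
// Function* whose first argument should be dropped:
ValueToValueMapTy VMap;
Argument *A = F->arg_begin();
VMap[A] = UndefValue::get(A->getType());   // already mapped => not cloned
Function *NewF = CloneFunction(F, VMap, /*CodeInfo=*/0);
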
- CloneFunctionInto(NewF, F, ValueMap, Returns, "", CodeInfo); + CloneFunctionInto(NewF, F, VMap, Returns, "", CodeInfo); return NewF; } @@ -179,19 +179,19 @@ namespace { struct PruningFunctionCloner { Function *NewFunc; const Function *OldFunc; - DenseMap &ValueMap; + ValueToValueMapTy &VMap; SmallVectorImpl &Returns; const char *NameSuffix; ClonedCodeInfo *CodeInfo; const TargetData *TD; public: PruningFunctionCloner(Function *newFunc, const Function *oldFunc, - DenseMap &valueMap, + ValueToValueMapTy &valueMap, SmallVectorImpl &returns, const char *nameSuffix, ClonedCodeInfo *codeInfo, const TargetData *td) - : NewFunc(newFunc), OldFunc(oldFunc), ValueMap(valueMap), Returns(returns), + : NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap), Returns(returns), NameSuffix(nameSuffix), CodeInfo(codeInfo), TD(td) { } @@ -202,7 +202,7 @@ namespace { public: /// ConstantFoldMappedInstruction - Constant fold the specified instruction, - /// mapping its operands through ValueMap if they are available. + /// mapping its operands through VMap if they are available. Constant *ConstantFoldMappedInstruction(const Instruction *I); }; } @@ -211,7 +211,7 @@ namespace { /// anything that it can reach. void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, std::vector &ToClone){ - Value *&BBEntry = ValueMap[BB]; + Value *&BBEntry = VMap[BB]; // Have we already cloned this block? if (BBEntry) return; @@ -230,7 +230,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If this instruction constant folds, don't bother cloning the instruction, // instead, just add the constant to the value map. if (Constant *C = ConstantFoldMappedInstruction(II)) { - ValueMap[II] = C; + VMap[II] = C; continue; } @@ -238,7 +238,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (II->hasName()) NewInst->setName(II->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - ValueMap[II] = NewInst; // Add instruction map to value. + VMap[II] = NewInst; // Add instruction map to value. hasCalls |= (isa(II) && !isa(II)); if (const AllocaInst *AI = dyn_cast(II)) { @@ -258,12 +258,12 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, ConstantInt *Cond = dyn_cast(BI->getCondition()); // Or is a known constant in the caller... if (Cond == 0) - Cond = dyn_cast_or_null(ValueMap[BI->getCondition()]); + Cond = dyn_cast_or_null(VMap[BI->getCondition()]); // Constant fold to uncond branch! if (Cond) { BasicBlock *Dest = BI->getSuccessor(!Cond->getZExtValue()); - ValueMap[OldTI] = BranchInst::Create(Dest, NewBB); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; } @@ -272,10 +272,10 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, // If switching on a value known constant in the caller. ConstantInt *Cond = dyn_cast(SI->getCondition()); if (Cond == 0) // Or known constant after constant prop in the callee... - Cond = dyn_cast_or_null(ValueMap[SI->getCondition()]); + Cond = dyn_cast_or_null(VMap[SI->getCondition()]); if (Cond) { // Constant fold to uncond branch! BasicBlock *Dest = SI->getSuccessor(SI->findCaseValue(Cond)); - ValueMap[OldTI] = BranchInst::Create(Dest, NewBB); + VMap[OldTI] = BranchInst::Create(Dest, NewBB); ToClone.push_back(Dest); TerminatorDone = true; } @@ -286,7 +286,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, if (OldTI->hasName()) NewInst->setName(OldTI->getName()+NameSuffix); NewBB->getInstList().push_back(NewInst); - ValueMap[OldTI] = NewInst; // Add instruction map to value. 
+ VMap[OldTI] = NewInst; // Add instruction map to value. // Recursively clone any reachable successor blocks. const TerminatorInst *TI = BB->getTerminator(); @@ -307,13 +307,13 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, } /// ConstantFoldMappedInstruction - Constant fold the specified instruction, -/// mapping its operands through ValueMap if they are available. +/// mapping its operands through VMap if they are available. Constant *PruningFunctionCloner:: ConstantFoldMappedInstruction(const Instruction *I) { SmallVector Ops; for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) if (Constant *Op = dyn_cast_or_null(MapValue(I->getOperand(i), - ValueMap))) + VMap))) Ops.push_back(Op); else return 0; // All operands not constant! @@ -363,7 +363,7 @@ static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD) { /// dead. Since this doesn't produce an exact copy of the input, it can't be /// used for things like CloneFunction or CloneModule. void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, - DenseMap &ValueMap, + ValueToValueMapTy &VMap, SmallVectorImpl &Returns, const char *NameSuffix, ClonedCodeInfo *CodeInfo, @@ -374,10 +374,10 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, #ifndef NDEBUG for (Function::const_arg_iterator II = OldFunc->arg_begin(), E = OldFunc->arg_end(); II != E; ++II) - assert(ValueMap.count(II) && "No mapping from source argument specified!"); + assert(VMap.count(II) && "No mapping from source argument specified!"); #endif - PruningFunctionCloner PFC(NewFunc, OldFunc, ValueMap, Returns, + PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, Returns, NameSuffix, CodeInfo, TD); // Clone the entry block, and anything recursively reachable from it. @@ -397,14 +397,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, SmallVector PHIToResolve; for (Function::const_iterator BI = OldFunc->begin(), BE = OldFunc->end(); BI != BE; ++BI) { - BasicBlock *NewBB = cast_or_null(ValueMap[BI]); + BasicBlock *NewBB = cast_or_null(VMap[BI]); if (NewBB == 0) continue; // Dead block. // Add the new block to the new function. NewFunc->getBasicBlockList().push_back(NewBB); // Loop over all of the instructions in the block, fixing up operand - // references as we go. This uses ValueMap to do all the hard work. + // references as we go. This uses VMap to do all the hard work. // BasicBlock::iterator I = NewBB->begin(); @@ -455,7 +455,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, I->setMetadata(DbgKind, 0); } } - RemapInstruction(I, ValueMap); + RemapInstruction(I, VMap); } } @@ -465,19 +465,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, const PHINode *OPN = PHIToResolve[phino]; unsigned NumPreds = OPN->getNumIncomingValues(); const BasicBlock *OldBB = OPN->getParent(); - BasicBlock *NewBB = cast(ValueMap[OldBB]); + BasicBlock *NewBB = cast(VMap[OldBB]); // Map operands for blocks that are live and remove operands for blocks // that are dead. 
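
// Why this file now takes ValueToValueMapTy instead of a plain
// DenseMap<const Value*, Value*>: ValueMap holds its keys through
// callback value handles, so entries survive a key being RAUW'd or
// deleted mid-clone.  Minimal sketch (Old, New, Other hypothetical):
ValueMap<const Value*, Value*> VM;
VM[Old] = New;
Old->replaceAllUsesWith(Other);
// With the default ValueMapConfig the entry is re-keyed to Other here,
// where a DenseMap would be left holding a stale key.
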
     for (; phino != PHIToResolve.size() &&
             PHIToResolve[phino]->getParent() == OldBB; ++phino) {
       OPN = PHIToResolve[phino];
-      PHINode *PN = cast<PHINode>(ValueMap[OPN]);
+      PHINode *PN = cast<PHINode>(VMap[OPN]);
       for (unsigned pred = 0, e = NumPreds; pred != e; ++pred) {
         if (BasicBlock *MappedBlock =
-              cast_or_null<BasicBlock>(ValueMap[PN->getIncomingBlock(pred)])) {
+              cast_or_null<BasicBlock>(VMap[PN->getIncomingBlock(pred)])) {
           Value *InVal = MapValue(PN->getIncomingValue(pred),
-                                  ValueMap);
+                                  VMap);
           assert(InVal && "Unknown input value?");
           PN->setIncomingValue(pred, InVal);
           PN->setIncomingBlock(pred, MappedBlock);
@@ -531,15 +531,15 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
       while ((PN = dyn_cast<PHINode>(I++))) {
         Value *NV = UndefValue::get(PN->getType());
         PN->replaceAllUsesWith(NV);
-        assert(ValueMap[OldI] == PN && "ValueMap mismatch");
-        ValueMap[OldI] = NV;
+        assert(VMap[OldI] == PN && "VMap mismatch");
+        VMap[OldI] = NV;
         PN->eraseFromParent();
         ++OldI;
       }
     }
 
   // NOTE: We cannot eliminate single entry phi nodes here, because of
-  // ValueMap.  Single entry phi nodes can have multiple ValueMap entries
-  // pointing at them.  Thus, deleting one would require scanning the ValueMap
+  // VMap.  Single entry phi nodes can have multiple VMap entries
+  // pointing at them.  Thus, deleting one would require scanning the VMap
   // to update any entries in it that would require that.  This would be
   // really slow.
 }
 
@@ -548,14 +548,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
   // and zap unconditional fall-through branches.  This happen all the time when
   // specializing code: code specialization turns conditional branches into
   // uncond branches, and this code folds them.
-  Function::iterator I = cast<BasicBlock>(ValueMap[&OldFunc->getEntryBlock()]);
+  Function::iterator I = cast<BasicBlock>(VMap[&OldFunc->getEntryBlock()]);
   while (I != NewFunc->end()) {
     BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
     if (!BI || BI->isConditional()) { ++I; continue; }
 
     // Note that we can't eliminate uncond branches if the destination has
     // single-entry PHI nodes.  Eliminating the single-entry phi nodes would
-    // require scanning the ValueMap to update any entries that point to the phi
+    // require scanning the VMap to update any entries that point to the phi
     // node.
     BasicBlock *Dest = BI->getSuccessor(0);
     if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
index 38928dc..551b630 100644
--- a/lib/Transforms/Utils/CloneLoop.cpp
+++ b/lib/Transforms/Utils/CloneLoop.cpp
@@ -15,7 +15,6 @@
 #include "llvm/BasicBlock.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/ADT/DenseMap.h"
 
 using namespace llvm;
 
@@ -23,13 +22,13 @@ using namespace llvm;
 /// CloneDominatorInfo - Clone basicblock's dominator tree and, if available,
 /// dominance info. It is expected that basic block is already cloned.
 static void CloneDominatorInfo(BasicBlock *BB,
-                               DenseMap<const Value *, Value *> &ValueMap,
+                               ValueMap<const Value *, Value *> &VMap,
                                DominatorTree *DT,
                                DominanceFrontier *DF) {
 
   assert (DT && "DominatorTree is not available");
-  DenseMap<const Value *, Value *>::iterator BI = ValueMap.find(BB);
-  assert (BI != ValueMap.end() && "BasicBlock clone is missing");
+  ValueMap<const Value *, Value *>::iterator BI = VMap.find(BB);
+  assert (BI != VMap.end() && "BasicBlock clone is missing");
   BasicBlock *NewBB = cast<BasicBlock>(BI->second);
 
   // NewBB already got dominator info.
@@ -43,11 +42,11 @@ static void CloneDominatorInfo(BasicBlock *BB,
 
   // NewBB's dominator is either BB's dominator or BB's dominator's clone.
BasicBlock *NewBBDom = BBDom; - DenseMap::iterator BBDomI = ValueMap.find(BBDom); - if (BBDomI != ValueMap.end()) { + ValueMap::iterator BBDomI = VMap.find(BBDom); + if (BBDomI != VMap.end()) { NewBBDom = cast(BBDomI->second); if (!DT->getNode(NewBBDom)) - CloneDominatorInfo(BBDom, ValueMap, DT, DF); + CloneDominatorInfo(BBDom, VMap, DT, DF); } DT->addNewBlock(NewBB, NewBBDom); @@ -60,8 +59,8 @@ static void CloneDominatorInfo(BasicBlock *BB, for (DominanceFrontier::DomSetType::iterator I = S.begin(), E = S.end(); I != E; ++I) { BasicBlock *DB = *I; - DenseMap::iterator IDM = ValueMap.find(DB); - if (IDM != ValueMap.end()) + ValueMap::iterator IDM = VMap.find(DB); + if (IDM != VMap.end()) NewDFSet.insert(cast(IDM->second)); else NewDFSet.insert(DB); @@ -71,10 +70,10 @@ static void CloneDominatorInfo(BasicBlock *BB, } } -/// CloneLoop - Clone Loop. Clone dominator info. Populate ValueMap +/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap /// using old blocks to new blocks mapping. Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, - DenseMap &ValueMap, Pass *P) { + ValueMap &VMap, Pass *P) { DominatorTree *DT = NULL; DominanceFrontier *DF = NULL; @@ -104,8 +103,8 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - BasicBlock *NewBB = CloneBasicBlock(BB, ValueMap, ".clone"); - ValueMap[BB] = NewBB; + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone"); + VMap[BB] = NewBB; if (P) LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L); NewLoop->addBasicBlockToLoop(NewBB, LI->getBase()); @@ -117,7 +116,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) { BasicBlock *BB = *I; - CloneDominatorInfo(BB, ValueMap, DT, DF); + CloneDominatorInfo(BB, VMap, DT, DF); } // Process sub loops @@ -125,7 +124,7 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, LoopNest.push_back(*I); } while (!LoopNest.empty()); - // Remap instructions to reference operands from ValueMap. + // Remap instructions to reference operands from VMap. for(SmallVector::iterator NBItr = NewBlocks.begin(), NBE = NewBlocks.end(); NBItr != NBE; ++NBItr) { BasicBlock *NB = *NBItr; @@ -135,8 +134,8 @@ Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI, for (unsigned index = 0, num_ops = Insn->getNumOperands(); index != num_ops; ++index) { Value *Op = Insn->getOperand(index); - DenseMap::iterator OpItr = ValueMap.find(Op); - if (OpItr != ValueMap.end()) + ValueMap::iterator OpItr = VMap.find(Op); + if (OpItr != VMap.end()) Insn->setOperand(index, OpItr->second); } } diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index b87c082..fc603d2 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -28,12 +28,12 @@ using namespace llvm; Module *llvm::CloneModule(const Module *M) { // Create the value map that maps things from the old module over to the new // module. - DenseMap ValueMap; - return CloneModule(M, ValueMap); + ValueToValueMapTy VMap; + return CloneModule(M, VMap); } Module *llvm::CloneModule(const Module *M, - DenseMap &ValueMap) { + ValueToValueMapTy &VMap) { // First off, we need to create the new module... 
Module *New = new Module(M->getModuleIdentifier(), M->getContext()); New->setDataLayout(M->getDataLayout()); @@ -51,7 +51,7 @@ Module *llvm::CloneModule(const Module *M, New->addLibrary(*I); // Loop over all of the global variables, making corresponding globals in the - // new module. Here we add them to the ValueMap and to the new Module. We + // new module. Here we add them to the VMap and to the new Module. We // don't worry about attributes or initializers, they will come later. // for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); @@ -62,7 +62,7 @@ Module *llvm::CloneModule(const Module *M, GlobalValue::ExternalLinkage, 0, I->getName()); GV->setAlignment(I->getAlignment()); - ValueMap[I] = GV; + VMap[I] = GV; } // Loop over the functions in the module, making external functions as before @@ -71,13 +71,13 @@ Module *llvm::CloneModule(const Module *M, Function::Create(cast(I->getType()->getElementType()), GlobalValue::ExternalLinkage, I->getName(), New); NF->copyAttributesFrom(I); - ValueMap[I] = NF; + VMap[I] = NF; } // Loop over the aliases in the module for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) - ValueMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage, + VMap[I] = new GlobalAlias(I->getType(), GlobalAlias::ExternalLinkage, I->getName(), NULL, New); // Now that all of the things that global variable initializer can refer to @@ -86,10 +86,10 @@ Module *llvm::CloneModule(const Module *M, // for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) { - GlobalVariable *GV = cast(ValueMap[I]); + GlobalVariable *GV = cast(VMap[I]); if (I->hasInitializer()) GV->setInitializer(cast(MapValue(I->getInitializer(), - ValueMap))); + VMap))); GV->setLinkage(I->getLinkage()); GV->setThreadLocal(I->isThreadLocal()); GV->setConstant(I->isConstant()); @@ -98,17 +98,17 @@ Module *llvm::CloneModule(const Module *M, // Similarly, copy over function bodies now... // for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { - Function *F = cast(ValueMap[I]); + Function *F = cast(VMap[I]); if (!I->isDeclaration()) { Function::arg_iterator DestI = F->arg_begin(); for (Function::const_arg_iterator J = I->arg_begin(); J != I->arg_end(); ++J) { DestI->setName(J->getName()); - ValueMap[J] = DestI++; + VMap[J] = DestI++; } SmallVector Returns; // Ignore returns cloned. - CloneFunctionInto(F, I, ValueMap, Returns); + CloneFunctionInto(F, I, VMap, Returns); } F->setLinkage(I->getLinkage()); @@ -117,11 +117,37 @@ Module *llvm::CloneModule(const Module *M, // And aliases for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { - GlobalAlias *GA = cast(ValueMap[I]); + GlobalAlias *GA = cast(VMap[I]); GA->setLinkage(I->getLinkage()); if (const Constant* C = I->getAliasee()) - GA->setAliasee(cast(MapValue(C, ValueMap))); + GA->setAliasee(cast(MapValue(C, VMap))); } - + + // And named metadata.... + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) { + const NamedMDNode &NMD = *I; + SmallVector MDs; + for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) + MDs.push_back(cast(MapValue(NMD.getOperand(i), VMap))); + NamedMDNode::Create(New->getContext(), NMD.getName(), + MDs.data(), MDs.size(), New); + } + + // Update metadata attach with instructions. 
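
// End-to-end use of the interface being extended here; the map comes back
// filled with old-global -> new-global entries (M hypothetical):
ValueToValueMapTy VMap;
Module *NewM = CloneModule(M, VMap);
Function *OldF = M->getFunction("main");
Function *NewF = OldF ? cast<Function>(VMap[OldF]) : 0;
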
+ for (Module::iterator MI = New->begin(), ME = New->end(); MI != ME; ++MI) + for (Function::iterator FI = MI->begin(), FE = MI->end(); + FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + SmallVector, 4 > MDs; + BI->getAllMetadata(MDs); + for (SmallVector, 4>::iterator + MDI = MDs.begin(), MDE = MDs.end(); MDI != MDE; ++MDI) { + Value *MappedValue = MapValue(MDI->second, VMap); + if (MDI->second != MappedValue && MappedValue) + BI->setMetadata(MDI->first, cast(MappedValue)); + } + } return New; } diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index c908b4a..8e82a02 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -35,7 +35,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, I.eraseFromParent(); return 0; } - + // Create a stack slot to hold the value. AllocaInst *Slot; if (AllocaPoint) { @@ -46,7 +46,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Slot = new AllocaInst(I.getType(), 0, I.getName()+".reg2mem", F->getEntryBlock().begin()); } - + // Change all of the users of the instruction to read from the stack slot // instead. while (!I.use_empty()) { @@ -67,7 +67,7 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, Value *&V = Loads[PN->getIncomingBlock(i)]; if (V == 0) { // Insert the load into the predecessor block - V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, + V = new LoadInst(Slot, I.getName()+".reload", VolatileLoads, PN->getIncomingBlock(i)->getTerminator()); } PN->setIncomingValue(i, V); @@ -110,8 +110,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, /// The phi node is deleted and it returns the pointer to the alloca inserted. AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { if (P->use_empty()) { - P->eraseFromParent(); - return 0; + P->eraseFromParent(); + return 0; } // Create a stack slot to hold the value. @@ -124,23 +124,23 @@ AllocaInst* llvm::DemotePHIToStack(PHINode *P, Instruction *AllocaPoint) { Slot = new AllocaInst(P->getType(), 0, P->getName()+".reg2mem", F->getEntryBlock().begin()); } - + // Iterate over each operand, insert store in each predecessor. for (unsigned i = 0, e = P->getNumIncomingValues(); i < e; ++i) { if (InvokeInst *II = dyn_cast(P->getIncomingValue(i))) { - assert(II->getParent() != P->getIncomingBlock(i) && + assert(II->getParent() != P->getIncomingBlock(i) && "Invoke edge not supported yet"); II=II; } - new StoreInst(P->getIncomingValue(i), Slot, + new StoreInst(P->getIncomingValue(i), Slot, P->getIncomingBlock(i)->getTerminator()); } - + // Insert load in place of the phi and replace all uses. Value *V = new LoadInst(Slot, P->getName()+".reload", P); P->replaceAllUsesWith(V); - + // Delete phi. P->eraseFromParent(); - + return Slot; } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 91390bc..598e7d2 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -63,7 +63,8 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, // Next, create the new invoke instruction, inserting it at the end // of the old basic block. 
- SmallVector InvokeArgs(CI->op_begin()+1, CI->op_end()); + ImmutableCallSite CS(CI); + SmallVector InvokeArgs(CS.arg_begin(), CS.arg_end()); InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, InvokeDest, InvokeArgs.begin(), InvokeArgs.end(), @@ -169,7 +170,7 @@ static void HandleInlinedInvoke(InvokeInst *II, BasicBlock *FirstNewBlock, /// some edges of the callgraph may remain. static void UpdateCallGraphAfterInlining(CallSite CS, Function::iterator FirstNewBlock, - DenseMap &ValueMap, + ValueMap &VMap, InlineFunctionInfo &IFI) { CallGraph &CG = *IFI.CG; const Function *Caller = CS.getInstruction()->getParent()->getParent(); @@ -192,9 +193,9 @@ static void UpdateCallGraphAfterInlining(CallSite CS, for (; I != E; ++I) { const Value *OrigCall = I->first; - DenseMap::iterator VMI = ValueMap.find(OrigCall); + ValueMap::iterator VMI = VMap.find(OrigCall); // Only copy the edge if the call was inlined! - if (VMI == ValueMap.end() || VMI->second == 0) + if (VMI == VMap.end() || VMI->second == 0) continue; // If the call was inlined, but then constant folded, there is no edge to @@ -285,8 +286,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { ClonedCodeInfo InlinedFunctionInfo; Function::iterator FirstNewBlock; - { // Scope to destroy ValueMap after cloning. - DenseMap ValueMap; + { // Scope to destroy VMap after cloning. + ValueMap VMap; assert(CalledFunc->arg_size() == CS.arg_size() && "No varargs calls can be inlined!"); @@ -351,16 +352,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Uses of the argument in the function should use our new alloca // instead. ActualArg = NewAlloca; + + // Calls that we inline may use the new alloca, so we need to clear + // their 'tail' flags. + MustClearTailCallFlags = true; } - ValueMap[I] = ActualArg; + VMap[I] = ActualArg; } // We want the inliner to prune the code as it copies. We would LOVE to // have no dead or constant instructions leftover after inlining occurs // (which can happen, e.g., because an argument was constant), but we'll be // happy with whatever the cloner can do. - CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i", + CloneAndPruneFunctionInto(Caller, CalledFunc, VMap, Returns, ".i", &InlinedFunctionInfo, IFI.TD, TheCall); // Remember the first block that is newly cloned over. @@ -368,7 +373,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // Update the callgraph if requested. if (IFI.CG) - UpdateCallGraphAfterInlining(CS, FirstNewBlock, ValueMap, IFI); + UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI); } // If there are any alloca instructions in the block that used to be the entry diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index df6e603..e90c30b 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -190,14 +190,15 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { - BasicBlock *UserBB = cast(*UI)->getParent(); - if (PHINode *PN = dyn_cast(*UI)) + User *U = *UI; + BasicBlock *UserBB = cast(U)->getParent(); + if (PHINode *PN = dyn_cast(U)) UserBB = PN->getIncomingBlock(UI); if (InstBB != UserBB && !inLoop(UserBB)) UsesToRewrite.push_back(&UI.getUse()); } - + // If there are no uses outside the loop, exit with no change. 
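
// Why the new MustClearTailCallFlags above: a 'tail' call promises not to
// touch the caller's stack, and a byval argument was just lowered into a
// caller-frame alloca.  Any cloned call that can now reach that alloca
// must lose the marker; in essence:
static void dropTailMarker(CallInst *ClonedCall) {
  ClonedCall->setTailCall(false);   // no longer a valid tail call
}
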
if (UsesToRewrite.empty()) return false; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index d03f7a6..0b48a8f 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -35,111 +35,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// Local analysis. -// - -/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and -/// bitcasts to get back to the underlying object being addressed, keeping -/// track of the offset in bytes from the GEPs relative to the result. -/// This is closely related to Value::getUnderlyingObject but is located -/// here to avoid making VMCore depend on TargetData. -static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD, - uint64_t &ByteOffset, - unsigned MaxLookup = 6) { - if (!V->getType()->isPointerTy()) - return V; - for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { - if (GEPOperator *GEP = dyn_cast(V)) { - if (!GEP->hasAllConstantIndices()) - return V; - SmallVector Indices(GEP->op_begin() + 1, GEP->op_end()); - ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), - &Indices[0], Indices.size()); - V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { - V = cast(V)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast(V)) { - if (GA->mayBeOverridden()) - return V; - V = GA->getAliasee(); - } else { - return V; - } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); - } - return V; -} - -/// isSafeToLoadUnconditionally - Return true if we know that executing a load -/// from this value cannot trap. If it is not obviously safe to load from the -/// specified pointer, we do a quick local scan of the basic block containing -/// ScanFrom, to determine if the address is already accessed. -bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, - unsigned Align, const TargetData *TD) { - uint64_t ByteOffset = 0; - Value *Base = V; - if (TD) - Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); - - const Type *BaseType = 0; - unsigned BaseAlign = 0; - if (const AllocaInst *AI = dyn_cast(Base)) { - // An alloca is safe to load from as load as it is suitably aligned. - BaseType = AI->getAllocatedType(); - BaseAlign = AI->getAlignment(); - } else if (const GlobalValue *GV = dyn_cast(Base)) { - // Global variables are safe to load from but their size cannot be - // guaranteed if they are overridden. - if (!isa(GV) && !GV->mayBeOverridden()) { - BaseType = GV->getType()->getElementType(); - BaseAlign = GV->getAlignment(); - } - } - - if (BaseType && BaseType->isSized()) { - if (TD && BaseAlign == 0) - BaseAlign = TD->getPrefTypeAlignment(BaseType); - - if (Align <= BaseAlign) { - if (!TD) - return true; // Loading directly from an alloca or global is OK. - - // Check if the load is within the bounds of the underlying object. - const PointerType *AddrTy = cast(V->getType()); - uint64_t LoadSize = TD->getTypeStoreSize(AddrTy->getElementType()); - if (ByteOffset + LoadSize <= TD->getTypeAllocSize(BaseType) && - (Align == 0 || (ByteOffset % Align) == 0)) - return true; - } - } - - // Otherwise, be a little bit aggressive by scanning the local block where we - // want to check to see if the pointer is already being loaded or stored - // from/to. If so, the previous load or store would have already trapped, - // so there is no harm doing an extra load (also, CSE will later eliminate - // the load entirely). 
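
// For illustration: like FindAvailableLoadedValue earlier, this helper
// moves to the new Analysis/Loads home rather than disappearing.  Typical
// use when speculating a load above a branch (Ptr, InsertPt, Alignment,
// TD hypothetical):
if (isSafeToLoadUnconditionally(Ptr, InsertPt, Alignment, TD)) {
  Value *V = new LoadInst(Ptr, "spec.load", InsertPt);
  // ... hoisted users of V ...
}
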
- BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); - - while (BBI != E) { - --BBI; - - // If we see a free or a call which may write to memory (i.e. which might do - // a free) the pointer could be marked invalid. - if (isa(BBI) && BBI->mayWriteToMemory() && - !isa(BBI)) - return false; - - if (LoadInst *LI = dyn_cast(BBI)) { - if (LI->getOperand(0) == V) return true; - } else if (StoreInst *SI = dyn_cast(BBI)) { - if (SI->getOperand(1) == V) return true; - } - } - return false; -} - - -//===----------------------------------------------------------------------===// // Local constant propagation. // @@ -537,9 +432,11 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) { // Use that list to make another list of common predecessors of BB and Succ BlockSet CommonPreds; for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ); - PI != PE; ++PI) - if (BBPreds.count(*PI)) - CommonPreds.insert(*PI); + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (BBPreds.count(P)) + CommonPreds.insert(P); + } // Shortcut, if there are no common predecessors, merging is always safe if (CommonPreds.empty()) diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 1ef3c32..4f4edf3 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -142,9 +142,11 @@ ReprocessLoop: if (*BB == L->getHeader()) continue; SmallPtrSet BadPreds; - for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI) - if (!L->contains(*PI)) - BadPreds.insert(*PI); + for (pred_iterator PI = pred_begin(*BB), PE = pred_end(*BB); PI != PE; ++PI){ + BasicBlock *P = *PI; + if (!L->contains(P)) + BadPreds.insert(P); + } // Delete each unique out-of-loop (and thus dead) predecessor. for (SmallPtrSet::iterator I = BadPreds.begin(), @@ -192,7 +194,7 @@ ReprocessLoop: if (!Preheader) { Preheader = InsertPreheaderForLoop(L); if (Preheader) { - NumInserted++; + ++NumInserted; Changed = true; } } @@ -215,7 +217,7 @@ ReprocessLoop: // allowed. if (!L->contains(*PI)) { if (RewriteLoopExitBlock(L, ExitBlock)) { - NumInserted++; + ++NumInserted; Changed = true; } break; @@ -244,7 +246,7 @@ ReprocessLoop: // loop header. LoopLatch = InsertUniqueBackedgeBlock(L, Preheader); if (LoopLatch) { - NumInserted++; + ++NumInserted; Changed = true; } } @@ -353,16 +355,18 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { // Compute the set of predecessors of the loop that are not in the loop. SmallVector OutsideBlocks; for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); - PI != PE; ++PI) - if (!L->contains(*PI)) { // Coming in from outside the loop? + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) { // Coming in from outside the loop? // If the loop is branched to from an indirect branch, we won't // be able to fully transform the loop, because it prohibits // edge splitting. - if (isa((*PI)->getTerminator())) return 0; + if (isa(P->getTerminator())) return 0; // Keep track of it. - OutsideBlocks.push_back(*PI); + OutsideBlocks.push_back(P); } + } // Split out the loop pre-header. BasicBlock *NewBB = @@ -385,13 +389,15 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) { /// outside of the loop. 
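
// The mechanical pattern repeated across these hunks, in isolation:
// dereference the predecessor iterator once into a named local rather
// than writing *PI several times (BB, L, OutsideBlocks assumed in scope):
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
  BasicBlock *P = *PI;              // one dereference, then plain pointers
  if (!L->contains(P))
    OutsideBlocks.push_back(P);
}
// Besides reading better, this keeps each site correct if pred_iterator's
// operator* ever stops being a trivial pointer return.
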
BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) { SmallVector LoopBlocks; - for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) - if (L->contains(*I)) { + for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I) { + BasicBlock *P = *I; + if (L->contains(P)) { // Don't do this if the loop is exited via an indirect branch. - if (isa((*I)->getTerminator())) return 0; + if (isa(P->getTerminator())) return 0; - LoopBlocks.push_back(*I); + LoopBlocks.push_back(P); } + } assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?"); BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0], @@ -559,10 +565,11 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { // Determine which blocks should stay in L and which should be moved out to // the Outer loop now. std::set BlocksInL; - for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) - if (DT->dominates(Header, *PI)) - AddBlockAndPredsToSet(*PI, Header, BlocksInL); - + for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(Header, P)) + AddBlockAndPredsToSet(P, Header, BlocksInL); + } // Scan all of the loop children of L, moving them to OuterLoop if they are // not part of the inner loop. @@ -610,8 +617,10 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // Figure out which basic blocks contain back-edges to the loop header. std::vector BackedgeBlocks; - for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I) - if (*I != Preheader) BackedgeBlocks.push_back(*I); + for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){ + BasicBlock *P = *I; + if (P != Preheader) BackedgeBlocks.push_back(P); + } // Create and insert the new backedge block... BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(), diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 84fd1eb..e0e07e7 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -37,13 +37,13 @@ STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); /// RemapInstruction - Convert the instruction operands from referencing the -/// current values into those specified by ValueMap. +/// current values into those specified by VMap. static inline void RemapInstruction(Instruction *I, - DenseMap &ValueMap) { + ValueMap &VMap) { for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) { Value *Op = I->getOperand(op); - DenseMap::iterator It = ValueMap.find(Op); - if (It != ValueMap.end()) + ValueMap::iterator It = VMap.find(Op); + if (It != VMap.end()) I->setOperand(op, It->second); } } @@ -183,7 +183,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. - typedef DenseMap ValueToValueMapTy; + typedef ValueMap ValueToValueMapTy; ValueToValueMapTy LastValueMap; std::vector OrigPHINode; for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { @@ -205,26 +205,26 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) for (std::vector::iterator BB = LoopBlocks.begin(), E = LoopBlocks.end(); BB != E; ++BB) { - ValueToValueMapTy ValueMap; - BasicBlock *New = CloneBasicBlock(*BB, ValueMap, "." 
+ Twine(It)); + ValueToValueMapTy VMap; + BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It)); Header->getParent()->getBasicBlockList().push_back(New); // Loop over all of the PHI nodes in the block, changing them to use the // incoming values from the previous block. if (*BB == Header) for (unsigned i = 0, e = OrigPHINode.size(); i != e; ++i) { - PHINode *NewPHI = cast(ValueMap[OrigPHINode[i]]); + PHINode *NewPHI = cast(VMap[OrigPHINode[i]]); Value *InVal = NewPHI->getIncomingValueForBlock(LatchBlock); if (Instruction *InValI = dyn_cast(InVal)) if (It > 1 && L->contains(InValI)) InVal = LastValueMap[InValI]; - ValueMap[OrigPHINode[i]] = InVal; + VMap[OrigPHINode[i]] = InVal; New->getInstList().erase(NewPHI); } // Update our running map of newest clones LastValueMap[*BB] = New; - for (ValueToValueMapTy::iterator VI = ValueMap.begin(), VE = ValueMap.end(); + for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); VI != VE; ++VI) LastValueMap[VI->first] = VI->second; diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index 0ed8c72..2696e69 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -45,6 +45,7 @@ #include "llvm/Pass.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" @@ -62,10 +63,7 @@ static cl::opt ExpensiveEHSupport("enable-correct-eh-support", namespace { class LowerInvoke : public FunctionPass { // Used for both models. - Constant *WriteFn; Constant *AbortFn; - Value *AbortMessage; - unsigned AbortMessageLength; // Used for expensive EH support. const Type *JBLinkTy; @@ -92,10 +90,8 @@ namespace { } private: - void createAbortMessage(Module *M); - void writeAbortMessage(Instruction *IB); bool insertCheapEHSupport(Function &F); - void splitLiveRangesLiveAcrossInvokes(std::vector &Invokes); + void splitLiveRangesLiveAcrossInvokes(SmallVectorImpl&Invokes); void rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, AllocaInst *InvokeNum, AllocaInst *StackPtr, SwitchInst *CatchSwitch); @@ -123,7 +119,6 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI, bool LowerInvoke::doInitialization(Module &M) { const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); - AbortMessage = 0; if (useExpensiveEHSupport) { // Insert a type for the linked list of jump buffers. unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0; @@ -175,68 +170,14 @@ bool LowerInvoke::doInitialization(Module &M) { // We need the 'write' and 'abort' functions for both models. AbortFn = M.getOrInsertFunction("abort", Type::getVoidTy(M.getContext()), (Type *)0); -#if 0 // "write" is Unix-specific.. code is going away soon anyway. - WriteFn = M.getOrInsertFunction("write", Type::VoidTy, Type::Int32Ty, - VoidPtrTy, Type::Int32Ty, (Type *)0); -#else - WriteFn = 0; -#endif return true; } -void LowerInvoke::createAbortMessage(Module *M) { - if (useExpensiveEHSupport) { - // The abort message for expensive EH support tells the user that the - // program 'unwound' without an 'invoke' instruction. 
- Constant *Msg = - ConstantArray::get(M->getContext(), - "ERROR: Exception thrown, but not caught!\n"); - AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 - - GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true, - GlobalValue::InternalLinkage, - Msg, "abortmsg"); - std::vector GEPIdx(2, - Constant::getNullValue(Type::getInt32Ty(M->getContext()))); - AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); - } else { - // The abort message for cheap EH support tells the user that EH is not - // enabled. - Constant *Msg = - ConstantArray::get(M->getContext(), - "Exception handler needed, but not enabled." - "Recompile program with -enable-correct-eh-support.\n"); - AbortMessageLength = Msg->getNumOperands()-1; // don't include \0 - - GlobalVariable *MsgGV = new GlobalVariable(*M, Msg->getType(), true, - GlobalValue::InternalLinkage, - Msg, "abortmsg"); - std::vector GEPIdx(2, Constant::getNullValue( - Type::getInt32Ty(M->getContext()))); - AbortMessage = ConstantExpr::getGetElementPtr(MsgGV, &GEPIdx[0], 2); - } -} - - -void LowerInvoke::writeAbortMessage(Instruction *IB) { -#if 0 - if (AbortMessage == 0) - createAbortMessage(IB->getParent()->getParent()->getParent()); - - // These are the arguments we WANT... - Value* Args[3]; - Args[0] = ConstantInt::get(Type::Int32Ty, 2); - Args[1] = AbortMessage; - Args[2] = ConstantInt::get(Type::Int32Ty, AbortMessageLength); - (new CallInst(WriteFn, Args, 3, "", IB))->setTailCall(); -#endif -} - bool LowerInvoke::insertCheapEHSupport(Function &F) { bool Changed = false; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast(BB->getTerminator())) { - std::vector CallArgs(II->op_begin(), II->op_end() - 3); + SmallVector CallArgs(II->op_begin(), II->op_end() - 3); // Insert a normal call instruction... CallInst *NewCall = CallInst::Create(II->getCalledValue(), CallArgs.begin(), CallArgs.end(), @@ -257,9 +198,6 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) { ++NumInvokes; Changed = true; } else if (UnwindInst *UI = dyn_cast(BB->getTerminator())) { - // Insert a new call to write(2, AbortMessage, AbortMessageLength); - writeAbortMessage(UI); - // Insert a call to abort() CallInst::Create(AbortFn, "", UI)->setTailCall(); @@ -320,7 +258,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo, CatchSwitch->addCase(InvokeNoC, II->getUnwindDest()); // Insert a normal call instruction. - std::vector CallArgs(II->op_begin(), II->op_end() - 3); + SmallVector CallArgs(II->op_begin(), II->op_end() - 3); CallInst *NewCall = CallInst::Create(II->getCalledValue(), CallArgs.begin(), CallArgs.end(), "", II); @@ -349,7 +287,7 @@ static void MarkBlocksLiveIn(BasicBlock *BB, std::set &LiveBBs) { // across the unwind edge. This process also splits all critical edges // coming out of invoke's. void LowerInvoke:: -splitLiveRangesLiveAcrossInvokes(std::vector &Invokes) { +splitLiveRangesLiveAcrossInvokes(SmallVectorImpl &Invokes) { // First step, split all critical edges from invoke instructions. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; @@ -371,16 +309,33 @@ splitLiveRangesLiveAcrossInvokes(std::vector &Invokes) { ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - // This is always a no-op cast because we're casting AI to AI->getType() so - // src and destination types are identical. BitCast is the only possibility. 
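
// insertCheapEHSupport in a nutshell: each invoke becomes a plain call
// plus an unconditional branch to the normal destination, and the unwind
// edge is abandoned (an actual unwind aborts).  Sketch only; attribute
// copying and PHI cleanup omitted (II is the InvokeInst being lowered):
SmallVector<Value*, 16> CallArgs(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
                                     CallArgs.begin(), CallArgs.end(),
                                     "", II);
NewCall->takeName(II);
II->replaceAllUsesWith(NewCall);
BranchInst::Create(II->getNormalDest(), II);  // fall through normally
II->eraseFromParent();
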
- CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Normally its is forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, we're - // replacing it here with the same value it was constructed with to simply - // make NC its user. - NC->setOperand(0, AI); + const Type *Ty = AI->getType(); + // Aggregate types can't be cast, but are legal argument types, so we have + // to handle them differently. We use an extract/insert pair as a + // lightweight method to achieve the same goal. + if (isa(Ty) || isa(Ty) || isa(Ty)) { + Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); + Instruction *NI = InsertValueInst::Create(AI, EI, 0); + NI->insertAfter(EI); + AI->replaceAllUsesWith(NI); + // Set the operand of the instructions back to the AllocaInst. + EI->setOperand(0, AI); + NI->setOperand(0, AI); + } else { + // This is always a no-op cast because we're casting AI to AI->getType() + // so src and destination types are identical. BitCast is the only + // possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Set the operand of the cast instruction back to the AllocaInst. + // Normally it's forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, + // we're replacing it here with the same value it was constructed with. + // We do this because the above replaceAllUsesWith() clobbered the + // operand, but we want this one to remain. + NC->setOperand(0, AI); + } } // Finally, scan the code looking for instructions with bad live ranges. @@ -402,7 +357,7 @@ splitLiveRangesLiveAcrossInvokes(std::vector &Invokes) { continue; // Avoid iterator invalidation by copying users to a temporary vector. - std::vector Users; + SmallVector Users; for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { Instruction *User = cast(*UI); @@ -452,9 +407,9 @@ splitLiveRangesLiveAcrossInvokes(std::vector &Invokes) { } bool LowerInvoke::insertExpensiveEHSupport(Function &F) { - std::vector Returns; - std::vector Unwinds; - std::vector Invokes; + SmallVector Returns; + SmallVector Unwinds; + SmallVector Invokes; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { @@ -502,12 +457,11 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) { new AllocaInst(JBLinkTy, 0, Align, "jblink", F.begin()->begin()); - std::vector Idx; - Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext()))); - Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1)); - OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(), + Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) }; + OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2], "OldBuf", - EntryBB->getTerminator()); + EntryBB->getTerminator()); // Copy the JBListHead to the alloca. 
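
// The index-list rewrite above in miniature: a stack array feeds
// GetElementPtrInst::Create's iterator-pair interface just as well as the
// old heap-allocated std::vector (Ctx, JmpBuf, InsertPt hypothetical):
Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(Ctx)),
                 ConstantInt::get(Type::getInt32Ty(Ctx), 1) };
Value *BufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
                                          "OldBuf", InsertPt);
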
@@ -402,7 +357,7 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) {
       continue;
 
       // Avoid iterator invalidation by copying users to a temporary vector.
-      std::vector<Instruction*> Users;
+      SmallVector<Instruction*, 16> Users;
       for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end();
            UI != E; ++UI) {
         Instruction *User = cast<Instruction>(*UI);
@@ -452,9 +407,9 @@ splitLiveRangesLiveAcrossInvokes(std::vector<InvokeInst*> &Invokes) {
 }
 
 bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
-  std::vector<ReturnInst*> Returns;
-  std::vector<UnwindInst*> Unwinds;
-  std::vector<InvokeInst*> Invokes;
+  SmallVector<ReturnInst*, 16> Returns;
+  SmallVector<UnwindInst*, 16> Unwinds;
+  SmallVector<InvokeInst*, 16> Invokes;
 
   for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
     if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
@@ -502,12 +457,11 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
       new AllocaInst(JBLinkTy, 0, Align,
                      "jblink", F.begin()->begin());
 
-    std::vector<Value*> Idx;
-    Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext())));
-    Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 1));
-    OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
+    Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+                     ConstantInt::get(Type::getInt32Ty(F.getContext()), 1) };
+    OldJmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
                                              "OldBuf",
-                                             EntryBB->getTerminator());
+                                             EntryBB->getTerminator());
 
     // Copy the JBListHead to the alloca.
     Value *OldBuf = new LoadInst(JBListHead, "oldjmpbufptr", true,
@@ -552,7 +506,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
                                                      "setjmp.cont");
 
     Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 0);
-    Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, Idx.begin(), Idx.end(),
+    Value *JmpBufPtr = GetElementPtrInst::Create(JmpBuf, &Idx[0], &Idx[2],
                                                  "TheJmpBuf",
                                                  EntryBB->getTerminator());
     JmpBufPtr = new BitCastInst(JmpBufPtr,
@@ -605,24 +559,20 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
     // Create the block to do the longjmp.
     // Get a pointer to the jmpbuf and longjmp.
-    std::vector<Value*> Idx;
-    Idx.push_back(Constant::getNullValue(Type::getInt32Ty(F.getContext())));
-    Idx.push_back(ConstantInt::get(Type::getInt32Ty(F.getContext()), 0));
-    Idx[0] = GetElementPtrInst::Create(BufPtr, Idx.begin(), Idx.end(), "JmpBuf",
+    Value *Idx[] = { Constant::getNullValue(Type::getInt32Ty(F.getContext())),
+                     ConstantInt::get(Type::getInt32Ty(F.getContext()), 0) };
+    Idx[0] = GetElementPtrInst::Create(BufPtr, &Idx[0], &Idx[2], "JmpBuf",
                                        UnwindBlock);
     Idx[0] = new BitCastInst(Idx[0],
                              Type::getInt8PtrTy(F.getContext()),
                              "tmp", UnwindBlock);
     Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
-    CallInst::Create(LongJmpFn, Idx.begin(), Idx.end(), "", UnwindBlock);
+    CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock);
     new UnreachableInst(F.getContext(), UnwindBlock);
 
     // Set up the term block ("throw without a catch").
     new UnreachableInst(F.getContext(), TermBlock);
 
-    // Insert a new call to write(2, AbortMessage, AbortMessageLength);
-    writeAbortMessage(TermBlock->getTerminator());
-
     // Insert a call to abort()
     CallInst::Create(AbortFn, "", TermBlock->getTerminator())->setTailCall();
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 13f0a28..c0de193 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -69,11 +69,12 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
   // Only allow direct and non-volatile loads and stores...
   for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
-       UI != UE; ++UI)     // Loop over all of the uses of the alloca
-    if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+       UI != UE; ++UI) {   // Loop over all of the uses of the alloca
+    const User *U = *UI;
+    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
       if (LI->isVolatile())
         return false;
-    } else if (const StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
       if (SI->getOperand(0) == AI)
         return false;   // Don't allow a store OF the AI, only INTO the AI.
       if (SI->isVolatile())
@@ -81,6 +82,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
     } else {
       return false;
     }
+  }
 
   return true;
 }
@@ -603,9 +605,8 @@ ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
   // To determine liveness, we must iterate through the predecessors of blocks
   // where the def is live.  Blocks are added to the worklist if we need to
   // check their predecessors.  Start with all the using blocks.
-  SmallVector<BasicBlock*, 64> LiveInBlockWorklist;
-  LiveInBlockWorklist.insert(LiveInBlockWorklist.end(),
-                             Info.UsingBlocks.begin(), Info.UsingBlocks.end());
+  SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
+                                                   Info.UsingBlocks.end());
 
   // If any of the using blocks is also a definition block, check to see if the
   // definition occurs before or after the use.  If it happens before the use,
@@ -897,6 +898,9 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
   // Propagate any debug metadata from the store onto the dbg.value.
   if (MDNode *SIMD = SI->getMetadata("dbg"))
     DbgVal->setMetadata("dbg", SIMD);
+  // Otherwise propagate debug metadata from dbg.declare.
+  else if (MDNode *MD = DDI->getMetadata("dbg"))
+    DbgVal->setMetadata("dbg", MD);
 }
 
 // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 9f2209d..fd3ed3e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1513,17 +1513,19 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   // Okay, we're going to insert the PHI node.  Since PBI is not the only
   // predecessor, compute the PHI'd conditional value for all of the preds.
   // Any predecessor where the condition is not computable we keep symbolic.
-  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
-    if ((PBI = dyn_cast<BranchInst>((*PI)->getTerminator())) &&
+  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+    BasicBlock *P = *PI;
+    if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) &&
         PBI != BI && PBI->isConditional() &&
         PBI->getCondition() == BI->getCondition() &&
         PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
       bool CondIsTrue = PBI->getSuccessor(0) == BB;
       NewPN->addIncoming(ConstantInt::get(Type::getInt1Ty(BB->getContext()),
-                                          CondIsTrue), *PI);
+                                          CondIsTrue), P);
     } else {
-      NewPN->addIncoming(BI->getCondition(), *PI);
+      NewPN->addIncoming(BI->getCondition(), P);
     }
+  }
 
   BI->setCondition(NewPN);
   return true;
@@ -1697,10 +1699,11 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
   SmallVector<BasicBlock*, 4> UncondBranchPreds;
   SmallVector<BranchInst*, 4> CondBranchPreds;
   for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
-    TerminatorInst *PTI = (*PI)->getTerminator();
+    BasicBlock *P = *PI;
+    TerminatorInst *PTI = P->getTerminator();
     if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
       if (BI->isUnconditional())
-        UncondBranchPreds.push_back(*PI);
+        UncondBranchPreds.push_back(P);
       else
         CondBranchPreds.push_back(BI);
     }
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 87ce631..3f6a90c 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -28,7 +28,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
   // DenseMap.  This includes any recursive calls to MapValue.
 
   // Global values and non-function-local metadata do not need to be seeded into
-  // the ValueMap if they are using the identity mapping.
+  // the VM if they are using the identity mapping.
   if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V) ||
       (isa<MDNode>(V) && !cast<MDNode>(V)->isFunctionLocal()))
     return VMSlot = const_cast<Value*>(V);
@@ -45,7 +45,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
 
   if (isa<ConstantInt>(C) || isa<ConstantFP>(C) ||
       isa<ConstantPointerNull>(C) || isa<ConstantAggregateZero>(C) ||
-      isa<UndefValue>(C) || isa<MDString>(C))
+      isa<UndefValue>(C))
     return VMSlot = C;           // Primitive constants map directly
 
   if (ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
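// [Illustrative sketch, not from the patch] Typical use of these entry points
// when cloning code, assuming the ValueMap-based typedef introduced below;
// OldI, OldArg, NewArg, and InsertPt are placeholder names:
//
//   ValueToValueMapTy VMap;
//   VMap[OldArg] = NewArg;          // seed explicit replacements
//   Instruction *NewI = OldI->clone();
//   NewI->insertBefore(InsertPt);
//   RemapInstruction(NewI, VMap);   // rewrites each operand via MapValue()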
@@ -125,11 +125,11 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM) {
 }
 
 /// RemapInstruction - Convert the instruction operands from referencing the
-/// current values into those specified by ValueMap.
+/// current values into those specified by VMap.
 ///
-void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &ValueMap) {
+void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap) {
   for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
-    Value *V = MapValue(*op, ValueMap);
+    Value *V = MapValue(*op, VMap);
     assert(V && "Referenced value not in value map!");
     *op = V;
   }
diff --git a/lib/Transforms/Utils/ValueMapper.h b/lib/Transforms/Utils/ValueMapper.h
index d61c24c..f4ff643 100644
--- a/lib/Transforms/Utils/ValueMapper.h
+++ b/lib/Transforms/Utils/ValueMapper.h
@@ -15,12 +15,12 @@
 #ifndef VALUEMAPPER_H
 #define VALUEMAPPER_H
 
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ValueMap.h"
 
 namespace llvm {
   class Value;
   class Instruction;
-  typedef DenseMap<const Value *, Value *> ValueToValueMapTy;
+  typedef ValueMap<const Value *, Value *> ValueToValueMapTy;
 
   Value *MapValue(const Value *V, ValueToValueMapTy &VM);
   void RemapInstruction(Instruction *I, ValueToValueMapTy &VM);
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index e48c026..7a471ef 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -70,8 +70,7 @@ static const Module *getModuleFromVal(const Value *V) {
 
 // PrintEscapedString - Print each character of the specified string, escaping
 // it if it is not printable or if it is an escape char.
-static void PrintEscapedString(const StringRef &Name,
-                               raw_ostream &Out) {
+static void PrintEscapedString(StringRef Name, raw_ostream &Out) {
   for (unsigned i = 0, e = Name.size(); i != e; ++i) {
     unsigned char C = Name[i];
     if (isprint(C) && C != '\\' && C != '"')
@@ -1419,6 +1418,9 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT,
   case GlobalValue::ExternalLinkage: break;
   case GlobalValue::PrivateLinkage:       Out << "private ";        break;
   case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break;
+  case GlobalValue::LinkerPrivateWeakLinkage:
+    Out << "linker_private_weak ";
+    break;
   case GlobalValue::InternalLinkage:      Out << "internal ";       break;
   case GlobalValue::LinkOnceAnyLinkage:   Out << "linkonce ";       break;
   case GlobalValue::LinkOnceODRLinkage:   Out << "linkonce_odr ";   break;
@@ -1469,8 +1471,11 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
     writeOperand(GV->getInitializer(), false);
   }
 
-  if (GV->hasSection())
-    Out << ", section \"" << GV->getSection() << '"';
+  if (GV->hasSection()) {
+    Out << ", section \"";
+    PrintEscapedString(GV->getSection(), Out);
+    Out << '"';
+  }
   if (GV->getAlignment())
     Out << ", align " << GV->getAlignment();
 
@@ -1628,8 +1633,11 @@ void AssemblyWriter::printFunction(const Function *F) {
   Attributes FnAttrs = Attrs.getFnAttributes();
   if (FnAttrs != Attribute::None)
     Out << ' ' << Attribute::getAsString(Attrs.getFnAttributes());
-  if (F->hasSection())
-    Out << " section \"" << F->getSection() << '"';
+  if (F->hasSection()) {
+    Out << " section \"";
+    PrintEscapedString(F->getSection(), Out);
+    Out << '"';
+  }
   if (F->getAlignment())
     Out << " align " << F->getAlignment();
   if (F->hasGC())
@@ -1854,6 +1862,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     default: Out << " cc" << CI->getCallingConv(); break;
     }
 
+    Operand = CI->getCalledValue();
     const PointerType    *PTy = cast<PointerType>(Operand->getType());
     const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
     const Type       *RetTy = FTy->getReturnType();
@@ -1877,10 +1886,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       writeOperand(Operand, true);
     }
     Out << '(';
-    for (unsigned op = 1, Eop = I.getNumOperands(); op < Eop; ++op) {
-      if (op > 1)
+    for (unsigned op = 0, Eop = CI->getNumArgOperands(); op < Eop; ++op) {
+      if (op > 0)
         Out << ", ";
-      writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op));
+      writeParamOperand(CI->getArgOperand(op), PAL.getParamAttributes(op + 1));
     }
     Out << ')';
     if (PAL.getFnAttributes() != Attribute::None)
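// [Illustrative sketch, not from the patch] The operand-numbering change
// behind these hunks: with CallInst::ArgOffset, argument i of a call is no
// longer reliably operand i+1, so code should go through the accessors
// rather than raw operand indices:
//
//   for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
//     Value *A = CI->getArgOperand(i);  // replaces the old getOperand(i + 1)
//     // ... attribute lists stay 1-based, hence getParamAttributes(i + 1)
//   }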
@@ -1925,10 +1934,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
       writeOperand(Operand, true);
     }
     Out << '(';
-    for (unsigned op = 0, Eop = I.getNumOperands() - 3; op < Eop; ++op) {
+    for (unsigned op = 0, Eop = II->getNumArgOperands(); op < Eop; ++op) {
       if (op)
         Out << ", ";
-      writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op + 1));
+      writeParamOperand(II->getArgOperand(op), PAL.getParamAttributes(op + 1));
     }
     Out << ')';
@@ -2027,7 +2036,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
 }
 
 static void WriteMDNodeComment(const MDNode *Node,
-                              formatted_raw_ostream &Out) {
+                               formatted_raw_ostream &Out) {
   if (Node->getNumOperands() < 1)
     return;
   ConstantInt *CI = dyn_cast_or_null<ConstantInt>(Node->getOperand(0));
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 0144210..dc39024 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Module.h"
 #include "llvm/IntrinsicInst.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/CallSite.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/IRBuilder.h"
 #include <cstring>
@@ -314,7 +315,8 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
 void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   Function *F = CI->getCalledFunction();
   LLVMContext &C = CI->getContext();
-  
+  ImmutableCallSite CS(CI);
+
   assert(F && "CallInst has no function associated with it.");
 
   if (!NewFn) {
@@ -344,11 +346,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
     if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
         isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
       std::vector<Constant*> Idxs;
-      Value *Op0 = CI->getOperand(1);
+      Value *Op0 = CI->getArgOperand(0);
       ShuffleVectorInst *SI = NULL;
       if (isLoadH || isLoadL) {
         Value *Op1 = UndefValue::get(Op0->getType());
-        Value *Addr = new BitCastInst(CI->getOperand(2),
+        Value *Addr = new BitCastInst(CI->getArgOperand(1),
                                       Type::getDoublePtrTy(C),
                                       "upgraded.", CI);
         Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
@@ -381,7 +383,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
       } else if (isMovSD ||
                  isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
-        Value *Op1 = CI->getOperand(2);
+        Value *Op1 = CI->getArgOperand(1);
         if (isMovSD) {
           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
           Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
@@ -395,8 +397,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
         Value *Mask = ConstantVector::get(Idxs);
         SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
       } else if (isShufPD) {
-        Value *Op1 = CI->getOperand(2);
-        unsigned MaskVal = cast<ConstantInt>(CI->getOperand(3))->getZExtValue();
+        Value *Op1 = CI->getArgOperand(1);
+        unsigned MaskVal = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
         Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
                                         ((MaskVal >> 1) & 1)+2));
@@ -416,8 +418,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       CI->eraseFromParent();
     } else if (F->getName() == "llvm.x86.sse41.pmulld") {
       // Upgrade this set of intrinsics into vector multiplies.
-      Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1),
-                                                   CI->getOperand(2),
+      Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
+                                                   CI->getArgOperand(1),
                                                    CI->getName(),
                                                    CI);
 
       // Fix up all the uses with our new multiply.
@@ -427,9 +429,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
       // Remove upgraded multiply.
       CI->eraseFromParent();
     } else if (F->getName() == "llvm.x86.ssse3.palign.r") {
-      Value *Op1 = CI->getOperand(1);
-      Value *Op2 = CI->getOperand(2);
-      Value *Op3 = CI->getOperand(3);
+      Value *Op1 = CI->getArgOperand(0);
+      Value *Op2 = CI->getArgOperand(1);
+      Value *Op3 = CI->getArgOperand(2);
       unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
       Value *Rep;
       IRBuilder<> Builder(C);
@@ -483,9 +485,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
 
       CI->eraseFromParent();
     } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
-      Value *Op1 = CI->getOperand(1);
-      Value *Op2 = CI->getOperand(2);
-      Value *Op3 = CI->getOperand(3);
+      Value *Op1 = CI->getArgOperand(0);
+      Value *Op2 = CI->getArgOperand(1);
+      Value *Op3 = CI->getArgOperand(2);
       unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
       Value *Rep;
       IRBuilder<> Builder(C);
@@ -556,10 +558,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   case Intrinsic::x86_mmx_psrl_w: {
     Value *Operands[2];
 
-    Operands[0] = CI->getOperand(1);
+    Operands[0] = CI->getArgOperand(0);
 
     // Cast the second parameter to the correct type.
-    BitCastInst *BC = new BitCastInst(CI->getOperand(2),
+    BitCastInst *BC = new BitCastInst(CI->getArgOperand(1),
                                       NewFn->getFunctionType()->getParamType(1),
                                       "upgraded.", CI);
     Operands[1] = BC;
@@ -583,9 +585,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   case Intrinsic::ctlz:
   case Intrinsic::ctpop:
   case Intrinsic::cttz: {
-    //  Build a small vector of the 1..(N-1) operands, which are the
-    //  parameters.
-    SmallVector<Value*, 8> Operands(CI->op_begin()+1, CI->op_end());
+    //  Build a small vector of the original arguments.
+    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
 
     // Construct a new CallInst
     CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
@@ -620,7 +621,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   case Intrinsic::eh_selector:
   case Intrinsic::eh_typeid_for: {
     // Only the return type changed.
-    SmallVector<Value*, 8> Operands(CI->op_begin() + 1, CI->op_end());
+    SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
     CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
                                        "upgraded." + CI->getName(), CI);
     NewCI->setTailCall(CI->isTailCall());
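// [Illustrative sketch, not from the patch] ImmutableCallSite, constructed
// above as CS, provides the same layout-independent view of a call's
// arguments for both CallInst and InvokeInst; copying just the arguments
// looks like:
//
//   SmallVector<Value*, 8> Args(CS.arg_begin(), CS.arg_end());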
@@ -643,8 +644,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
   case Intrinsic::memset: {
     // Add isVolatile
     const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
-    Value *Operands[5] = { CI->getOperand(1), CI->getOperand(2),
-                           CI->getOperand(3), CI->getOperand(4),
+    Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
+                           CI->getArgOperand(2), CI->getArgOperand(3),
                            llvm::ConstantInt::get(I1Ty, 0) };
     CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
                                        CI->getName(), CI);
@@ -726,7 +727,8 @@ void llvm::CheckDebugInfoIntrinsics(Module *M) {
   if (Function *Declare = M->getFunction("llvm.dbg.declare")) {
     if (!Declare->use_empty()) {
       DbgDeclareInst *DDI = cast<DbgDeclareInst>(Declare->use_back());
-      if (!isa<MDNode>(DDI->getOperand(1)) ||!isa<MDNode>(DDI->getOperand(2))) {
+      if (!isa<MDNode>(DDI->getArgOperand(0)) ||
+          !isa<MDNode>(DDI->getArgOperand(1))) {
         while (!Declare->use_empty()) {
           CallInst *CI = cast<CallInst>(Declare->use_back());
           CI->eraseFromParent();
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 549977c..3567266 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -658,7 +658,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
       }
   }
   // Handle an offsetof-like expression.
-  if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()){
+  if (Ty->isStructTy() || Ty->isArrayTy()) {
     if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
                                         DestTy, false))
       return C;
@@ -1817,8 +1817,15 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
       return Constant::getAllOnesValue(ResultTy);
 
   // Handle some degenerate cases first
-  if (isa<UndefValue>(C1) || isa<UndefValue>(C2))
+  if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
+    // For EQ and NE, we can always pick a value for the undef to make the
+    // predicate pass or fail, so we can return undef.
+    if (ICmpInst::isEquality(ICmpInst::Predicate(pred)))
+      return UndefValue::get(ResultTy);
+    // Otherwise, pick the same value as the non-undef operand, and fold
+    // it to true or false.
     return ConstantInt::get(ResultTy, CmpInst::isTrueWhenEqual(pred));
+  }
 
   // No compile-time operations on this type yet.
   if (C1->getType()->isPPC_FP128Ty())
@@ -2194,7 +2201,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
       }
 
       NewIndices.push_back(Combined);
-      NewIndices.insert(NewIndices.end(), Idxs+1, Idxs+NumIdx);
+      NewIndices.append(Idxs+1, Idxs+NumIdx);
       return (inBounds && cast<GEPOperator>(CE)->isInBounds()) ?
        ConstantExpr::getInBoundsGetElementPtr(CE->getOperand(0),
                                               &NewIndices[0],
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index bbf1375..ca1a399 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1058,6 +1058,8 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) {
     return LLVMPrivateLinkage;
   case GlobalValue::LinkerPrivateLinkage:
     return LLVMLinkerPrivateLinkage;
+  case GlobalValue::LinkerPrivateWeakLinkage:
+    return LLVMLinkerPrivateWeakLinkage;
   case GlobalValue::DLLImportLinkage:
     return LLVMDLLImportLinkage;
   case GlobalValue::DLLExportLinkage:
@@ -1108,6 +1110,9 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) {
   case LLVMLinkerPrivateLinkage:
     GV->setLinkage(GlobalValue::LinkerPrivateLinkage);
     break;
+  case LLVMLinkerPrivateWeakLinkage:
+    GV->setLinkage(GlobalValue::LinkerPrivateWeakLinkage);
+    break;
   case LLVMDLLImportLinkage:
     GV->setLinkage(GlobalValue::DLLImportLinkage);
     break;
@@ -2205,15 +2210,14 @@ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile(
 
 LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
                                          char **OutMessage) {
-  MemoryBuffer *MB = MemoryBuffer::getSTDIN();
-  if (!MB->getBufferSize()) {
-    delete MB;
-    *OutMessage = strdup("stdin is empty.");
-    return 1;
+  std::string Error;
+  if (MemoryBuffer *MB = MemoryBuffer::getSTDIN(&Error)) {
+    *OutMemBuf = wrap(MB);
+    return 0;
   }
 
-  *OutMemBuf = wrap(MB);
-  return 0;
+  *OutMessage = strdup(Error.c_str());
+  return 1;
 }
 
 void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index a37fe07..9792ada 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -286,9 +286,10 @@ bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
   for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
     // PHI nodes uses values in the corresponding predecessor block.  For other
     // instructions, just check to see whether the parent of the use matches up.
-    const PHINode *PN = dyn_cast<PHINode>(*UI);
+    const User *U = *UI;
+    const PHINode *PN = dyn_cast<PHINode>(U);
     if (PN == 0) {
-      if (cast<Instruction>(*UI)->getParent() != BB)
+      if (cast<Instruction>(U)->getParent() != BB)
         return true;
       continue;
     }
@@ -401,12 +402,20 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
       return false;
     // Note that it is not safe to speculate into a malloc'd region because
     // malloc may return null.
-    if (isa<AllocaInst>(getOperand(0)))
+    // It's also not safe to follow a bitcast, for example:
+    //   bitcast i8* (alloca i8) to i32*
+    // would result in a 4-byte load from a 1-byte alloca.
+    Value *Op0 = getOperand(0);
+    if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0)) {
+      // TODO: it's safe to do this for any GEP with constant indices that
+      // compute inside the allocated type, but not for any inbounds gep.
+      if (GEP->hasAllZeroIndices())
+        Op0 = GEP->getPointerOperand();
+    }
+    if (isa<AllocaInst>(Op0))
       return true;
     if (GlobalVariable *GV = dyn_cast<GlobalVariable>(getOperand(0)))
       return !GV->hasExternalWeakLinkage();
-    // FIXME: Handle cases involving GEPs.  We have to be careful because
-    // a load of a out-of-bounds GEP has undefined behavior.
     return false;
   }
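// [Illustrative sketch, not from the patch] The pointer test the new code
// performs, restated in isolation: a GEP with all-zero indices designates
// the same address as its base pointer, so a load through it is as safe to
// speculate as a direct load from the alloca (names are placeholders):
//
//   Value *P = LI->getPointerOperand();
//   if (GEPOperator *GEP = dyn_cast<GEPOperator>(P))
//     if (GEP->hasAllZeroIndices())
//       P = GEP->getPointerOperand();   // same address as the base
//   bool Safe = isa<AllocaInst>(P);     // alloca memory is dereferenceable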
   case Call:
@@ -421,6 +430,7 @@ bool Instruction::isSafeToSpeculativelyExecute() const {
   case Store:
   case Ret:
   case Br:
+  case IndirectBr:
   case Switch:
   case Unwind:
   case Unreachable:
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index f64b220..c13696f 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -33,7 +33,9 @@ using namespace llvm;
 User::op_iterator CallSite::getCallee() const {
   Instruction *II(getInstruction());
   return isCall()
-    ? cast<CallInst>(II)->op_begin()
+    ? (CallInst::ArgOffset
+       ? cast<CallInst>(II)->op_begin()
+       : cast<CallInst>(II)->op_end() - 1)
     : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function
 }
 
@@ -231,8 +233,7 @@ CallInst::~CallInst() {
 
 void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
   assert(NumOperands == NumParams+1 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
+  Op<ArgOffset -1>() = Func;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -245,16 +246,15 @@ void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
     assert((i >= FTy->getNumParams() ||
             FTy->getParamType(i) == Params[i]->getType()) &&
            "Calling a function with a bad signature!");
-    OL[i+1] = Params[i];
+    OperandList[i + ArgOffset] = Params[i];
   }
 }
 
 void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
   assert(NumOperands == 3 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
-  OL[1] = Actual1;
-  OL[2] = Actual2;
+  Op<ArgOffset -1>() = Func;
+  Op<ArgOffset +0>() = Actual1;
+  Op<ArgOffset +1>() = Actual2;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -273,9 +273,8 @@ void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
 
 void CallInst::init(Value *Func, Value *Actual) {
   assert(NumOperands == 2 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
-  OL[1] = Actual;
+  Op<ArgOffset -1>() = Func;
+  Op<ArgOffset +0>() = Actual;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -291,8 +290,7 @@ void CallInst::init(Value *Func, Value *Actual) {
 
 void CallInst::init(Value *Func) {
   assert(NumOperands == 1 && "NumOperands not set up?");
-  Use *OL = OperandList;
-  OL[0] = Func;
+  Op<ArgOffset -1>() = Func;
 
   const FunctionType *FTy =
     cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
@@ -473,9 +471,10 @@ static Instruction *createMalloc(Instruction *InsertBefore,
 Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
                                     const Type *IntPtrTy, const Type *AllocTy,
                                     Value *AllocSize, Value *ArraySize,
+                                    Function * MallocF,
                                     const Twine &Name) {
   return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
-                      ArraySize, NULL, Name);
+                      ArraySize, MallocF, Name);
 }
 
 /// CreateMalloc - Generate the IR for a call to malloc:
@@ -527,8 +526,8 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
 }
 
 /// CreateFree - Generate the IR for a call to the builtin free function.
-void CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
-  createFree(Source, InsertBefore, NULL);
+Instruction * CallInst::CreateFree(Value* Source, Instruction *InsertBefore) {
+  return createFree(Source, InsertBefore, NULL);
 }
 
 /// CreateFree - Generate the IR for a call to the builtin free function.
@@ -828,8 +827,8 @@ static Value *getAISize(LLVMContext &Context, Value *Amt) {
   else {
     assert(!isa<BasicBlock>(Amt) &&
            "Passed basic block into allocation size parameter! Use other ctor");
-    assert(Amt->getType()->isIntegerTy(32) &&
-           "Allocation array size is not a 32-bit integer!");
+    assert(Amt->getType()->isIntegerTy() &&
+           "Allocation array size is not an integer!");
   }
   return Amt;
 }
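// [Illustrative sketch, not from the patch] A caller of the revised
// CreateMalloc/CreateFree entry points; Ty, IntPtrTy, and InsertPt are
// placeholder names, and the null ArraySize/MallocF arguments request the
// defaults:
//
//   Value *Sz = ConstantExpr::getSizeOf(Ty);
//   Instruction *M = CallInst::CreateMalloc(InsertPt, IntPtrTy, Ty, Sz,
//                                           /*ArraySize=*/0, /*MallocF=*/0);
//   Instruction *F = CallInst::CreateFree(M, InsertPt); // now returns the call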
Use other ctor"); - assert(Amt->getType()->isIntegerTy(32) && - "Allocation array size is not a 32-bit integer!"); + assert(Amt->getType()->isIntegerTy() && + "Allocation array size is not an integer!"); } return Amt; } @@ -1456,7 +1455,7 @@ void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx, Op<0>() = Agg; Op<1>() = Val; - Indices.insert(Indices.end(), Idx, Idx + NumIdx); + Indices.append(Idx, Idx + NumIdx); setName(Name); } @@ -1509,7 +1508,7 @@ void ExtractValueInst::init(const unsigned *Idx, unsigned NumIdx, const Twine &Name) { assert(NumOperands == 1 && "NumOperands not initialized?"); - Indices.insert(Indices.end(), Idx, Idx + NumIdx); + Indices.append(Idx, Idx + NumIdx); setName(Name); } @@ -1911,9 +1910,12 @@ bool CastInst::isLosslessCast() const { /// # bitcast i32* %x to i8* /// # bitcast <2 x i32> %x to <4 x i16> /// # ptrtoint i32* %x to i32 ; on 32-bit plaforms only -/// @brief Determine if a cast is a no-op. -bool CastInst::isNoopCast(const Type *IntPtrTy) const { - switch (getOpcode()) { +/// @brief Determine if the described cast is a no-op. +bool CastInst::isNoopCast(Instruction::CastOps Opcode, + const Type *SrcTy, + const Type *DestTy, + const Type *IntPtrTy) { + switch (Opcode) { default: assert(!"Invalid CastOp"); case Instruction::Trunc: @@ -1930,13 +1932,18 @@ bool CastInst::isNoopCast(const Type *IntPtrTy) const { return true; // BitCast never modifies bits. case Instruction::PtrToInt: return IntPtrTy->getScalarSizeInBits() == - getType()->getScalarSizeInBits(); + DestTy->getScalarSizeInBits(); case Instruction::IntToPtr: return IntPtrTy->getScalarSizeInBits() == - getOperand(0)->getType()->getScalarSizeInBits(); + SrcTy->getScalarSizeInBits(); } } +/// @brief Determine if a cast is a no-op. +bool CastInst::isNoopCast(const Type *IntPtrTy) const { + return isNoopCast(getOpcode(), getOperand(0)->getType(), getType(), IntPtrTy); +} + /// This function determines if a pair of casts can be eliminated and what /// opcode should be used in the elimination. This assumes that there are two /// instructions like this: @@ -1999,6 +2006,14 @@ unsigned CastInst::isEliminableCastPair( { 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr | { 5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast -+ }; + + // If either of the casts are a bitcast from scalar to vector, disallow the + // merging. 
+  if ((firstOp == Instruction::BitCast &&
+       isa<VectorType>(SrcTy) != isa<VectorType>(MidTy)) ||
+      (secondOp == Instruction::BitCast &&
+       isa<VectorType>(MidTy) != isa<VectorType>(DstTy)))
+    return 0; // Disallowed
+
   int ElimCase = CastResults[firstOp-Instruction::CastOpsBegin]
                             [secondOp-Instruction::CastOpsBegin];
diff --git a/lib/VMCore/IntrinsicInst.cpp b/lib/VMCore/IntrinsicInst.cpp
index c37d5b0..ac8ec20 100644
--- a/lib/VMCore/IntrinsicInst.cpp
+++ b/lib/VMCore/IntrinsicInst.cpp
@@ -54,7 +54,7 @@ Value *DbgInfoIntrinsic::StripCast(Value *C) {
 ///
 
 Value *DbgDeclareInst::getAddress() const {
-  if (MDNode* MD = cast_or_null<MDNode>(getOperand(1)))
+  if (MDNode* MD = cast_or_null<MDNode>(getArgOperand(0)))
     return MD->getOperand(0);
   else
     return NULL;
@@ -65,9 +65,9 @@ Value *DbgDeclareInst::getAddress() const {
 ///
 
 const Value *DbgValueInst::getValue() const {
-  return cast<MDNode>(getOperand(1))->getOperand(0);
+  return cast<MDNode>(getArgOperand(0))->getOperand(0);
 }
 
 Value *DbgValueInst::getValue() {
-  return cast<MDNode>(getOperand(1))->getOperand(0);
+  return cast<MDNode>(getArgOperand(0))->getOperand(0);
 }
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index b894ea3..1d3a058 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -133,6 +133,7 @@ static const Function *getFunctionForValue(Value *V) {
 
 static const Function *assertLocalFunction(const MDNode *N) {
   if (!N->isFunctionLocal()) return 0;
 
+  // FIXME: This does not handle cyclic function local metadata.
   const Function *F = 0, *NewF = 0;
   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
     if (Value *V = N->getOperand(i)) {
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index 94840f0..38a51df 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -17,6 +17,7 @@
 #include "llvm/DerivedTypes.h"
 #include "llvm/GVMaterializer.h"
 #include "llvm/LLVMContext.h"
+#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/LeakDetector.h"
@@ -311,9 +312,11 @@ GlobalAlias *Module::getNamedAlias(StringRef Name) const {
 
 /// getNamedMetadata - Return the first NamedMDNode in the module with the
 /// specified name. This method returns null if a NamedMDNode with the
-//// specified name is not found.
-NamedMDNode *Module::getNamedMetadata(StringRef Name) const {
-  return NamedMDSymTab->lookup(Name);
+/// specified name is not found.
+NamedMDNode *Module::getNamedMetadata(const Twine &Name) const {
+  SmallString<256> NameData;
+  StringRef NameRef = Name.toStringRef(NameData);
+  return NamedMDSymTab->lookup(NameRef);
 }
 
 /// getOrInsertNamedMetadata - Return the first named MDNode in the module
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index a60877d..efd98af 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -35,6 +35,15 @@ using namespace llvm;
 // Pass Implementation
 //
 
+Pass::Pass(PassKind K, intptr_t pid) : Resolver(0), PassID(pid), Kind(K) {
+  assert(pid && "pid cannot be 0");
+}
+
+Pass::Pass(PassKind K, const void *pid)
+  : Resolver(0), PassID((intptr_t)pid), Kind(K) {
+  assert(pid && "pid cannot be 0");
+}
+
 // Force out-of-line virtual method.
 Pass::~Pass() {
   delete Resolver;
@@ -92,6 +101,23 @@ void Pass::verifyAnalysis() const {
   // By default, don't do anything.
 }
 
+void *Pass::getAdjustedAnalysisPointer(const PassInfo *) {
+  return this;
+}
+
+ImmutablePass *Pass::getAsImmutablePass() {
+  return 0;
+}
+
+PMDataManager *Pass::getAsPMDataManager() {
+  return 0;
+}
+
+void Pass::setResolver(AnalysisResolver *AR) {
+  assert(!Resolver && "Resolver is already set");
+  Resolver = AR;
+}
+
 // print - Print out the internal state of the pass.  This is called by Analyze
 // to print out the contents of an analysis.  Otherwise it is not necessary to
 // implement this method.
@@ -364,6 +390,14 @@ void PassInfo::unregisterPass() {
   getPassRegistrar()->UnregisterPass(*this);
 }
 
+Pass *PassInfo::createPass() const {
+  assert((!isAnalysisGroup() || NormalCtor) &&
+         "No default implementation found for analysis group!");
+  assert(NormalCtor &&
+         "Cannot call createPass on PassInfo without default ctor!");
+  return NormalCtor();
+}
+
 //===----------------------------------------------------------------------===//
 //                  Analysis Group Implementation Code
 //===----------------------------------------------------------------------===//
@@ -467,4 +501,15 @@ void AnalysisUsage::setPreservesCFG() {
   GetCFGOnlyPasses(Preserved).enumeratePasses();
 }
 
+AnalysisUsage &AnalysisUsage::addRequiredID(AnalysisID ID) {
+  assert(ID && "Pass class not registered!");
+  Required.push_back(ID);
+  return *this;
+}
+AnalysisUsage &AnalysisUsage::addRequiredTransitiveID(AnalysisID ID) {
+  assert(ID && "Pass class not registered!");
+  Required.push_back(ID);
+  RequiredTransitive.push_back(ID);
+  return *this;
+}
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index a56938c..296b0d1 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -1147,6 +1147,11 @@ void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
   llvm_unreachable("Unable to schedule pass");
 }
 
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, const PassInfo *PI, Function &F) {
+  assert(0 && "Unable to find on the fly pass");
+  return NULL;
+}
+
 // Destructor
 PMDataManager::~PMDataManager() {
   for (SmallVector<Pass *, 8>::iterator I = PassVector.begin(),
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 645dd5a..585edf0 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -322,7 +322,13 @@ void Value::replaceAllUsesWith(Value *New) {
 
 Value *Value::stripPointerCasts() {
   if (!getType()->isPointerTy())
     return this;
+
+  // Even though we don't look through PHI nodes, we could be called on an
+  // instruction in an unreachable block, which may be on a cycle.
+  SmallPtrSet<Value *, 4> Visited;
+
   Value *V = this;
+  Visited.insert(V);
   do {
     if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
       if (!GEP->hasAllZeroIndices())
@@ -338,7 +344,9 @@ Value *Value::stripPointerCasts() {
       return V;
     }
     assert(V->getType()->isPointerTy() && "Unexpected operand type!");
-  } while (1);
+  } while (Visited.insert(V));
+
+  return V;
 }
 
 Value *Value::getUnderlyingObject(unsigned MaxLookup) {
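// [Illustrative sketch, not from the patch] Why the loop above terminates:
// SmallPtrSet::insert returns false when the element is already present, so
// the first revisited value (possible only on a use-def cycle in unreachable
// code) ends the do/while. The guard in miniature, with a hypothetical
// single-step helper:
//
//   SmallPtrSet<Value *, 4> Visited;
//   Visited.insert(V);
//   do {
//     V = stepThroughOneCastOrGEP(V);   // placeholder for one strip step
//   } while (Visited.insert(V));        // false once V repeats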
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 75988cc..f97699d 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -85,7 +85,8 @@ namespace {  // Anonymous namespace for class
       for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
         if (I->empty() || !I->back().isTerminator()) {
-          dbgs() << "Basic Block does not have terminator!\n";
+          dbgs() << "Basic Block in function '" << F.getName()
+                 << "' does not have terminator!\n";
           WriteAsOperand(dbgs(), I, true);
           dbgs() << "\n";
           Broken = true;
@@ -1356,7 +1357,7 @@ void Verifier::visitLoadInst(LoadInst &LI) {
 
 void Verifier::visitStoreInst(StoreInst &SI) {
   const PointerType *PTy = dyn_cast<PointerType>(SI.getOperand(1)->getType());
-  Assert1(PTy, "Load operand must be a pointer.", &SI);
+  Assert1(PTy, "Store operand must be a pointer.", &SI);
   const Type *ElTy = PTy->getElementType();
   Assert2(ElTy == SI.getOperand(0)->getType(),
           "Stored value type does not match pointer operand type!",
@@ -1371,8 +1372,8 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
           &AI);
   Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type",
           &AI);
-  Assert1(AI.getArraySize()->getType()->isIntegerTy(32),
-          "Alloca array size must be i32", &AI);
+  Assert1(AI.getArraySize()->getType()->isIntegerTy(),
+          "Alloca array size must have integer type", &AI);
   visitInstruction(AI);
 }
 
@@ -1453,7 +1454,7 @@ void Verifier::visitInstruction(Instruction &I) {
     if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
       // Check to make sure that the "address of" an intrinsic function is never
       // taken.
-      Assert1(!F->isIntrinsic() || (i == 0 && isa<CallInst>(I)),
+      Assert1(!F->isIntrinsic() || (i + 1 == e && isa<CallInst>(I)),
               "Cannot take the address of an intrinsic!", &I);
       Assert1(F->getParent() == Mod, "Referencing function in another module!",
               &I);
@@ -1536,7 +1537,8 @@ void Verifier::visitInstruction(Instruction &I) {
                 "Instruction does not dominate all uses!", Op, &I);
       }
     } else if (isa<InlineAsm>(I.getOperand(i))) {
-      Assert1((i == 0 && isa<CallInst>(I)) || (i + 3 == e && isa<InvokeInst>(I)),
+      Assert1((i + 1 == e && isa<CallInst>(I)) ||
              (i + 3 == e && isa<InvokeInst>(I)),
              "Cannot take the address of an inline asm!", &I);
     }
   }
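// [Illustrative sketch, not from the patch] Under the relaxed verifier rule
// above, an alloca sized by any integer width now verifies, where previously
// only i32 was accepted. A 2.8-era snippet building an i64-sized alloca;
// Ctx and InsertPt are placeholder names:
//
//   Value *N = ConstantInt::get(Type::getInt64Ty(Ctx), 16);
//   AllocaInst *Buf = new AllocaInst(Type::getInt8Ty(Ctx), N, "buf", InsertPt);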
@@ -1628,24 +1630,24 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
 
   // If the intrinsic takes MDNode arguments, verify that they are either global
   // or are local to *this* function.
-  for (unsigned i = 1, e = CI.getNumOperands(); i != e; ++i)
-    if (MDNode *MD = dyn_cast<MDNode>(CI.getOperand(i)))
+  for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i)
+    if (MDNode *MD = dyn_cast<MDNode>(CI.getArgOperand(i)))
       visitMDNode(*MD, CI.getParent()->getParent());
 
   switch (ID) {
   default:
     break;
   case Intrinsic::dbg_declare: {  // llvm.dbg.declare
-    Assert1(CI.getOperand(1) && isa<MDNode>(CI.getOperand(1)),
+    Assert1(CI.getArgOperand(0) && isa<MDNode>(CI.getArgOperand(0)),
             "invalid llvm.dbg.declare intrinsic call 1", &CI);
-    MDNode *MD = cast<MDNode>(CI.getOperand(1));
+    MDNode *MD = cast<MDNode>(CI.getArgOperand(0));
     Assert1(MD->getNumOperands() == 1,
             "invalid llvm.dbg.declare intrinsic call 2", &CI);
   } break;
   case Intrinsic::memcpy:
   case Intrinsic::memmove:
   case Intrinsic::memset:
-    Assert1(isa<ConstantInt>(CI.getOperand(4)),
+    Assert1(isa<ConstantInt>(CI.getArgOperand(3)),
            "alignment argument of memory intrinsics must be a constant int",
            &CI);
    break;
@@ -1654,10 +1656,10 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
  case Intrinsic::gcread:
    if (ID == Intrinsic::gcroot) {
      AllocaInst *AI =
-        dyn_cast<AllocaInst>(CI.getOperand(1)->stripPointerCasts());
+        dyn_cast<AllocaInst>(CI.getArgOperand(0)->stripPointerCasts());
      Assert1(AI && AI->getType()->getElementType()->isPointerTy(),
              "llvm.gcroot parameter #1 must be a pointer alloca.", &CI);
-      Assert1(isa<Constant>(CI.getOperand(2)),
+      Assert1(isa<Constant>(CI.getArgOperand(1)),
              "llvm.gcroot parameter #2 must be a constant.", &CI);
    }
 
@@ -1665,32 +1667,32 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
            "Enclosing function does not use GC.", &CI);
    break;
  case Intrinsic::init_trampoline:
-    Assert1(isa<Function>(CI.getOperand(2)->stripPointerCasts()),
+    Assert1(isa<Function>(CI.getArgOperand(1)->stripPointerCasts()),
            "llvm.init_trampoline parameter #2 must resolve to a function.",
            &CI);
    break;
  case Intrinsic::prefetch:
-    Assert1(isa<ConstantInt>(CI.getOperand(2)) &&
-            isa<ConstantInt>(CI.getOperand(3)) &&
-            cast<ConstantInt>(CI.getOperand(2))->getZExtValue() < 2 &&
-            cast<ConstantInt>(CI.getOperand(3))->getZExtValue() < 4,
+    Assert1(isa<ConstantInt>(CI.getArgOperand(1)) &&
+            isa<ConstantInt>(CI.getArgOperand(2)) &&
+            cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue() < 2 &&
+            cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue() < 4,
            "invalid arguments to llvm.prefetch",
            &CI);
    break;
  case Intrinsic::stackprotector:
-    Assert1(isa<AllocaInst>(CI.getOperand(2)->stripPointerCasts()),
+    Assert1(isa<AllocaInst>(CI.getArgOperand(1)->stripPointerCasts()),
            "llvm.stackprotector parameter #2 must resolve to an alloca.",
            &CI);
    break;
  case Intrinsic::lifetime_start:
  case Intrinsic::lifetime_end:
  case Intrinsic::invariant_start:
-    Assert1(isa<ConstantInt>(CI.getOperand(1)),
+    Assert1(isa<ConstantInt>(CI.getArgOperand(0)),
            "size argument of memory use markers must be a constant integer",
            &CI);
    break;
  case Intrinsic::invariant_end:
-    Assert1(isa<ConstantInt>(CI.getOperand(2)),
+    Assert1(isa<ConstantInt>(CI.getArgOperand(1)),
            "llvm.invariant.end parameter #2 must be a constant integer",
            &CI);
    break;
  }
diff --git a/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/test/Analysis/BasicAA/args-rets-allocas-loads.ll
new file mode 100644
index 0000000..5d3f67e
--- /dev/null
+++ b/test/Analysis/BasicAA/args-rets-allocas-loads.ll
@@ -0,0 +1,310 @@
+; RUN: opt -aa-eval -print-all-alias-modref-info -disable-output < %s |& FileCheck %s
+
+declare void @callee(double* %callee_arg)
+declare void @nocap_callee(double* nocapture %nocap_callee_arg)
+
+declare double* @normal_returner()
+declare noalias double* @noalias_returner()
+
+define void @caller_a(double* %arg_a0,
+                      double* %arg_a1,
+                      double* noalias %noalias_arg_a0,
+                      double* noalias %noalias_arg_a1,
+                      double** %indirect_a0,
+                      double** %indirect_a1) {
+ %loaded_a0 = load double** %indirect_a0 + %loaded_a1 = load double** %indirect_a1 + + %escape_alloca_a0 = alloca double + %escape_alloca_a1 = alloca double + %noescape_alloca_a0 = alloca double + %noescape_alloca_a1 = alloca double + + %normal_ret_a0 = call double* @normal_returner() + %normal_ret_a1 = call double* @normal_returner() + %noalias_ret_a0 = call double* @noalias_returner() + %noalias_ret_a1 = call double* @noalias_returner() + + call void @callee(double* %escape_alloca_a0) + call void @callee(double* %escape_alloca_a1) + call void @nocap_callee(double* %noescape_alloca_a0) + call void @nocap_callee(double* %noescape_alloca_a1) + + store double 0.0, double* %loaded_a0 + store double 0.0, double* %loaded_a1 + store double 0.0, double* %arg_a0 + store double 0.0, double* %arg_a1 + store double 0.0, double* %noalias_arg_a0 + store double 0.0, double* %noalias_arg_a1 + store double 0.0, double* %escape_alloca_a0 + store double 0.0, double* %escape_alloca_a1 + store double 0.0, double* %noescape_alloca_a0 + store double 0.0, double* %noescape_alloca_a1 + store double 0.0, double* %normal_ret_a0 + store double 0.0, double* %normal_ret_a1 + store double 0.0, double* %noalias_ret_a0 + store double 0.0, double* %noalias_ret_a1 + ret void +} + +; CHECK: Function: caller_a: 16 pointers, 8 call sites +; CHECK: MayAlias: double* %arg_a0, double* %arg_a1 +; CHECK: NoAlias: double* %arg_a0, double* %noalias_arg_a0 +; CHECK: NoAlias: double* %arg_a1, double* %noalias_arg_a0 +; CHECK: NoAlias: double* %arg_a0, double* %noalias_arg_a1 +; CHECK: NoAlias: double* %arg_a1, double* %noalias_arg_a1 +; CHECK: NoAlias: double* %noalias_arg_a0, double* %noalias_arg_a1 +; CHECK: MayAlias: double* %arg_a0, double** %indirect_a0 +; CHECK: MayAlias: double* %arg_a1, double** %indirect_a0 +; CHECK: NoAlias: double* %noalias_arg_a0, double** %indirect_a0 +; CHECK: NoAlias: double* %noalias_arg_a1, double** %indirect_a0 +; CHECK: MayAlias: double* %arg_a0, double** %indirect_a1 +; CHECK: MayAlias: double* %arg_a1, double** %indirect_a1 +; CHECK: NoAlias: double* %noalias_arg_a0, double** %indirect_a1 +; CHECK: NoAlias: double* %noalias_arg_a1, double** %indirect_a1 +; CHECK: MayAlias: double** %indirect_a0, double** %indirect_a1 +; CHECK: MayAlias: double* %arg_a0, double* %loaded_a0 +; CHECK: MayAlias: double* %arg_a1, double* %loaded_a0 +; CHECK: NoAlias: double* %loaded_a0, double* %noalias_arg_a0 +; CHECK: NoAlias: double* %loaded_a0, double* %noalias_arg_a1 +; CHECK: MayAlias: double* %loaded_a0, double** %indirect_a0 +; CHECK: MayAlias: double* %loaded_a0, double** %indirect_a1 +; CHECK: MayAlias: double* %arg_a0, double* %loaded_a1 +; CHECK: MayAlias: double* %arg_a1, double* %loaded_a1 +; CHECK: NoAlias: double* %loaded_a1, double* %noalias_arg_a0 +; CHECK: NoAlias: double* %loaded_a1, double* %noalias_arg_a1 +; CHECK: MayAlias: double* %loaded_a1, double** %indirect_a0 +; CHECK: MayAlias: double* %loaded_a1, double** %indirect_a1 +; CHECK: MayAlias: double* %loaded_a0, double* %loaded_a1 +; CHECK: NoAlias: double* %arg_a0, double* %escape_alloca_a0 +; CHECK: NoAlias: double* %arg_a1, double* %escape_alloca_a0 +; CHECK: NoAlias: double* %escape_alloca_a0, double* %noalias_arg_a0 +; CHECK: NoAlias: double* %escape_alloca_a0, double* %noalias_arg_a1 +; CHECK: NoAlias: double* %escape_alloca_a0, double** %indirect_a0 +; CHECK: NoAlias: double* %escape_alloca_a0, double** %indirect_a1 +; CHECK: MayAlias: double* %escape_alloca_a0, double* %loaded_a0 +; CHECK: MayAlias: double* %escape_alloca_a0, double* 
%loaded_a1 +; CHECK: NoAlias: double* %arg_a0, double* %escape_alloca_a1 +; CHECK: NoAlias: double* %arg_a1, double* %escape_alloca_a1 +; CHECK: NoAlias: double* %escape_alloca_a1, double* %noalias_arg_a0 +; CHECK: NoAlias: double* %escape_alloca_a1, double* %noalias_arg_a1 +; CHECK: NoAlias: double* %escape_alloca_a1, double** %indirect_a0 +; CHECK: NoAlias: double* %escape_alloca_a1, double** %indirect_a1 +; CHECK: MayAlias: double* %escape_alloca_a1, double* %loaded_a0 +; CHECK: MayAlias: double* %escape_alloca_a1, double* %loaded_a1 +; CHECK: NoAlias: double* %escape_alloca_a0, double* %escape_alloca_a1 +; CHECK: NoAlias: double* %arg_a0, double* %noescape_alloca_a0 +; CHECK: NoAlias: double* %arg_a1, double* %noescape_alloca_a0 +; CHECK: NoAlias: double* %noalias_arg_a0, double* %noescape_alloca_a0 +; CHECK: NoAlias: double* %noalias_arg_a1, double* %noescape_alloca_a0 +; CHECK: NoAlias: double* %noescape_alloca_a0, double** %indirect_a0 +; CHECK: NoAlias: double* %noescape_alloca_a0, double** %indirect_a1 +; CHECK: NoAlias: double* %loaded_a0, double* %noescape_alloca_a0 +; CHECK: NoAlias: double* %loaded_a1, double* %noescape_alloca_a0 +; CHECK: NoAlias: double* %escape_alloca_a0, double* %noescape_alloca_a0 +; CHECK: NoAlias: double* %escape_alloca_a1, double* %noescape_alloca_a0 +; CHECK: NoAlias: double* %arg_a0, double* %noescape_alloca_a1 +; CHECK: NoAlias: double* %arg_a1, double* %noescape_alloca_a1 +; CHECK: NoAlias: double* %noalias_arg_a0, double* %noescape_alloca_a1 +; CHECK: NoAlias: double* %noalias_arg_a1, double* %noescape_alloca_a1 +; CHECK: NoAlias: double* %noescape_alloca_a1, double** %indirect_a0 +; CHECK: NoAlias: double* %noescape_alloca_a1, double** %indirect_a1 +; CHECK: NoAlias: double* %loaded_a0, double* %noescape_alloca_a1 +; CHECK: NoAlias: double* %loaded_a1, double* %noescape_alloca_a1 +; CHECK: NoAlias: double* %escape_alloca_a0, double* %noescape_alloca_a1 +; CHECK: NoAlias: double* %escape_alloca_a1, double* %noescape_alloca_a1 +; CHECK: NoAlias: double* %noescape_alloca_a0, double* %noescape_alloca_a1 +; CHECK: MayAlias: double* %arg_a0, double* %normal_ret_a0 +; CHECK: MayAlias: double* %arg_a1, double* %normal_ret_a0 +; CHECK: NoAlias: double* %noalias_arg_a0, double* %normal_ret_a0 +; CHECK: NoAlias: double* %noalias_arg_a1, double* %normal_ret_a0 +; CHECK: MayAlias: double* %normal_ret_a0, double** %indirect_a0 +; CHECK: MayAlias: double* %normal_ret_a0, double** %indirect_a1 +; CHECK: MayAlias: double* %loaded_a0, double* %normal_ret_a0 +; CHECK: MayAlias: double* %loaded_a1, double* %normal_ret_a0 +; CHECK: MayAlias: double* %escape_alloca_a0, double* %normal_ret_a0 +; CHECK: MayAlias: double* %escape_alloca_a1, double* %normal_ret_a0 +; CHECK: NoAlias: double* %noescape_alloca_a0, double* %normal_ret_a0 +; CHECK: NoAlias: double* %noescape_alloca_a1, double* %normal_ret_a0 +; CHECK: MayAlias: double* %arg_a0, double* %normal_ret_a1 +; CHECK: MayAlias: double* %arg_a1, double* %normal_ret_a1 +; CHECK: NoAlias: double* %noalias_arg_a0, double* %normal_ret_a1 +; CHECK: NoAlias: double* %noalias_arg_a1, double* %normal_ret_a1 +; CHECK: MayAlias: double* %normal_ret_a1, double** %indirect_a0 +; CHECK: MayAlias: double* %normal_ret_a1, double** %indirect_a1 +; CHECK: MayAlias: double* %loaded_a0, double* %normal_ret_a1 +; CHECK: MayAlias: double* %loaded_a1, double* %normal_ret_a1 +; CHECK: MayAlias: double* %escape_alloca_a0, double* %normal_ret_a1 +; CHECK: MayAlias: double* %escape_alloca_a1, double* %normal_ret_a1 +; CHECK: NoAlias: double* 
%noescape_alloca_a0, double* %normal_ret_a1 +; CHECK: NoAlias: double* %noescape_alloca_a1, double* %normal_ret_a1 +; CHECK: MayAlias: double* %normal_ret_a0, double* %normal_ret_a1 +; CHECK: NoAlias: double* %arg_a0, double* %noalias_ret_a0 +; CHECK: NoAlias: double* %arg_a1, double* %noalias_ret_a0 +; CHECK: NoAlias: double* %noalias_arg_a0, double* %noalias_ret_a0 +; CHECK: NoAlias: double* %noalias_arg_a1, double* %noalias_ret_a0 +; CHECK: NoAlias: double* %noalias_ret_a0, double** %indirect_a0 +; CHECK: NoAlias: double* %noalias_ret_a0, double** %indirect_a1 +; CHECK: NoAlias: double* %loaded_a0, double* %noalias_ret_a0 +; CHECK: NoAlias: double* %loaded_a1, double* %noalias_ret_a0 +; CHECK: NoAlias: double* %escape_alloca_a0, double* %noalias_ret_a0 +; CHECK: NoAlias: double* %escape_alloca_a1, double* %noalias_ret_a0 +; CHECK: NoAlias: double* %noalias_ret_a0, double* %noescape_alloca_a0 +; CHECK: NoAlias: double* %noalias_ret_a0, double* %noescape_alloca_a1 +; CHECK: NoAlias: double* %noalias_ret_a0, double* %normal_ret_a0 +; CHECK: NoAlias: double* %noalias_ret_a0, double* %normal_ret_a1 +; CHECK: NoAlias: double* %arg_a0, double* %noalias_ret_a1 +; CHECK: NoAlias: double* %arg_a1, double* %noalias_ret_a1 +; CHECK: NoAlias: double* %noalias_arg_a0, double* %noalias_ret_a1 +; CHECK: NoAlias: double* %noalias_arg_a1, double* %noalias_ret_a1 +; CHECK: NoAlias: double* %noalias_ret_a1, double** %indirect_a0 +; CHECK: NoAlias: double* %noalias_ret_a1, double** %indirect_a1 +; CHECK: NoAlias: double* %loaded_a0, double* %noalias_ret_a1 +; CHECK: NoAlias: double* %loaded_a1, double* %noalias_ret_a1 +; CHECK: NoAlias: double* %escape_alloca_a0, double* %noalias_ret_a1 +; CHECK: NoAlias: double* %escape_alloca_a1, double* %noalias_ret_a1 +; CHECK: NoAlias: double* %noalias_ret_a1, double* %noescape_alloca_a0 +; CHECK: NoAlias: double* %noalias_ret_a1, double* %noescape_alloca_a1 +; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a0 +; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a1 +; CHECK: NoAlias: double* %noalias_ret_a0, double* %noalias_ret_a1 +; CHECK: ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; 
[#uses=1] +; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1] +; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> 
%noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner() ; [#uses=1]
+; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK: ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK: ===== Alias Analysis Evaluator Report =====
+; CHECK: 120 Total Alias Queries Performed
+; CHECK: 84 no alias responses (70.0%)
+; CHECK: 36 may alias responses (30.0%)
+; CHECK: 0 must alias responses (0.0%)
+; CHECK: Alias Analysis Evaluator Pointer Alias Summary: 70%/30%/0%
+; CHECK: 128 Total ModRef Queries Performed
+; CHECK: 44 no mod/ref responses (34.3%)
+; CHECK: 0 mod responses (0.0%)
+; CHECK: 0 ref responses (0.0%)
+; CHECK: 84 mod & ref responses (65.6%)
+; CHECK: Alias Analysis Evaluator Mod/Ref Summary: 34%/0%/0%/65%
diff --git a/test/Analysis/BasicAA/unreachable-block.ll b/test/Analysis/BasicAA/unreachable-block.ll
new file mode 100644
index 0000000..3382188
--- /dev/null
+++ b/test/Analysis/BasicAA/unreachable-block.ll
@@ -0,0 +1,16 @@
+; RUN: opt -aa-eval -disable-output < %s >& /dev/null
+
+; BasicAA shouldn't infinitely recurse on the use-def cycles in
+; unreachable code.
+
+define void @func_2() nounwind {
+entry:
+  unreachable
+
+bb:
+  %t = select i1 undef, i32* %t, i32* undef
+  %p = select i1 undef, i32* %p, i32* %p
+  %q = select i1 undef, i32* undef, i32* %p
+  %a = getelementptr i8* %a, i32 0
+  unreachable
+}
diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
index 1626c1f..3542ad2 100644
--- a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
+++ b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
@@ -22,5 +22,5 @@ afterfor: ; preds = %forinc, %entry
   ret i32 %j.0.lcssa
 }
 
-; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %x)) smax (-1 + (-1 * %y))))
+; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %{{[xy]}})) smax (-1 + (-1 * %{{[xy]}}))))
diff --git a/test/Analysis/ScalarEvolution/scev-aa.ll b/test/Analysis/ScalarEvolution/scev-aa.ll
index e07aca2..866664a 100644
--- a/test/Analysis/ScalarEvolution/scev-aa.ll
+++ b/test/Analysis/ScalarEvolution/scev-aa.ll
@@ -1,8 +1,9 @@
 ; RUN: opt < %s -scev-aa -aa-eval -print-all-alias-modref-info \
 ; RUN:   |& FileCheck %s
 
-; At the time of this writing, -basicaa only misses the example of the form
-; A[i+(j+1)] != A[i+j], which can arise from multi-dimensional array references.
+; At the time of this writing, -basicaa misses the example of the form
+; A[i+(j+1)] != A[i+j], which can arise from multi-dimensional array references,
+; and the example of the form A[0] != A[i+1], where i+1 is known to be positive.
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
@@ -189,6 +190,27 @@ define void @bar() {
   ret void
 }
 
+; TODO: This is theoretically provable to be NoAlias.
+; CHECK: Function: nonnegative: 2 pointers, 0 call sites
+; CHECK: MayAlias: i64* %arrayidx, i64* %p
+
+define void @nonnegative(i64* %p) nounwind {
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.body
+  %i = phi i64 [ %inc, %for.body ], [ 0, %entry ] ; [#uses=2]
+  %inc = add nsw i64 %i, 1 ; [#uses=2]
+  %arrayidx = getelementptr inbounds i64* %p, i64 %inc
+  store i64 0, i64* %arrayidx
+  %tmp6 = load i64* %p ; [#uses=1]
+  %cmp = icmp slt i64 %inc, %tmp6 ; [#uses=1]
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+  ret void
+}
+
 ; CHECK: 13 no alias responses
-; CHECK: 26 may alias responses
+; CHECK: 27 may alias responses
 ; CHECK: 18 must alias responses
diff --git a/test/Analysis/ScalarEvolution/trip-count10.ll b/test/Analysis/ScalarEvolution/trip-count10.ll
index 0a992f9..546e1dc 100644
--- a/test/Analysis/ScalarEvolution/trip-count10.ll
+++ b/test/Analysis/ScalarEvolution/trip-count10.ll
@@ -74,3 +74,53 @@ loop:
 return:
   ret void
 }
+
+; Trip counts for non-polynomial iterations. It's theoretically possible
+; to compute a maximum count for these, but short of that, ScalarEvolution
+; should return unknown.
+
+; PR7416
+; CHECK: Determining loop execution counts for: @nonpolynomial
+; CHECK-NEXT: Loop %loophead: Unpredictable backedge-taken count
+; CHECK-NEXT: Loop %loophead: Unpredictable max backedge-taken count
+
+declare i1 @g() nounwind
+
+define void @nonpolynomial() {
+entry:
+  br label %loophead
+loophead:
+  %x = phi i32 [0, %entry], [%x.1, %bb1], [%x.2, %bb2]
+  %y = icmp slt i32 %x, 100
+  br i1 %y, label %loopbody, label %retbb
+loopbody:
+  %z = call i1 @g()
+  br i1 %z, label %bb1, label %bb2
+bb1:
+  %x.1 = add i32 %x, 2
+  br label %loophead
+bb2:
+  %x.2 = add i32 %x, 3
+  br label %loophead
+retbb:
+  ret void
+}
+
+; PHI nodes with all constant operands.
+
+; CHECK: Determining loop execution counts for: @constant_phi_operands
+; CHECK: Loop %loop: backedge-taken count is 1
+; CHECK: Loop %loop: max backedge-taken count is 1
+
+define void @constant_phi_operands() nounwind {
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 1, %loop ], [ 0, %entry ]
+  %exitcond = icmp eq i64 %i, 1
+  br i1 %exitcond, label %return, label %loop
+
+return:
+  ret void
+}
diff --git a/test/BugPoint/remove_arguments_test.ll b/test/BugPoint/remove_arguments_test.ll
index fb17c78..439ea54 100644
--- a/test/BugPoint/remove_arguments_test.ll
+++ b/test/BugPoint/remove_arguments_test.ll
@@ -1,11 +1,17 @@
 ; RUN: bugpoint %s -output-prefix %t -bugpoint-crashcalls -silence-passes
+; RUN: llvm-dis %t-reduced-simplified.bc -o - | FileCheck %s
 
 ; Test to make sure that arguments are removed from the function if they are
-; unnecessary.
+; unnecessary. And clean up any types that this frees up, too.
+
+; CHECK: target triple
+; CHECK-NOT: struct.anon
+%struct.anon = type { i32 }
 
 declare i32 @test2()
 
-define i32 @test(i32 %A, i32 %B, float %C) {
+; CHECK: define void @test() {
+define i32 @test(i32 %A, %struct.anon* %B, float %C) {
   call i32 @test2()
   ret i32 %1
 }
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ab060c9..433af90 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -35,6 +35,7 @@ if(PYTHONINTERP_FOUND)
     -e "s#\@LLVM_BINARY_DIR\@#${LLVM_BINARY_DIR}#"
     -e "s#\@LLVM_TOOLS_DIR\@#${LLVM_TOOLS_BINARY_DIR}/${CMAKE_CFG_INTDIR}#"
     -e "s#\@LLVMGCCDIR\@##"
+    -e "s#\@PYTHON_EXECUTABLE\@#${PYTHON_EXECUTABLE}#"
     ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in > ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
   COMMAND sed -e "s#\@LLVM_SOURCE_DIR\@#${LLVM_MAIN_SRC_DIR}#"
diff --git a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
index f775c61..fd2f462 100644
--- a/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
+++ b/test/CodeGen/ARM/2008-02-04-LocalRegAllocBug.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=local
 ; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=fast
 ; PR1925
 
diff --git a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
index 8ef8c7b..44da8e7 100644
--- a/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2008-02-29-RegAllocLocal.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=local
 ; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=fast
 ; PR1925
 
diff --git a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
index 912e6f9..524b5eb 100644
--- a/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
+++ b/test/CodeGen/ARM/2009-05-07-RegAllocLocal.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=local
 ; RUN: llc < %s -mtriple=armv5-unknown-linux-gnueabi -O0 -regalloc=fast
 ; PR4100
 @.str = external constant [30 x i8] ; <[30 x i8]*> [#uses=1]
diff --git a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
index e068be7..7e9b066 100644
--- a/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
+++ b/test/CodeGen/ARM/2009-06-22-CoalescerBug.ll
@@ -3,7 +3,7 @@
 %struct.rtunion = type { i64 }
 %struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
 
-define arm_apcscc void @simplify_unary_real(i8* nocapture %p) nounwind {
+define void @simplify_unary_real(i8* nocapture %p) nounwind {
 entry:
   %tmp121 = load i64* null, align 4 ; [#uses=1]
   %0 = getelementptr %struct.rtx_def* null, i32 0, i32 3, i32 3, i32
0 ; [#uses=1] diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll index 17efe00..812f018 100644 --- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll +++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert.ll @@ -8,11 +8,11 @@ @"\01LC16" = external constant [33 x i8], align 1 ; <[33 x i8]*> [#uses=1] @"\01LC17" = external constant [47 x i8], align 1 ; <[47 x i8]*> [#uses=1] -declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind +declare i32 @printf(i8* nocapture, ...) nounwind -declare arm_apcscc void @diff(i8*, i8*, i32, i32, i32, i32) nounwind +declare void @diff(i8*, i8*, i32, i32, i32, i32) nounwind -define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { entry: br i1 undef, label %bb5, label %bb @@ -44,17 +44,17 @@ bb11: ; preds = %bb9 store i32 0, i32* @no_mat, align 4 store i32 0, i32* @no_mis, align 4 %3 = getelementptr i8* %B, i32 %0 ; [#uses=1] - tail call arm_apcscc void @diff(i8* undef, i8* %3, i32 undef, i32 undef, i32 undef, i32 undef) nounwind + tail call void @diff(i8* undef, i8* %3, i32 undef, i32 undef, i32 undef, i32 undef) nounwind %4 = sitofp i32 undef to double ; [#uses=1] %5 = fdiv double %4, 1.000000e+01 ; [#uses=1] - %6 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @"\01LC12", i32 0, i32 0), double %5) nounwind ; [#uses=0] + %6 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([29 x i8]* @"\01LC12", i32 0, i32 0), double %5) nounwind ; [#uses=0] %7 = load i32* @al_len, align 4 ; [#uses=1] %8 = load i32* @no_mat, align 4 ; [#uses=1] %9 = load i32* @no_mis, align 4 ; [#uses=1] %10 = sub i32 %7, %8 ; [#uses=1] %11 = sub i32 %10, %9 ; [#uses=1] - %12 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC16", i32 0, i32 0), i32 %11) nounwind ; [#uses=0] - %13 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 undef) nounwind ; [#uses=0] + %12 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC16", i32 0, i32 0), i32 %11) nounwind ; [#uses=0] + %13 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 undef) nounwind ; [#uses=0] br i1 undef, label %bb15, label %bb12 bb12: ; preds = %bb11 diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll index f520be3..f5fb97c 100644 --- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll +++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert2.ll @@ -6,11 +6,11 @@ @"\01LC15" = external constant [33 x i8], align 1 ; <[33 x i8]*> [#uses=1] @"\01LC17" = external constant [47 x i8], align 1 ; <[47 x i8]*> [#uses=1] -declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind +declare i32 @printf(i8* nocapture, ...) 
nounwind -declare arm_apcscc void @diff(i8*, i8*, i32, i32, i32, i32) nounwind +declare void @diff(i8*, i8*, i32, i32, i32, i32) nounwind -define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { entry: br i1 undef, label %bb5, label %bb @@ -41,11 +41,11 @@ bb11: ; preds = %bb9 store i32 0, i32* @no_mat, align 4 store i32 0, i32* @no_mis, align 4 %4 = getelementptr i8* %B, i32 %0 ; [#uses=1] - tail call arm_apcscc void @diff(i8* undef, i8* %4, i32 undef, i32 %3, i32 undef, i32 undef) nounwind - %5 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC11", i32 0, i32 0), i32 %tmp13) nounwind ; [#uses=0] + tail call void @diff(i8* undef, i8* %4, i32 undef, i32 %3, i32 undef, i32 undef) nounwind + %5 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC11", i32 0, i32 0), i32 %tmp13) nounwind ; [#uses=0] %6 = load i32* @no_mis, align 4 ; [#uses=1] - %7 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC15", i32 0, i32 0), i32 %6) nounwind ; [#uses=0] - %8 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 %2) nounwind ; [#uses=0] + %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([33 x i8]* @"\01LC15", i32 0, i32 0), i32 %6) nounwind ; [#uses=0] + %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([47 x i8]* @"\01LC17", i32 0, i32 0), i32 undef, i32 %1, i32 undef, i32 %2) nounwind ; [#uses=0] br i1 undef, label %bb15, label %bb12 bb12: ; preds = %bb11 diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll index eee6ff9..d7e4c90 100644 --- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll +++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert3.ll @@ -2,7 +2,7 @@ @JJ = external global i32* ; [#uses=1] -define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { entry: br i1 undef, label %bb5, label %bb diff --git a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll index 93c92b1..77c133a 100644 --- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll +++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert4.ll @@ -6,9 +6,9 @@ @no_mis = external global i32 ; [#uses=1] @name1 = external global i8* ; [#uses=1] -declare arm_apcscc void @diff(i8*, i8*, i32, i32, i32, i32) nounwind +declare void @diff(i8*, i8*, i32, i32, i32, i32) nounwind -define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { entry: br i1 undef, label %bb5, label %bb @@ -35,7 +35,7 @@ bb11: ; preds = %bb9 store i32 0, i32* @no_mis, align 4 %1 = getelementptr i8* %A, i32 0 ; [#uses=1] %2 = getelementptr i8* %B, i32 0 ; [#uses=1] - tail call arm_apcscc void @diff(i8* %1, i8* %2, i32 undef, i32 undef, i32 undef, i32 undef) nounwind + tail call void @diff(i8* %1, i8* %2, i32 undef, i32 undef, i32 undef, i32 undef) nounwind br i1 undef, label %bb15, label %bb12 bb12: ; preds = %bb11 diff --git 
a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll index 277283d..16f5d1d 100644 --- a/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll +++ b/test/CodeGen/ARM/2009-06-30-RegScavengerAssert5.ll @@ -2,7 +2,7 @@ @XX = external global i32* ; [#uses=1] -define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { entry: br i1 undef, label %bb5, label %bb diff --git a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll index 5c0e5fa..f0d79ce 100644 --- a/test/CodeGen/ARM/2009-07-01-CommuteBug.ll +++ b/test/CodeGen/ARM/2009-07-01-CommuteBug.ll @@ -4,7 +4,7 @@ @II = external global i32* ; [#uses=1] @JJ = external global i32* ; [#uses=1] -define arm_apcscc void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { +define void @SIM(i8* %A, i8* %B, i32 %M, i32 %N, i32 %K, [256 x i32]* %V, i32 %Q, i32 %R, i32 %nseq) nounwind { entry: br i1 undef, label %bb5, label %bb diff --git a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll index 2b7ccd8..454fee5 100644 --- a/test/CodeGen/ARM/2009-07-18-RewriterBug.ll +++ b/test/CodeGen/ARM/2009-07-18-RewriterBug.ll @@ -8,7 +8,7 @@ @_2E_str7 = internal constant [21 x i8] c"ERROR: Only 1 point!\00", section "__TEXT,__cstring,cstring_literals", align 1 ; <[21 x i8]*> [#uses=1] @llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.EDGE_PAIR*, %struct.VERTEX*, %struct.VERTEX*)* @build_delaunay to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] -define arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias nocapture sret %agg.result, %struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind { +define void @build_delaunay(%struct.EDGE_PAIR* noalias nocapture sret %agg.result, %struct.VERTEX* %tree, %struct.VERTEX* %extra) nounwind { entry: %delright = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3] %delleft = alloca %struct.EDGE_PAIR, align 8 ; <%struct.EDGE_PAIR*> [#uses=3] @@ -29,10 +29,10 @@ bb1.i: ; preds = %bb1.i, %bb br i1 %6, label %get_low.exit, label %bb1.i get_low.exit: ; preds = %bb1.i - call arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind + call void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delright, %struct.VERTEX* %2, %struct.VERTEX* %extra) nounwind %7 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1] %8 = load %struct.VERTEX** %7, align 4 ; <%struct.VERTEX*> [#uses=1] - call arm_apcscc void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind + call void @build_delaunay(%struct.EDGE_PAIR* noalias sret %delleft, %struct.VERTEX* %8, %struct.VERTEX* %tree) nounwind %9 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 0 ; <%struct.edge_rec**> [#uses=1] %10 = load %struct.edge_rec** %9, align 8 ; <%struct.edge_rec*> [#uses=2] %11 = getelementptr %struct.EDGE_PAIR* %delleft, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] @@ -141,7 +141,7 @@ bb5.i: ; preds = %bb3.i %85 = inttoptr i32 %84 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %86 = getelementptr %struct.edge_rec* %ldi_addr.0.i, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] %87 = load %struct.VERTEX** %86, align 4 ; 
<%struct.VERTEX*> [#uses=1] - %88 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=6] + %88 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=6] %89 = getelementptr %struct.edge_rec* %88, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] store %struct.edge_rec* %88, %struct.edge_rec** %89, align 4 %90 = getelementptr %struct.edge_rec* %88, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=2] @@ -780,7 +780,7 @@ bb24.i: ; preds = %bb23.i, %bb21.i %592 = and i32 %589, -64 ; [#uses=1] %593 = or i32 %591, %592 ; [#uses=1] %594 = inttoptr i32 %593 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] - %595 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5] + %595 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5] %596 = getelementptr %struct.edge_rec* %595, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] store %struct.edge_rec* %595, %struct.edge_rec** %596, align 4 %597 = getelementptr %struct.edge_rec* %595, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] @@ -882,7 +882,7 @@ bb25.i: ; preds = %bb23.i, %bb22.i %677 = and i32 %674, -64 ; [#uses=1] %678 = or i32 %676, %677 ; [#uses=1] %679 = inttoptr i32 %678 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] - %680 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] + %680 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] %681 = getelementptr %struct.edge_rec* %680, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=5] store %struct.edge_rec* %680, %struct.edge_rec** %681, align 4 %682 = getelementptr %struct.edge_rec* %680, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] @@ -1005,15 +1005,15 @@ bb7: ; preds = %bb %762 = getelementptr %struct.VERTEX* %tree, i32 0, i32 1 ; <%struct.VERTEX**> [#uses=1] %763 = load %struct.VERTEX** %762, align 4 ; <%struct.VERTEX*> [#uses=4] %764 = icmp eq %struct.VERTEX* %763, null ; [#uses=1] - %765 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5] + %765 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=5] %766 = getelementptr %struct.edge_rec* %765, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] store %struct.edge_rec* %765, %struct.edge_rec** %766, align 4 %767 = getelementptr %struct.edge_rec* %765, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=3] br i1 %764, label %bb10, label %bb11 bb8: ; preds = %entry - %768 = call arm_apcscc i32 @puts(i8* getelementptr ([21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind ; [#uses=0] - call arm_apcscc void @exit(i32 -1) noreturn nounwind + %768 = call i32 @puts(i8* getelementptr ([21 x i8]* @_2E_str7, i32 0, i32 0)) nounwind ; [#uses=0] + call void @exit(i32 -1) noreturn nounwind unreachable bb10: ; preds = %bb7 @@ -1053,7 +1053,7 @@ bb11: ; preds = %bb7 store %struct.VERTEX* %tree, %struct.VERTEX** %790, align 4 %791 = getelementptr %struct.edge_rec* %785, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=1] store %struct.edge_rec* %783, %struct.edge_rec** %791, align 4 - %792 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] + %792 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] %793 = getelementptr %struct.edge_rec* %792, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=4] store %struct.edge_rec* %792, %struct.edge_rec** %793, align 4 %794 = getelementptr %struct.edge_rec* %792, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] @@ -1117,7 +1117,7 @@ bb11: ; preds = %bb7 %843 = or i32 
%841, %842 ; [#uses=1] %844 = inttoptr i32 %843 to %struct.edge_rec* ; <%struct.edge_rec*> [#uses=1] %845 = load %struct.VERTEX** %767, align 4 ; <%struct.VERTEX*> [#uses=1] - %846 = call arm_apcscc %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] + %846 = call %struct.edge_rec* @alloc_edge() nounwind ; <%struct.edge_rec*> [#uses=4] %847 = getelementptr %struct.edge_rec* %846, i32 0, i32 1 ; <%struct.edge_rec**> [#uses=7] store %struct.edge_rec* %846, %struct.edge_rec** %847, align 4 %848 = getelementptr %struct.edge_rec* %846, i32 0, i32 0 ; <%struct.VERTEX**> [#uses=1] @@ -1316,8 +1316,8 @@ bb15: ; preds = %bb14, %bb13, %bb11, %bb10, %bb6 ret void } -declare arm_apcscc i32 @puts(i8* nocapture) nounwind +declare i32 @puts(i8* nocapture) nounwind -declare arm_apcscc void @exit(i32) noreturn nounwind +declare void @exit(i32) noreturn nounwind -declare arm_apcscc %struct.edge_rec* @alloc_edge() nounwind +declare %struct.edge_rec* @alloc_edge() nounwind diff --git a/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll index b4b989b..d477ba98 100644 --- a/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll +++ b/test/CodeGen/ARM/2009-07-22-ScavengerAssert.ll @@ -6,9 +6,9 @@ %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 } %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 } -declare arm_apcscc i32 @strlen(i8* nocapture) nounwind readonly +declare i32 @strlen(i8* nocapture) nounwind readonly -define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind { +define i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind { entry: br i1 undef, label %bb126, label %bb1 @@ -86,7 +86,7 @@ bb52: ; preds = %cli_calloc.exit %0 = load i16* undef, align 4 ; [#uses=1] %1 = icmp eq i16 %0, 0 ; [#uses=1] %iftmp.20.0 = select i1 %1, i8* %hexsig, i8* null ; [#uses=1] - %2 = tail call arm_apcscc i32 @strlen(i8* %iftmp.20.0) nounwind readonly ; [#uses=0] + %2 = tail call i32 @strlen(i8* %iftmp.20.0) nounwind readonly ; [#uses=0] unreachable bb126: ; preds = %entry diff --git a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll index 24f4990..6761687 100644 --- a/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll +++ b/test/CodeGen/ARM/2009-07-22-SchedulerAssert.ll @@ -6,7 +6,7 @@ %struct.cli_bm_patt = type { i8*, i8*, i16, i16, i8*, i8*, i8, %struct.cli_bm_patt*, i16 } %struct.cli_matcher = type { i16, i8, i8*, %struct.cli_bm_patt**, i32*, i32, i8, i8, %struct.cli_ac_node*, %struct.cli_ac_node**, %struct.cli_ac_patt**, i32, i32, i32 } -define arm_apcscc i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind { +define i32 @cli_ac_addsig(%struct.cli_matcher* nocapture %root, i8* %virname, i8* %hexsig, i32 %sigid, i16 zeroext %parts, i16 zeroext %partno, i16 zeroext %type, i32 %mindist, i32 %maxdist, i8* %offset, i8 zeroext %target) nounwind { 
 entry:
   br i1 undef, label %bb126, label %bb1
diff --git a/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
index e1d19d1..5003fbd 100644
--- a/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
+++ b/test/CodeGen/ARM/2009-07-29-VFP3Registers.ll
@@ -4,7 +4,7 @@
 
 declare double @llvm.exp.f64(double) nounwind readonly
 
-define arm_apcscc void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind {
+define void @findratio(double* nocapture %res1, double* nocapture %res2) nounwind {
 entry:
   br label %bb
 
diff --git a/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
index 2d4e58d..a656c49 100644
--- a/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
+++ b/test/CodeGen/ARM/2009-08-02-RegScavengerAssert-Neon.ll
@@ -4,7 +4,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
 target triple = "armv7-apple-darwin9"
 
-define arm_apcscc <4 x i32> @scale(<4 x i32> %v, i32 %f) nounwind {
+define <4 x i32> @scale(<4 x i32> %v, i32 %f) nounwind {
 entry:
   %v_addr = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
   %f_addr = alloca i32 ; [#uses=2]
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
index 65ffed2..3097522 100644
--- a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert-2.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -mtriple=armv6-elf
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
 ; PR4528
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
 target triple = "armv6-elf"
 
-define arm_aapcscc i32 @file_read_actor(i32* nocapture %desc, i32* %page, i32 %offset, i32 %size) nounwind optsize {
+define i32 @file_read_actor(i32* nocapture %desc, i32* %page, i32 %offset, i32 %size) nounwind optsize {
 entry:
   br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
 
@@ -26,8 +26,8 @@ bb2: ; preds = %fault_in_pages_writeable.exit
   unreachable
 
 bb3: ; preds = %fault_in_pages_writeable.exit
-  %1 = tail call arm_aapcscc i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; [#uses=0]
+  %1 = tail call i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; [#uses=0]
   unreachable
 }
 
-declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
+declare i32 @__copy_to_user(i8*, i8*, i32)
diff --git a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
index 9e5372a..d666f12 100644
--- a/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
+++ b/test/CodeGen/ARM/2009-08-04-RegScavengerAssert.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=armv6-elf
+; RUN: llc < %s -mtriple=arm-linux-gnueabi
 ; PR4528
 
-define arm_aapcscc i32 @file_read_actor(i32 %desc, i32 %page, i32 %offset, i32 %size) nounwind optsize {
+define i32 @file_read_actor(i32 %desc, i32 %page, i32 %offset, i32 %size) nounwind optsize {
 entry:
   br i1 undef, label %fault_in_pages_writeable.exit, label %bb5.i
 
@@ -18,8 +18,8 @@ bb2: ; preds = %fault_in_pages_writeable.exit
   unreachable
 
 bb3: ; preds = %fault_in_pages_writeable.exit
-  %2 = tail call arm_aapcscc i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; [#uses=0]
+  %2 = tail call i32 @__copy_to_user(i8* undef, i8* undef, i32 undef) nounwind ; [#uses=0]
   unreachable
 }
 
-declare arm_aapcscc i32 @__copy_to_user(i8*, i8*, i32)
+declare i32 @__copy_to_user(i8*, i8*, i32)
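A note on the recurring pattern in these ARM test updates: once a RUN line pins
the target with -mtriple, the triple already determines the default calling
convention (APCS or AAPCS), so spelling out arm_apcscc / arm_aapcscc on every
define, declare, and call site adds nothing and is dropped. A minimal sketch of
the before/after shape, using a hypothetical function @h that is not part of
this patch:

; before: the convention is spelled out on both the declaration and the call
;   declare arm_aapcscc i32 @h(i32)
;   %r = tail call arm_aapcscc i32 @h(i32 %x)
; after: the triple (e.g. arm-linux-gnueabi) implies AAPCS, so the default
; convention is left implicit; caller and callee still agree either way
;   declare i32 @h(i32)
;   %r = tail call i32 @h(i32 %x)
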
diff --git a/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll index 18d68f7..4b41015 100644 --- a/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll +++ b/test/CodeGen/ARM/2009-08-15-RegScavenger-EarlyClobber.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm +; RUN: llc < %s -mtriple=arm-linux-gnueabi ; PR4528 ; Inline asm is allowed to contain operands "=&r", "0". @@ -6,7 +6,7 @@ %struct.device_dma_parameters = type { i32, i32 } %struct.iovec = type { i8*, i32 } -define arm_aapcscc i32 @generic_segment_checks(%struct.iovec* nocapture %iov, i32* nocapture %nr_segs, i32* nocapture %count, i32 %access_flags) nounwind optsize { +define i32 @generic_segment_checks(%struct.iovec* nocapture %iov, i32* nocapture %nr_segs, i32* nocapture %count, i32 %access_flags) nounwind optsize { entry: br label %bb8 diff --git a/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll index a46482c..2993647 100644 --- a/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll +++ b/test/CodeGen/ARM/2009-08-15-RegScavengerAssert.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -march=arm +; RUN: llc < %s -mtriple=arm-linux-gnueabi ; PR4716 -define arm_aapcscc void @_start() nounwind naked { +define void @_start() nounwind naked { entry: - tail call arm_aapcscc void @exit(i32 undef) noreturn nounwind + tail call void @exit(i32 undef) noreturn nounwind unreachable } -declare arm_aapcscc void @exit(i32) noreturn nounwind +declare void @exit(i32) noreturn nounwind diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll index 84915c4..c598fe6 100644 --- a/test/CodeGen/ARM/2009-08-21-PostRAKill.ll +++ b/test/CodeGen/ARM/2009-08-21-PostRAKill.ll @@ -7,7 +7,7 @@ target triple = "armv7-apple-darwin9" %struct.tree = type { i32, double, double, %struct.tree*, %struct.tree*, %struct.tree*, %struct.tree* } @g = common global %struct.tree* null -define arm_apcscc %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind { +define %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind { entry: %t.idx51.val.i = load double* null ; [#uses=1] br i1 undef, label %bb4.i, label %bb.i diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll index a21ffc3..cc92c26 100644 --- a/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll +++ b/test/CodeGen/ARM/2009-08-21-PostRAKill2.ll @@ -9,7 +9,7 @@ target triple = "armv7-apple-darwin9" %struct.icstruct = type { [3 x i32], i16 } %struct.node = type { i16, double, [3 x double], i32, i32 } -declare arm_apcscc double @floor(double) nounwind readnone +declare double @floor(double) nounwind readnone define void @intcoord(%struct.icstruct* noalias nocapture sret %agg.result, i1 %a, double %b) { entry: @@ -28,7 +28,7 @@ bb7: ; preds = %bb3 br i1 %a, label %bb11, label %bb9 bb9: ; preds = %bb7 - %0 = tail call arm_apcscc double @floor(double %b) nounwind readnone ; [#uses=0] + %0 = tail call double @floor(double %b) nounwind readnone ; [#uses=0] br label %bb11 bb11: ; preds = %bb9, %bb7 diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll index e3d8ea6..90a4a42 100644 --- a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll +++ b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll @@ -9,7 +9,7 @@ target triple = "armv7-apple-darwin9" %struct.Patient = type { i32, i32, i32, %struct.Village* } %struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 
} -define arm_apcscc %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind { +define %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind { entry: br i1 %p, label %bb8, label %bb1 diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll index 9123377..5cfc68d 100644 --- a/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll +++ b/test/CodeGen/ARM/2009-08-21-PostRAKill4.ll @@ -8,19 +8,19 @@ target triple = "armv7-apple-darwin9" @.str1 = external constant [31 x i8], align 1 ; <[31 x i8]*> [#uses=1] @.str2 = external constant [4 x i8], align 1 ; <[4 x i8]*> [#uses=1] -declare arm_apcscc i32 @getUnknown(i32, ...) nounwind +declare i32 @getUnknown(i32, ...) nounwind declare void @llvm.va_start(i8*) nounwind declare void @llvm.va_end(i8*) nounwind -declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind +declare i32 @printf(i8* nocapture, ...) nounwind -define arm_apcscc i32 @main() nounwind { +define i32 @main() nounwind { entry: - %0 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; [#uses=0] - %1 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; [#uses=0] - %2 = tail call arm_apcscc i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; [#uses=1] - %3 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; [#uses=0] + %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 1, i32 1, i32 1, i32 1, i32 1, i32 1) nounwind ; [#uses=0] + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([31 x i8]* @.str1, i32 0, i32 0), i32 -128, i32 116, i32 116, i32 -3852, i32 -31232, i32 -1708916736) nounwind ; [#uses=0] + %2 = tail call i32 (i32, ...)* @getUnknown(i32 undef, i32 116, i32 116, i32 -3852, i32 -31232, i32 30556, i32 -1708916736) nounwind ; [#uses=1] + %3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([4 x i8]* @.str2, i32 0, i32 0), i32 %2) nounwind ; [#uses=0] ret i32 0 } diff --git a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll index 0fad533..392c70a 100644 --- a/test/CodeGen/ARM/2009-08-23-linkerprivate.ll +++ b/test/CodeGen/ARM/2009-08-23-linkerprivate.ll @@ -2,7 +2,7 @@ ; ModuleID = '/Volumes/MacOS9/tests/WebKit/JavaScriptCore/profiler/ProfilerServer.mm' -@"\01l_objc_msgSend_fixup_alloc" = linker_private hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; [#uses=0] +@"\01l_objc_msgSend_fixup_alloc" = linker_private_weak hidden global i32 0, section "__DATA, __objc_msgrefs, coalesced", align 16 ; CHECK: .globl l_objc_msgSend_fixup_alloc ; CHECK: .weak_definition l_objc_msgSend_fixup_alloc diff --git a/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll index c6ef256..5407013 100644 --- a/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll +++ b/test/CodeGen/ARM/2009-08-26-ScalarToVector.ll @@ -10,7 +10,7 @@ target triple = "thumbv7-elf" declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone -define arm_apcscc void @_ZN6squish10ClusterFit9Compress3EPv(%quuz* %this, i8* %block) { +define void @_ZN6squish10ClusterFit9Compress3EPv(%quuz* 
%this, i8* %block) { entry: %0 = lshr <4 x i32> zeroinitializer, ; <<4 x i32>> [#uses=1] %1 = shufflevector <4 x i32> %0, <4 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1] diff --git a/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll index bc5bfe9..cac8569 100644 --- a/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll +++ b/test/CodeGen/ARM/2009-08-27-ScalarToVector.ll @@ -8,7 +8,7 @@ target triple = "thumbv7-elf" %quux = type { i32 (...)**, %baz*, i32 } %quuz = type { %quux, i32, %bar, [128 x i8], [16 x %foo], %foo, %foo, %foo } -define arm_apcscc void @aaaa(%quuz* %this, i8* %block) { +define void @aaaa(%quuz* %this, i8* %block) { entry: br i1 undef, label %bb.nph269, label %bb201 diff --git a/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll index d5178b4..5bd30ea 100644 --- a/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll +++ b/test/CodeGen/ARM/2009-08-29-ExtractEltf32.ll @@ -2,7 +2,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" target triple = "thumbv7-elf" -define arm_apcscc void @foo() nounwind { +define void @foo() nounwind { entry: %0 = tail call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> undef, <2 x float> undef) nounwind ; <<2 x float>> [#uses=1] %tmp28 = extractelement <2 x float> %0, i32 0 ; [#uses=1] diff --git a/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll index 266fce6..4655962 100644 --- a/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll +++ b/test/CodeGen/ARM/2009-08-29-TooLongSplat.ll @@ -2,7 +2,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" target triple = "thumbv7-elf" -define arm_apcscc void @aaa() nounwind { +define void @aaa() nounwind { entry: %0 = fmul <4 x float> undef, ; <<4 x float>> [#uses=1] %tmp31 = extractelement <4 x float> %0, i32 0 ; [#uses=1] diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll index c0ad65f..397eba4 100644 --- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll +++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll @@ -2,7 +2,7 @@ %struct.A = type { i32* } -define arm_apcscc void @"\01-[MyFunction Name:]"() { +define void @"\01-[MyFunction Name:]"() { entry: %save_filt.1 = alloca i32 ; [#uses=2] %save_eptr.0 = alloca i8* ; [#uses=2] @@ -10,12 +10,12 @@ entry: %eh_exception = alloca i8* ; [#uses=5] %eh_selector = alloca i32 ; [#uses=3] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] - call arm_apcscc void @_ZN1AC1Ev(%struct.A* %a) - invoke arm_apcscc void @_Z3barv() + call void @_ZN1AC1Ev(%struct.A* %a) + invoke void @_Z3barv() to label %invcont unwind label %lpad invcont: ; preds = %entry - call arm_apcscc void @_ZN1AD1Ev(%struct.A* %a) nounwind + call void @_ZN1AD1Ev(%struct.A* %a) nounwind br label %return bb: ; preds = %ppad @@ -23,7 +23,7 @@ bb: ; preds = %ppad store i32 %eh_select, i32* %save_filt.1, align 4 %eh_value = load i8** %eh_exception ; [#uses=1] store i8* %eh_value, i8** %save_eptr.0, align 4 - call arm_apcscc void @_ZN1AD1Ev(%struct.A* %a) nounwind + call void @_ZN1AD1Ev(%struct.A* %a) nounwind %0 = load i8** %save_eptr.0, align 4 ; [#uses=1] store i8* %0, i8** %eh_exception, align 4 %1 = load i32* %save_filt.1, align 4 ; [#uses=1] @@ -46,16 +46,16 @@ ppad: ; preds = %lpad Unwind: ; preds = %bb %eh_ptr3 = load i8** %eh_exception ; [#uses=1] - call arm_apcscc void 
@_Unwind_SjLj_Resume(i8* %eh_ptr3) + call void @_Unwind_SjLj_Resume(i8* %eh_ptr3) unreachable } -define linkonce_odr arm_apcscc void @_ZN1AC1Ev(%struct.A* %this) { +define linkonce_odr void @_ZN1AC1Ev(%struct.A* %this) { entry: %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] store %struct.A* %this, %struct.A** %this_addr - %0 = call arm_apcscc i8* @_Znwm(i32 4) ; [#uses=1] + %0 = call i8* @_Znwm(i32 4) ; [#uses=1] %1 = bitcast i8* %0 to i32* ; [#uses=1] %2 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1] %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0 ; [#uses=1] @@ -66,9 +66,9 @@ return: ; preds = %entry ret void } -declare arm_apcscc i8* @_Znwm(i32) +declare i8* @_Znwm(i32) -define linkonce_odr arm_apcscc void @_ZN1AD1Ev(%struct.A* %this) nounwind { +define linkonce_odr void @_ZN1AD1Ev(%struct.A* %this) nounwind { entry: %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] @@ -77,7 +77,7 @@ entry: %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0 ; [#uses=1] %2 = load i32** %1, align 4 ; [#uses=1] %3 = bitcast i32* %2 to i8* ; [#uses=1] - call arm_apcscc void @_ZdlPv(i8* %3) nounwind + call void @_ZdlPv(i8* %3) nounwind br label %bb bb: ; preds = %entry @@ -88,9 +88,9 @@ return: ; preds = %bb } ;CHECK: L_LSDA_0: -declare arm_apcscc void @_ZdlPv(i8*) nounwind +declare void @_ZdlPv(i8*) nounwind -declare arm_apcscc void @_Z3barv() +declare void @_Z3barv() declare i8* @llvm.eh.exception() nounwind @@ -98,6 +98,6 @@ declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind -declare arm_apcscc i32 @__gxx_personality_sj0(...) +declare i32 @__gxx_personality_sj0(...) 
-declare arm_apcscc void @_Unwind_SjLj_Resume(i8*) +declare void @_Unwind_SjLj_Resume(i8*) diff --git a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll index bf91fe0..06a152d 100644 --- a/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll +++ b/test/CodeGen/ARM/2009-09-01-PostRAProlog.ll @@ -30,11 +30,11 @@ target triple = "thumbv7-apple-darwin9" @.str218 = private constant [6 x i8] c"%7d%c\00", align 1 ; <[6 x i8]*> [#uses=1] @.str319 = private constant [30 x i8] c"Failed to allocate %u bytes.\0A\00", align 1 ; <[30 x i8]*> [#uses=1] -declare arm_apcscc i32 @puts(i8* nocapture) nounwind +declare i32 @puts(i8* nocapture) nounwind -declare arm_apcscc i32 @getchar() nounwind +declare i32 @getchar() nounwind -define internal arm_apcscc i32 @transpose() nounwind readonly { +define internal i32 @transpose() nounwind readonly { ; CHECK: push entry: %0 = load i32* getelementptr inbounds ([128 x i32]* @columns, i32 0, i32 1), align 4 ; [#uses=1] @@ -101,6 +101,6 @@ bb7: ; preds = %bb5 ret i32 -128 } -declare arm_apcscc noalias i8* @calloc(i32, i32) nounwind +declare noalias i8* @calloc(i32, i32) nounwind declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind diff --git a/test/CodeGen/ARM/2009-09-09-AllOnes.ll b/test/CodeGen/ARM/2009-09-09-AllOnes.ll index f654a16..8522a77 100644 --- a/test/CodeGen/ARM/2009-09-09-AllOnes.ll +++ b/test/CodeGen/ARM/2009-09-09-AllOnes.ll @@ -2,7 +2,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32" target triple = "thumbv7-elf" -define arm_apcscc void @foo() { +define void @foo() { entry: %0 = insertelement <4 x i32> undef, i32 -1, i32 3 store <4 x i32> %0, <4 x i32>* undef, align 16 diff --git a/test/CodeGen/ARM/2009-09-24-spill-align.ll b/test/CodeGen/ARM/2009-09-24-spill-align.ll index 5476d5f..8bfd026 100644 --- a/test/CodeGen/ARM/2009-09-24-spill-align.ll +++ b/test/CodeGen/ARM/2009-09-24-spill-align.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s ; pr4926 -define arm_apcscc void @test_vget_lanep16() nounwind { +define void @test_vget_lanep16() nounwind { entry: %arg0_poly16x4_t = alloca <4 x i16> ; <<4 x i16>*> [#uses=1] %out_poly16_t = alloca i16 ; [#uses=1] diff --git a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll index 53bd668..4aa879d 100644 --- a/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll +++ b/test/CodeGen/ARM/2009-09-28-LdStOptiBug.ll @@ -3,7 +3,7 @@ %0 = type { double, double } -define arm_aapcscc void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind { +define void @foo(%0* noalias nocapture sret %agg.result, double %x.0, double %y.0) nounwind { ; CHECK: foo: ; CHECK: bl __adddf3 ; CHECK-NOT: strd diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll index f17d059..c31b116 100644 --- a/test/CodeGen/ARM/2009-10-27-double-align.ll +++ b/test/CodeGen/ARM/2009-10-27-double-align.ll @@ -2,13 +2,13 @@ @.str = private constant [1 x i8] zeroinitializer, align 1 -define arm_aapcscc void @g() { +define void @g() { entry: ;CHECK: [sp, #8] ;CHECK: [sp, #12] ;CHECK: [sp] - tail call arm_aapcscc void (i8*, ...)* @f(i8* getelementptr ([1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00) + tail call void (i8*, ...)* @f(i8* getelementptr ([1 x i8]* @.str, i32 0, i32 0), i32 1, double 2.000000e+00, i32 3, double 4.000000e+00) ret void } -declare arm_aapcscc 
void @f(i8*, ...) +declare void @f(i8*, ...) diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll index 62f3786..34f7519 100644 --- a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll +++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll @@ -11,11 +11,11 @@ entry: %0 = getelementptr inbounds %foo* %quat_addr, i32 0, i32 0 ; <<4 x float>*> [#uses=1] store <4 x float> %quat.0, <4 x float>* %0 %1 = call arm_aapcs_vfpcc <4 x float> @quux(%foo* %quat_addr) nounwind ; <<4 x float>> [#uses=3] -;CHECK: vmov.f32 -;CHECK: vmov.f32 %2 = fmul <4 x float> %1, %1 ; <<4 x float>> [#uses=2] %3 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] %4 = shufflevector <4 x float> %2, <4 x float> undef, <2 x i32> ; <<2 x float>> [#uses=1] +;CHECK-NOT: vmov +;CHECK: vpadd %5 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %3, <2 x float> %4) nounwind ; <<2 x float>> [#uses=2] %6 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %5, <2 x float> %5) nounwind ; <<2 x float>> [#uses=2] %7 = shufflevector <2 x float> %6, <2 x float> %6, <4 x i32> ; <<4 x float>> [#uses=2] diff --git a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll index a737591..198faeb 100644 --- a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll +++ b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll @@ -6,7 +6,7 @@ target triple = "armv7-apple-darwin10" %struct.int16x8_t = type { <8 x i16> } %struct.int16x8x2_t = type { [2 x %struct.int16x8_t] } -define arm_apcscc void @t(%struct.int16x8x2_t* noalias nocapture sret %agg.result, <8 x i16> %tmp.0, %struct.int16x8x2_t* nocapture %dst) nounwind { +define void @t(%struct.int16x8x2_t* noalias nocapture sret %agg.result, <8 x i16> %tmp.0, %struct.int16x8x2_t* nocapture %dst) nounwind { entry: ;CHECK: vtrn.16 %0 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> diff --git a/test/CodeGen/ARM/2010-04-09-NeonSelect.ll b/test/CodeGen/ARM/2010-04-09-NeonSelect.ll index 71e0b0a..89d6a68 100644 --- a/test/CodeGen/ARM/2010-04-09-NeonSelect.ll +++ b/test/CodeGen/ARM/2010-04-09-NeonSelect.ll @@ -1,7 +1,7 @@ ; RUN: llc -march=arm -mattr=+neon < %s ; Radar 7770501: Don't crash on SELECT and SELECT_CC with NEON vector values. 
-define arm_apcscc void @vDSP_FFT16_copv(float* nocapture %O, float* nocapture %I, i32 %Direction) nounwind {
+define void @vDSP_FFT16_copv(float* nocapture %O, float* nocapture %I, i32 %Direction) nounwind {
 entry:
   %.22 = select i1 undef, <4 x float> undef, <4 x float> zeroinitializer ; <<4 x float>> [#uses=1]
   %0 = fadd <4 x float> undef, %.22 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll b/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll
index 4f71b83..1354c79 100644
--- a/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll
+++ b/test/CodeGen/ARM/2010-04-13-v2f64SplitArg.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8
 ; Radar 7855014
 
-define arm_apcscc void @test1(i32 %f0, i32 %f1, i32 %f2, <4 x i32> %f3) nounwind {
+define void @test1(i32 %f0, i32 %f1, i32 %f2, <4 x i32> %f3) nounwind {
 entry:
   unreachable
 }
diff --git a/test/CodeGen/ARM/2010-04-14-SplitVector.ll b/test/CodeGen/ARM/2010-04-14-SplitVector.ll
index 42f9852..5d0c3cf 100644
--- a/test/CodeGen/ARM/2010-04-14-SplitVector.ll
+++ b/test/CodeGen/ARM/2010-04-14-SplitVector.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mcpu=arm1136jf-s
 ; Radar 7854640
 
-define arm_apcscc void @test() nounwind {
+define void @test() nounwind {
 bb:
   br i1 undef, label %bb9, label %bb10
diff --git a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
index ed7bca8..05581c3 100644
--- a/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
+++ b/test/CodeGen/ARM/2010-04-15-ScavengerDebugValue.ll
@@ -3,7 +3,7 @@
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32"
 target triple = "armv4t-apple-darwin10"
 
-define hidden arm_apcscc i32 @__addvsi3(i32 %a, i32 %b) nounwind {
+define hidden i32 @__addvsi3(i32 %a, i32 %b) nounwind {
 entry:
   tail call void @llvm.dbg.value(metadata !{i32 %b}, i64 0, metadata !0)
   %0 = add nsw i32 %b, %a, !dbg !9 ; [#uses=1]
diff --git a/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll b/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
index b158afd..9461643 100644
--- a/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
+++ b/test/CodeGen/ARM/2010-05-18-LocalAllocCrash.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=local
 ; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=fast
 ; rdar://problem/7948106
 ;; This test would spill %R4 before the call to zz, but it forgot to move the
@@ -11,7 +10,7 @@ target triple = "armv6-apple-darwin"
 
 @.str = external constant [1 x i8] ; <[1 x i8]*> [#uses=1]
 
-define arm_apcscc void @yy(%struct.q* %qq) nounwind {
+define void @yy(%struct.q* %qq) nounwind {
 entry:
   %vla6 = alloca i8, i32 undef, align 1 ; [#uses=1]
   %vla10 = alloca i8, i32 undef, align 1 ; [#uses=1]
@@ -20,18 +19,18 @@ entry:
   %tmp21 = load i32* undef ; [#uses=1]
   %0 = mul i32 1, %tmp21 ; [#uses=1]
   %vla22 = alloca i8, i32 %0, align 1 ; [#uses=1]
-  call arm_apcscc void (...)* @zz(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0), i32 2, i32 1)
+  call void (...)* @zz(i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0), i32 2, i32 1)
   br i1 undef, label %if.then, label %if.end36
 
 if.then: ; preds = %entry
-  %call = call arm_apcscc i32 (...)* @x(%struct.q* undef, i8* undef, i8* %vla6, i8* %vla10, i32 undef) ; [#uses=0]
-  %call35 = call arm_apcscc i32 (...)* @x(%struct.q* undef, i8* %vla14, i8* %vla18, i8* %vla22, i32 undef) ; [#uses=0]
+  %call = call i32 (...)* @x(%struct.q* undef, i8* undef, i8* %vla6, i8* %vla10, i32 undef) ; [#uses=0]
+  %call35 = call i32 (...)* @x(%struct.q* undef, i8* %vla14, i8* %vla18, i8* %vla22, i32 undef) ; [#uses=0]
   unreachable
 
 if.end36: ; preds = %entry
   ret void
 }
 
-declare arm_apcscc void @zz(...)
+declare void @zz(...)
 
-declare arm_apcscc i32 @x(...)
+declare i32 @x(...)
diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
index 9907228..5ad1c09 100644
--- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
+++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll
@@ -4,7 +4,7 @@
 
 %struct.foo = type { i64, i64 }
 
-define arm_apcscc zeroext i8 @t(%struct.foo* %this) noreturn optsize {
+define zeroext i8 @t(%struct.foo* %this) noreturn optsize {
 entry:
 ; ARM: t:
 ; ARM: str r0, [r1], r0
diff --git a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
index b6fbf9b..ff60fa8 100644
--- a/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
+++ b/test/CodeGen/ARM/2010-05-20-NEONSpillCrash.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon -O0
+; RUN: llc < %s -march=arm -mattr=+neon -O0 -regalloc=linearscan
 
 ; This test would crash the rewriter when trying to handle a spill after one of
 ; the @llvm.arm.neon.vld3.v8i8 defined three parts of a register.
diff --git a/test/CodeGen/ARM/2010-05-21-BuildVector.ll b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
index 6b19490..ce959d1 100644
--- a/test/CodeGen/ARM/2010-05-21-BuildVector.ll
+++ b/test/CodeGen/ARM/2010-05-21-BuildVector.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
 ; Radar 7872877
 
-define arm_apcscc void @test(float* %fltp, i32 %packedValue, float* %table) nounwind {
+define void @test(float* %fltp, i32 %packedValue, float* %table) nounwind {
 entry:
   %0 = load float* %fltp
   %1 = insertelement <4 x float> undef, float %0, i32 0
diff --git a/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
new file mode 100644
index 0000000..e4f2099
--- /dev/null
+++ b/test/CodeGen/ARM/2010-06-11-vmovdrr-bitcast.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+; Radar 8084742
+
+%struct.__int8x8x2_t = type { [2 x <8 x i8>] }
+
+define void @foo(%struct.__int8x8x2_t* nocapture %a, i8* %b) nounwind {
+entry:
+  %0 = bitcast %struct.__int8x8x2_t* %a to i128* ; [#uses=1]
+  %srcval = load i128* %0, align 8 ; [#uses=2]
+  %tmp6 = trunc i128 %srcval to i64 ; [#uses=1]
+  %tmp8 = lshr i128 %srcval, 64 ; [#uses=1]
+  %tmp9 = trunc i128 %tmp8 to i64 ; [#uses=1]
+  %tmp16.i = bitcast i64 %tmp6 to <8 x i8> ; <<8 x i8>> [#uses=1]
+  %tmp20.i = bitcast i64 %tmp9 to <8 x i8> ; <<8 x i8>> [#uses=1]
+  tail call void @llvm.arm.neon.vst2.v8i8(i8* %b, <8 x i8> %tmp16.i, <8 x i8> %tmp20.i) nounwind
+  ret void
+}
+
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
diff --git a/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll b/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
new file mode 100644
index 0000000..816a6d4
--- /dev/null
+++ b/test/CodeGen/ARM/2010-06-21-LdStMultipleBug.ll
@@ -0,0 +1,148 @@
+; RUN: llc < %s -mtriple=armv7-apple-darwin -O3 -mcpu=arm1136jf-s
+; PR7421
+
+%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
+%struct.FILE = type { i8* }
+%struct.tilebox = type { %struct.tilebox*, double, double, double, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.UNCOMBOX = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+%struct.cellbox = type { i8*, i32, i32, i32, [9 x i32], i32, i32, i32, i32, i32, i32, i32, double, double, double, double, double, i32, i32, %struct.CONTENTBOX*, %struct.UNCOMBOX*, [8 x %struct.tilebox*] }
+%struct.termbox = type { %struct.termbox*, i32, i32, i32, i32, i32 }
+
+@.str2708 = external constant [14 x i8], align 4 ; <[14 x i8]*> [#uses=1]
+
+define void @TW_oldinput(%struct.FILE* nocapture %fp) nounwind {
+entry:
+  %xcenter = alloca i32, align 4 ; [#uses=2]
+  %0 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; [#uses=1]
+  %1 = icmp eq i32 %0, 4 ; [#uses=1]
+  br i1 %1, label %bb, label %return
+
+bb: ; preds = %bb445, %entry
+  %2 = load %struct.cellbox** undef, align 4 ; <%struct.cellbox*> [#uses=2]
+  %3 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 3 ; [#uses=1]
+  store i32 undef, i32* %3, align 4
+  %4 = load i32* undef, align 4 ; [#uses=3]
+  %5 = icmp eq i32 undef, 1 ; [#uses=1]
+  br i1 %5, label %bb10, label %bb445
+
+bb10: ; preds = %bb
+  br i1 undef, label %bb11, label %bb445
+
+bb11: ; preds = %bb10
+  %6 = load %struct.tilebox** undef, align 4 ; <%struct.tilebox*> [#uses=3]
+  %7 = load %struct.termbox** null, align 4 ; <%struct.termbox*> [#uses=1]
+  %8 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 13 ; [#uses=1]
+  %9 = load i32* %8, align 4 ; [#uses=3]
+  %10 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 15 ; [#uses=1]
+  %11 = load i32* %10, align 4 ; [#uses=1]
+  br i1 false, label %bb12, label %bb13
+
+bb12: ; preds = %bb11
+  unreachable
+
+bb13: ; preds = %bb11
+  %iftmp.40.0.neg = sdiv i32 0, -2 ; [#uses=2]
+  %12 = sub nsw i32 0, %9 ; [#uses=1]
+  %13 = sitofp i32 %12 to double ; [#uses=1]
+  %14 = fdiv double %13, 0.000000e+00 ; [#uses=1]
+  %15 = fptosi double %14 to i32 ; [#uses=1]
+  %iftmp.41.0.in = add i32 0, %15 ; [#uses=1]
+  %iftmp.41.0.neg = sdiv i32 %iftmp.41.0.in, -2 ; [#uses=3]
+  br i1 undef, label %bb43.loopexit, label %bb21
+
+bb21: ; preds = %bb13
+  %16 = fptosi double undef to i32 ; [#uses=1]
+  %17 = fsub double undef, 0.000000e+00 ; [#uses=1]
+  %not.460 = fcmp oge double %17, 5.000000e-01 ; [#uses=1]
+  %18 = zext i1 %not.460 to i32 ; [#uses=1]
+  %iftmp.42.0 = add i32 %16, %iftmp.41.0.neg ; [#uses=1]
+  %19 = add i32 %iftmp.42.0, %18 ; [#uses=1]
+  store i32 %19, i32* undef, align 4
+  %20 = sub nsw i32 0, %9 ; [#uses=1]
+  %21 = sitofp i32 %20 to double ; [#uses=1]
+  %22 = fdiv double %21, 0.000000e+00 ; [#uses=2]
+  %23 = fptosi double %22 to i32 ; [#uses=1]
+  %24 = fsub double %22, undef ; [#uses=1]
+  %not.461 = fcmp oge double %24, 5.000000e-01 ; [#uses=1]
+  %25 = zext i1 %not.461 to i32 ; [#uses=1]
+  %iftmp.43.0 = add i32 %23, %iftmp.41.0.neg ; [#uses=1]
+  %26 = add i32 %iftmp.43.0, %25 ; [#uses=1]
+  %27 = getelementptr inbounds %struct.tilebox* %6, i32 0, i32 10 ; [#uses=1]
+  store i32 %26, i32* %27, align 4
+  %28 = fptosi double undef to i32 ; [#uses=1]
+  %iftmp.45.0 = add i32 %28, %iftmp.40.0.neg ; [#uses=1]
+  %29 = add i32 %iftmp.45.0, 0 ; [#uses=1]
+  store i32 %29, i32* undef, align 4
+  br label %bb43.loopexit
+
+bb36: ; preds = %bb43.loopexit, %bb36
+  %termptr.0478 = phi %struct.termbox* [ %42, %bb36 ], [ %7, %bb43.loopexit ] ; <%struct.termbox*> [#uses=1]
+  %30 = load i32* undef, align 4 ; [#uses=1]
+  %31 = sub nsw i32 %30, %9 ; [#uses=1]
+  %32 = sitofp i32 %31 to double ; [#uses=1]
+  %33 = fdiv double %32, 0.000000e+00 ; [#uses=1]
+  %34 = fptosi double %33 to i32 ; [#uses=1]
+  %iftmp.46.0 = add i32 %34, %iftmp.41.0.neg ; [#uses=1]
+  %35 = add i32 %iftmp.46.0, 0 ; [#uses=1]
+  store i32 %35, i32* undef, align 4
+  %36 = sub nsw i32 0, %11 ; [#uses=1]
+  %37 = sitofp i32 %36 to double ; [#uses=1]
+  %38 = fmul double %37, 0.000000e+00 ; [#uses=1]
+  %39 = fptosi double %38 to i32 ; [#uses=1]
+  %iftmp.47.0 = add i32 %39, %iftmp.40.0.neg ; [#uses=1]
+  %40 = add i32 %iftmp.47.0, 0 ; [#uses=1]
+  store i32 %40, i32* undef, align 4
+  %41 = getelementptr inbounds %struct.termbox* %termptr.0478, i32 0, i32 0 ; <%struct.termbox**> [#uses=1]
+  %42 = load %struct.termbox** %41, align 4 ; <%struct.termbox*> [#uses=2]
+  %43 = icmp eq %struct.termbox* %42, null ; [#uses=1]
+  br i1 %43, label %bb52.loopexit, label %bb36
+
+bb43.loopexit: ; preds = %bb21, %bb13
+  br i1 undef, label %bb52.loopexit, label %bb36
+
+bb52.loopexit: ; preds = %bb43.loopexit, %bb36
+  %44 = icmp eq i32 %4, 0 ; [#uses=1]
+  br i1 %44, label %bb.nph485, label %bb54
+
+bb54: ; preds = %bb52.loopexit
+  switch i32 %4, label %bb62 [
+    i32 2, label %bb56
+    i32 3, label %bb57
+  ]
+
+bb56: ; preds = %bb54
+  br label %bb62
+
+bb57: ; preds = %bb54
+  br label %bb62
+
+bb62: ; preds = %bb57, %bb56, %bb54
+  unreachable
+
+bb.nph485: ; preds = %bb52.loopexit
+  br label %bb248
+
+bb248: ; preds = %bb322, %bb.nph485
+  %45 = icmp eq i32 undef, %4 ; [#uses=1]
+  br i1 %45, label %bb322, label %bb249
+
+bb249: ; preds = %bb248
+  %46 = getelementptr inbounds %struct.cellbox* %2, i32 0, i32 21, i32 undef ; <%struct.tilebox**> [#uses=1]
+  %47 = load %struct.tilebox** %46, align 4 ; <%struct.tilebox*> [#uses=1]
+  %48 = getelementptr inbounds %struct.tilebox* %47, i32 0, i32 11 ; [#uses=1]
+  store i32 undef, i32* %48, align 4
+  unreachable
+
+bb322: ; preds = %bb248
+  br i1 undef, label %bb248, label %bb445
+
+bb445: ; preds = %bb322, %bb10, %bb
+  %49 = call i32 (%struct.FILE*, i8*, ...)* @fscanf(%struct.FILE* %fp, i8* getelementptr inbounds ([14 x i8]* @.str2708, i32 0, i32 0), i32* undef, i32* undef, i32* %xcenter, i32* null) nounwind ; [#uses=1]
+  %50 = icmp eq i32 %49, 4 ; [#uses=1]
+  br i1 %50, label %bb, label %return
+
+return: ; preds = %bb445, %entry
+  ret void
+}
+
+declare i32 @fscanf(%struct.FILE* nocapture, i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
new file mode 100755
index 0000000..7650d88
--- /dev/null
+++ b/test/CodeGen/ARM/2010-06-21-nondarwin-tc.ll
@@ -0,0 +1,145 @@
+; RUN: llc < %s -march=arm -mtriple=armv4t-unknown-linux-gnueabi | FileCheck %s
+; PR 7433
+
+%0 = type { i8*, i8* }
+%1 = type { i8*, i8*, i8* }
+%"class.llvm::Record" = type { i32, %"class.std::basic_string", %"class.llvm::SMLoc", %"class.std::vector", %"class.std::vector", %"class.std::vector" }
+%"class.llvm::RecordVal" = type { %"class.std::basic_string", %"struct.llvm::Init"*, i32, %"struct.llvm::Init"* }
+%"class.llvm::SMLoc" = type { i8* }
+%"class.llvm::StringInit" = type { [8 x i8], %"class.std::basic_string" }
+%"class.std::basic_string" = type { %"class.llvm::SMLoc" }
+%"class.std::vector" = type { [12 x i8] }
+%"struct.llvm::Init" = type { i32 (...)** }
+
+@_ZTIN4llvm5RecTyE = external constant %0 ; <%0*> [#uses=1]
+@_ZTIN4llvm4InitE = external constant %0 ; <%0*> [#uses=1]
+@_ZTIN4llvm11RecordRecTyE = external constant %1 ; <%1*> [#uses=1]
+@.str8 = external constant [47 x i8] ; <[47 x i8]*> [#uses=1]
+@_ZTIN4llvm9UnsetInitE = external constant %1 ; <%1*> [#uses=1]
+@.str51 = external constant [45 x i8] ; <[45 x i8]*> [#uses=1]
+@__PRETTY_FUNCTION__._ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs = external constant [116 x i8] ; <[116 x i8]*> [#uses=1]
+
+@_ZN4llvm9RecordValC1ERKSsPNS_5RecTyEj = alias void (%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32)* @_ZN4llvm9RecordValC2ERKSsPNS_5RecTyEj ; [#uses=0]
+
+declare i8* @__dynamic_cast(i8*, i8*, i8*, i32)
+
+declare void @__assert_fail(i8*, i8*, i32, i8*) noreturn
+
+declare void @_ZN4llvm9RecordValC2ERKSsPNS_5RecTyEj(%"class.llvm::RecordVal"*, %"class.std::basic_string"*, %"struct.llvm::Init"*, i32) align 2
+
+define %"struct.llvm::Init"* @_ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs(%"class.llvm::StringInit"* %this, %"class.llvm::Record"* %R, %"class.llvm::RecordVal"* %RV, %"class.std::basic_string"* %FieldName) align 2 {
+;CHECK: ldmia sp!, {r4, r5, r6, r7, r8, lr}
+;CHECK: bx r12 @ TAILCALL
+entry:
+  %.loc = alloca i32 ; [#uses=2]
+  %tmp.i = getelementptr inbounds %"class.llvm::StringInit"* %this, i32 0, i32 0, i32 4 ; [#uses=1]
+  %0 = bitcast i8* %tmp.i to %"struct.llvm::Init"** ; <%"struct.llvm::Init"**> [#uses=1]
+  %tmp2.i = load %"struct.llvm::Init"** %0 ; <%"struct.llvm::Init"*> [#uses=2]
+  %1 = icmp eq %"struct.llvm::Init"* %tmp2.i, null ; [#uses=1]
+  br i1 %1, label %entry.return_crit_edge, label %tmpbb
+
+entry.return_crit_edge: ; preds = %entry
+  br label %return
+
+tmpbb: ; preds = %entry
+  %2 = bitcast %"struct.llvm::Init"* %tmp2.i to i8* ; [#uses=1]
+  %3 = tail call i8* @__dynamic_cast(i8* %2, i8* bitcast (%0* @_ZTIN4llvm5RecTyE to i8*), i8* bitcast (%1* @_ZTIN4llvm11RecordRecTyE to i8*), i32 -1) ; [#uses=1]
+  %phitmp = icmp eq i8* %3, null ; [#uses=1]
+  br i1 %phitmp, label %.return_crit_edge, label %if.then
+
+.return_crit_edge: ; preds = %tmpbb
+  br label %return
+
+if.then: ; preds = %tmpbb
+  %tmp2.i.i.i.i = getelementptr inbounds %"class.llvm::StringInit"* %this, i32 0, i32 1, i32 0, i32 0 ; [#uses=1]
+  %tmp3.i.i.i.i = load i8** %tmp2.i.i.i.i ; [#uses=2]
+  %arrayidx.i.i.i.i = getelementptr inbounds i8* %tmp3.i.i.i.i, i32 -12 ; [#uses=1]
+  %tmp.i.i.i = bitcast i8* %arrayidx.i.i.i.i to i32* ; [#uses=1]
+  %tmp2.i.i.i = load i32* %tmp.i.i.i ; [#uses=1]
+  %tmp.i5 = getelementptr inbounds %"class.llvm::Record"* %R, i32 0, i32 4 ; <%"class.std::vector"*> [#uses=1]
+  %tmp2.i.i = getelementptr inbounds %"class.llvm::Record"* %R, i32 0, i32 4, i32 0, i32 4 ; [#uses=1]
+  %4 = bitcast i8* %tmp2.i.i to %"class.llvm::RecordVal"** ; <%"class.llvm::RecordVal"**> [#uses=1]
+  %tmp3.i.i6 = load %"class.llvm::RecordVal"** %4 ; <%"class.llvm::RecordVal"*> [#uses=1]
+  %tmp5.i.i = bitcast %"class.std::vector"* %tmp.i5 to %"class.llvm::RecordVal"** ; <%"class.llvm::RecordVal"**> [#uses=1]
+  %tmp6.i.i = load %"class.llvm::RecordVal"** %tmp5.i.i ; <%"class.llvm::RecordVal"*> [#uses=5]
+  %sub.ptr.lhs.cast.i.i = ptrtoint %"class.llvm::RecordVal"* %tmp3.i.i6 to i32 ; [#uses=1]
+  %sub.ptr.rhs.cast.i.i = ptrtoint %"class.llvm::RecordVal"* %tmp6.i.i to i32 ; [#uses=1]
+  %sub.ptr.sub.i.i = sub i32 %sub.ptr.lhs.cast.i.i, %sub.ptr.rhs.cast.i.i ; [#uses=1]
+  %sub.ptr.div.i.i = ashr i32 %sub.ptr.sub.i.i, 4 ; [#uses=1]
+  br label %codeRepl
+
+codeRepl: ; preds = %if.then
+  %targetBlock = call i1 @_ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs_for.cond.i(i32 %sub.ptr.div.i.i, %"class.llvm::RecordVal"* %tmp6.i.i, i32 %tmp2.i.i.i, i8* %tmp3.i.i.i.i, i32* %.loc) ; [#uses=1]
+  %.reload = load i32* %.loc ; [#uses=3]
+  br i1 %targetBlock, label %for.cond.i.return_crit_edge, label %_ZN4llvm6Record8getValueENS_9StringRefE.exit
+
+for.cond.i.return_crit_edge: ; preds = %codeRepl
+  br label %return
+
+_ZN4llvm6Record8getValueENS_9StringRefE.exit: ; preds = %codeRepl
+  %add.ptr.i.i = getelementptr inbounds %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload ; <%"class.llvm::RecordVal"*> [#uses=2]
+  %tobool5 = icmp eq %"class.llvm::RecordVal"* %add.ptr.i.i, null ; [#uses=1]
+  br i1 %tobool5, label %_ZN4llvm6Record8getValueENS_9StringRefE.exit.return_crit_edge, label %if.then6
+
+_ZN4llvm6Record8getValueENS_9StringRefE.exit.return_crit_edge: ; preds = %_ZN4llvm6Record8getValueENS_9StringRefE.exit
+  br label %return
+
+if.then6: ; preds = %_ZN4llvm6Record8getValueENS_9StringRefE.exit
+  %cmp = icmp eq %"class.llvm::RecordVal"* %add.ptr.i.i, %RV ; [#uses=1]
+  br i1 %cmp, label %if.then6.if.end_crit_edge, label %land.lhs.true
+
+if.then6.if.end_crit_edge: ; preds = %if.then6
+  br label %if.end
+
+land.lhs.true: ; preds = %if.then6
+  %tobool10 = icmp eq %"class.llvm::RecordVal"* %RV, null ; [#uses=1]
+  br i1 %tobool10, label %lor.lhs.false, label %land.lhs.true.return_crit_edge
+
+land.lhs.true.return_crit_edge: ; preds = %land.lhs.true
+  br label %return
+
+lor.lhs.false: ; preds = %land.lhs.true
+  %tmp.i3 = getelementptr inbounds %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1]
+  %tmp2.i4 = load %"struct.llvm::Init"** %tmp.i3 ; <%"struct.llvm::Init"*> [#uses=2]
+  %5 = icmp eq %"struct.llvm::Init"* %tmp2.i4, null ; [#uses=1]
+  br i1 %5, label %lor.lhs.false.if.end_crit_edge, label %tmpbb1
+
+lor.lhs.false.if.end_crit_edge: ; preds = %lor.lhs.false
+  br label %if.end
+
+tmpbb1: ; preds = %lor.lhs.false
+  %6 = bitcast %"struct.llvm::Init"* %tmp2.i4 to i8* ; [#uses=1]
+  %7 = tail call i8* @__dynamic_cast(i8* %6, i8* bitcast (%0* @_ZTIN4llvm4InitE to i8*), i8* bitcast (%1* @_ZTIN4llvm9UnsetInitE to i8*), i32 -1) ; [#uses=1]
+  %phitmp32 = icmp eq i8* %7, null ; [#uses=1]
+  br i1 %phitmp32, label %.if.end_crit_edge, label %.return_crit_edge1
+
+.return_crit_edge1: ; preds = %tmpbb1
+  br label %return
+
+.if.end_crit_edge: ; preds = %tmpbb1
+  br label %if.end
+
+if.end: ; preds = %.if.end_crit_edge, %lor.lhs.false.if.end_crit_edge, %if.then6.if.end_crit_edge
+  %tmp.i1 = getelementptr inbounds %"class.llvm::RecordVal"* %tmp6.i.i, i32 %.reload, i32 3 ; <%"struct.llvm::Init"**> [#uses=1]
+  %tmp2.i2 = load %"struct.llvm::Init"** %tmp.i1 ; <%"struct.llvm::Init"*> [#uses=3]
+  %8 = bitcast %"class.llvm::StringInit"* %this to %"struct.llvm::Init"* ; <%"struct.llvm::Init"*> [#uses=1]
+  %cmp19 = icmp eq %"struct.llvm::Init"* %tmp2.i2, %8 ; [#uses=1]
+  br i1 %cmp19, label %cond.false, label %cond.end
+
+cond.false: ; preds = %if.end
+  tail call void @__assert_fail(i8* getelementptr inbounds ([45 x i8]* @.str51, i32 0, i32 0), i8* getelementptr inbounds ([47 x i8]* @.str8, i32 0, i32 0), i32 1141, i8* getelementptr inbounds ([116 x i8]* @__PRETTY_FUNCTION__._ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs, i32 0, i32 0)) noreturn
+  unreachable
+
+cond.end: ; preds = %if.end
+  %9 = bitcast %"struct.llvm::Init"* %tmp2.i2 to %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)***> [#uses=1]
+  %10 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*** %9 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1]
+  %vfn = getelementptr inbounds %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %10, i32 8 ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)**> [#uses=1]
+  %11 = load %"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)** %vfn ; <%"struct.llvm::Init"* (%"struct.llvm::Init"*, %"class.llvm::Record"*, %"class.llvm::RecordVal"*, %"class.std::basic_string"*)*> [#uses=1]
+  %call25 = tail call %"struct.llvm::Init"* %11(%"struct.llvm::Init"* %tmp2.i2, %"class.llvm::Record"* %R, %"class.llvm::RecordVal"* %RV, %"class.std::basic_string"* %FieldName) ; <%"struct.llvm::Init"*> [#uses=1]
+  ret %"struct.llvm::Init"* %call25
+
+return: ; preds = %.return_crit_edge1, %land.lhs.true.return_crit_edge, %_ZN4llvm6Record8getValueENS_9StringRefE.exit.return_crit_edge, %for.cond.i.return_crit_edge, %.return_crit_edge, %entry.return_crit_edge
+  ret %"struct.llvm::Init"* null
+}
+
+declare i1 @_ZNK4llvm7VarInit12getFieldInitERNS_6RecordEPKNS_9RecordValERKSs_for.cond.i(i32, %"class.llvm::RecordVal"*, i32, i8*, i32*)
diff --git a/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
new file mode 100644
index 0000000..cdb11c7
--- /dev/null
+++ b/test/CodeGen/ARM/2010-06-25-Thumb2ITInvalidIterator.ll
@@ -0,0 +1,75 @@
+; RUN: llc < %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin3.0.0-iphoneos"
+
+@length = common global i32 0, align 4 ; [#uses=1]
+
+define void @x0(i8* nocapture %buf, i32 %nbytes) nounwind optsize {
+entry:
+  tail call void @llvm.dbg.value(metadata !{i8* %buf}, i64 0, metadata !0), !dbg !15
+  tail call void @llvm.dbg.value(metadata !{i32 %nbytes}, i64 0, metadata !8), !dbg !16
+  %tmp = load i32* @length, !dbg !17 ; [#uses=3]
+  %cmp = icmp eq i32 %tmp, -1, !dbg !17 ; [#uses=1]
+  %cmp.not = xor i1 %cmp, true ; [#uses=1]
+  %cmp3 = icmp ult i32 %tmp, %nbytes, !dbg !17 ; [#uses=1]
+  %or.cond = and i1 %cmp.not, %cmp3 ; [#uses=1]
+  tail call void @llvm.dbg.value(metadata !{i32 %tmp}, i64 0, metadata !8), !dbg !17
+  %nbytes.addr.0 = select i1 %or.cond, i32 %tmp, i32 %nbytes ; [#uses=1]
+  tail call void @llvm.dbg.value(metadata !18, i64 0, metadata !10), !dbg !19
+  br label %while.cond, !dbg !20
+
+while.cond: ; preds = %while.body, %entry
+  %0 = phi i32 [ 0, %entry ], [ %inc, %while.body ] ; [#uses=3]
+  %buf.addr.0 = getelementptr i8* %buf, i32 %0 ; [#uses=1]
+  %cmp7 = icmp ult i32 %0, %nbytes.addr.0, !dbg !20 ; [#uses=1]
+  br i1 %cmp7, label %land.rhs, label %while.end, !dbg !20
+
+land.rhs: ; preds = %while.cond
+  %call = tail call i32 @x1() nounwind optsize, !dbg !20 ; [#uses=2]
+  %cmp9 = icmp eq i32 %call, -1, !dbg !20 ; [#uses=1]
+  br i1 %cmp9, label %while.end, label %while.body, !dbg !20
+
+while.body: ; preds = %land.rhs
+  %conv = trunc i32 %call to i8, !dbg !21 ; [#uses=1]
+  store i8 %conv, i8* %buf.addr.0, !dbg !21
+  %inc = add i32 %0, 1, !dbg !23 ; [#uses=1]
+  br label %while.cond, !dbg !24
+
+while.end: ; preds = %land.rhs, %while.cond
+  ret void, !dbg !25
+}
+
+declare i32 @x1() optsize
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!llvm.dbg.lv.fn = !{!0, !8, !10, !12}
+!llvm.dbg.gv = !{!14}
+
+!0 = metadata !{i32 524545, metadata !1, metadata !"buf", metadata !2, i32 4, metadata !6} ; [ DW_TAG_arg_variable ]
+!1 = metadata !{i32 524334, i32 0, metadata !2, metadata !"x0", metadata !"x0", metadata !"x0", metadata !2, i32 5, metadata !4, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ]
+!2 = metadata !{i32 524329, metadata !"t.c", metadata !"/private/tmp", metadata !3} ; [ DW_TAG_file_type ]
+!3 = metadata !{i32 524305, i32 0, i32 12, metadata !"t.c", metadata !".", metadata !"clang 2.0", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!4 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !5, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!5 = metadata !{null}
+!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !7} ; [ DW_TAG_pointer_type ]
+!7 = metadata !{i32 524324, metadata !2, metadata !"unsigned char", metadata !2, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!8 = metadata !{i32 524545, metadata !1, metadata !"nbytes", metadata !2, i32 4, metadata !9} ; [ DW_TAG_arg_variable ]
+!9 = metadata !{i32 524324, metadata !2, metadata !"unsigned long", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!10 = metadata !{i32 524544, metadata !11, metadata !"nread", metadata !2, i32 6, metadata !9} ; [ DW_TAG_auto_variable ]
+!11 = metadata !{i32 524299, metadata !1, i32 5, i32 1} ; [ DW_TAG_lexical_block ]
+!12 = metadata !{i32 524544, metadata !11, metadata !"c", metadata !2, i32 7, metadata !13} ; [ DW_TAG_auto_variable ]
+!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!14 = metadata !{i32 524340, i32 0, metadata !2, metadata !"length", metadata !"length", metadata !"length", metadata !2, i32 1, metadata !13, i1 false, i1 true, i32* @length} ; [ DW_TAG_variable ]
+!15 = metadata !{i32 4, i32 24, metadata !1, null}
+!16 = metadata !{i32 4, i32 43, metadata !1, null}
+!17 = metadata !{i32 9, i32 2, metadata !11, null}
+!18 = metadata !{i32 0}
+!19 = metadata !{i32 10, i32 2, metadata !11, null}
+!20 = metadata !{i32 11, i32 2, metadata !11, null}
+!21 = metadata !{i32 12, i32 3, metadata !22, null}
+!22 = metadata !{i32 524299, metadata !11, i32 11, i32 45} ; [ DW_TAG_lexical_block ]
+!23 = metadata !{i32 13, i32 3, metadata !22, null}
+!24 = metadata !{i32 14, i32 2, metadata !22, null}
+!25 = metadata !{i32 15, i32 1, metadata !11, null}
diff --git a/test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll b/test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll
new file mode 100644
index 0000000..ad2810b
--- /dev/null
+++ b/test/CodeGen/ARM/2010-06-28-DAGCombineUndef.ll
@@ -0,0 +1,10 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+
+define void @main() nounwind {
+entry:
+  store <2 x i64> undef, <2 x i64>* undef, align 16
+  %0 = load <16 x i8>* undef, align 16 ; <<16 x i8>> [#uses=1]
+  %1 = or <16 x i8> zeroinitializer, %0 ; <<16 x i8>> [#uses=1]
+  store <16 x i8> %1, <16 x i8>* undef, align 16
+  ret void
+}
diff --git a/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
new file mode 100644
index 0000000..0c5b180
--- /dev/null
+++ b/test/CodeGen/ARM/2010-06-29-PartialRedefFastAlloc.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -O0 -mcpu=cortex-a8 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+; This tests the fast register allocator's handling of partial redefines:
+;
+; %reg1028:dsub_0, %reg1028:dsub_1 = VLD1q64 %reg1025...
+; %reg1030:dsub_1 = COPY %reg1028:dsub_0
+;
+; %reg1028 gets allocated %Q0, and if %reg1030 is reloaded for the partial
+; redef, it cannot also get %Q0.
+
+; CHECK: vld1.64 {d0, d1}, [r{{.}}]
+; CHECK-NOT: vld1.64 {d0, d1}
+; CHECK: vmov.f64 d3, d0
+
+define i32 @test(i8* %arg) nounwind {
+entry:
+  %0 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i8* %arg)
+  %1 = shufflevector <2 x i64> undef, <2 x i64> %0, <2 x i32>
+  store <2 x i64> %1, <2 x i64>* undef, align 16
+  ret i32 undef
+}
+
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll b/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
new file mode 100644
index 0000000..984583e
--- /dev/null
+++ b/test/CodeGen/ARM/2010-06-29-SubregImpDefs.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=arm -mattr=+neon
+
+@.str271 = external constant [21 x i8], align 4 ; <[21 x i8]*> [#uses=1]
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32, i8**)* @main to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define i32 @main(i32 %argc, i8** %argv) nounwind {
+entry:
+  %0 = shufflevector <2 x i64> undef, <2 x i64> zeroinitializer, <2 x i32> ; <<2 x i64>> [#uses=1]
+  store <2 x i64> %0, <2 x i64>* undef, align 16
+  %val4723 = load <8 x i16>* undef ; <<8 x i16>> [#uses=1]
+  call void @PrintShortX(i8* getelementptr inbounds ([21 x i8]* @.str271, i32 0, i32 0), <8 x i16> %val4723, i32 0) nounwind
+  ret i32 undef
+}
+
+declare void @PrintShortX(i8*, <8 x i16>, i32) nounwind
diff --git a/test/CodeGen/ARM/alloca.ll b/test/CodeGen/ARM/alloca.ll
index 82a8c98..4a0835a 100644
--- a/test/CodeGen/ARM/alloca.ll
+++ b/test/CodeGen/ARM/alloca.ll
@@ -2,11 +2,11 @@
 
 define void @f(i32 %a) {
 entry:
-; CHECK: mov r11, sp
+; CHECK: add r11, sp, #4
   %tmp = alloca i8, i32 %a ; [#uses=1]
   call void @g( i8* %tmp, i32 %a, i32 1, i32 2, i32 3 )
   ret void
-; CHECK: mov sp, r11
+; CHECK: sub sp, r11, #4
 }
 
 declare void @g(i8*, i32, i32, i32, i32)
diff --git a/test/CodeGen/ARM/arm-frameaddr.ll b/test/CodeGen/ARM/arm-frameaddr.ll
index 1c7ac25..2cf1422 100644
--- a/test/CodeGen/ARM/arm-frameaddr.ll
+++ b/test/CodeGen/ARM/arm-frameaddr.ll
@@ -3,7 +3,7 @@
 ; PR4344
 ; PR4416
 
-define arm_aapcscc i8* @t() nounwind {
+define i8* @t() nounwind {
 entry:
 ; DARWIN: t:
 ; DARWIN: mov r0, r7
diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll
index 2c8f2ab..382a183 100644
--- a/test/CodeGen/ARM/arm-returnaddr.ll
+++ b/test/CodeGen/ARM/arm-returnaddr.ll
@@ -1,19 +1,21 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv6-apple-darwin
+; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s
 ; rdar://8015977
 ; rdar://8020118
 
-define arm_apcscc i8* @rt0(i32 %x) nounwind readnone {
+define i8* @rt0(i32 %x) nounwind readnone {
 entry:
 ; CHECK: rt0:
+; CHECK: {r7, lr}
 ; CHECK: mov r0, lr
   %0 = tail call i8* @llvm.returnaddress(i32 0)
   ret i8* %0
 }
 
-define arm_apcscc i8* @rt2() nounwind readnone {
+define i8* @rt2() nounwind readnone {
 entry:
 ; CHECK: rt2:
+; CHECK: {r7, lr}
 ; CHECK: ldr r0, [r7]
 ; CHECK: ldr r0, [r0]
 ; CHECK: ldr r0, [r0, #4]
diff --git a/test/CodeGen/ARM/armv4.ll b/test/CodeGen/ARM/armv4.ll
index 49b129da..ef722de 100644
--- a/test/CodeGen/ARM/armv4.ll
+++ b/test/CodeGen/ARM/armv4.ll
@@ -5,7 +5,7 @@
 ; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s -check-prefix=ARM
 ; RUN: llc < %s -mtriple=armv4t-unknown-eabi | FileCheck %s -check-prefix=THUMB
 
-define arm_aapcscc i32 @test(i32 %a) nounwind readnone {
+define i32 @test(i32 %a) nounwind readnone {
 entry:
 ; ARM: mov pc
 ; THUMB: bx
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
new file mode 100644
index 0000000..f1269d5
--- /dev/null
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -0,0 +1,48 @@
+; RUN: llc < %s -mtriple=arm-apple-darwin -march=arm | FileCheck %s -check-prefix=CHECKV4
+; RUN: llc < %s -march=arm -mtriple=arm-apple-darwin -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
+; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
+; RUN:   -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
+
+@t = weak global i32 ()* null ; [#uses=1]
+
+declare void @g(i32, i32, i32, i32)
+
+define void @t1() {
+; CHECKELF: t1:
+; CHECKELF: PLT
+  call void @g( i32 1, i32 2, i32 3, i32 4 )
+  ret void
+}
+
+define void @t2() {
+; CHECKV4: t2:
+; CHECKV4: bx r0 @ TAILCALL
+; CHECKV5: t2:
+; CHECKV5: bx r0 @ TAILCALL
+  %tmp = load i32 ()** @t ; [#uses=1]
+  %tmp.upgrd.2 = tail call i32 %tmp( ) ; [#uses=0]
+  ret void
+}
+
+define i32* @t3(i32, i32, i32*, i32*, i32*) nounwind {
+; CHECKV4: t3:
+; CHECKV4: bx r{{.*}}
+BB0:
+  %5 = inttoptr i32 %0 to i32* ; [#uses=1]
+  %t35 = volatile load i32* %5 ; [#uses=1]
+  %6 = inttoptr i32 %t35 to i32** ; [#uses=1]
+  %7 = getelementptr i32** %6, i32 86 ; [#uses=1]
+  %8 = load i32** %7 ; [#uses=1]
+  %9 = bitcast i32* %8 to i32* (i32, i32*, i32, i32*, i32*, i32*)* ; [#uses=1]
+  %10 = call i32* %9(i32 %0, i32* null, i32 %1, i32* %2, i32* %3, i32* %4) ; [#uses=1]
+  ret i32* %10
+}
+
+define void @t4() {
+; CHECKV4: t4:
+; CHECKV4: b _t2 @ TAILCALL
+; CHECKV5: t4:
+; CHECKV5: b _t2 @ TAILCALL
+  tail call void @t2( ) ; [#uses=0]
+  ret void
+}
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index c60b75b..c020b6f 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -8,16 +8,16 @@
 
 declare void @g(i32, i32, i32, i32)
 
 define void @f() {
-; CHECKV4: mov lr, pc
-; CHECKV5: blx
 ; CHECKELF: PLT
   call void @g( i32 1, i32 2, i32 3, i32 4 )
   ret void
 }
 
 define void @g.upgrd.1() {
+; CHECKV4: mov lr, pc
+; CHECKV5: blx
   %tmp = load i32 ()** @t ; [#uses=1]
-  %tmp.upgrd.2 = tail call i32 %tmp( ) ; [#uses=0]
+  %tmp.upgrd.2 = call i32 %tmp( ) ; [#uses=0]
   ret void
 }
diff --git a/test/CodeGen/ARM/crash-O0.ll b/test/CodeGen/ARM/crash-O0.ll
new file mode 100644
index 0000000..8bce4e0
--- /dev/null
+++ b/test/CodeGen/ARM/crash-O0.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -O0 -relocation-model=pic -disable-fp-elim
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-n32"
+target triple = "armv6-apple-darwin10"
+
+%struct0 = type { i32, i32 }
+
+; This function would crash RegAllocFast because it tried to spill %CPSR.
+define arm_apcscc void @clobber_cc() nounwind noinline ssp {
+entry:
+  %asmtmp = call %struct0 asm sideeffect "...", "=&r,=&r,r,Ir,r,~{cc},~{memory}"(i32* undef, i32 undef, i32 1) nounwind ; <%0> [#uses=0]
+  unreachable
+}
+
+@.str523 = private constant [256 x i8] c"\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00", align 4 ; <[256 x i8]*> [#uses=1]
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+; This function uses the scavenger for an ADDri instruction.
+; ARMBaseRegisterInfo::estimateRSStackSizeLimit must return a 255 limit.
+define arm_apcscc void @scavence_ADDri() nounwind {
+entry:
+  %letter = alloca i8 ; [#uses=0]
+  %prodvers = alloca [256 x i8] ; <[256 x i8]*> [#uses=1]
+  %buildver = alloca [256 x i8] ; <[256 x i8]*> [#uses=0]
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* getelementptr inbounds ([256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
+  %prodvers2 = bitcast [256 x i8]* %prodvers to i8* ; [#uses=1]
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %prodvers2, i8* getelementptr inbounds ([256 x i8]* @.str523, i32 0, i32 0), i32 256, i32 1, i1 false)
+  unreachable
+}
diff --git a/test/CodeGen/ARM/flag-crash.ll b/test/CodeGen/ARM/flag-crash.ll
new file mode 100644
index 0000000..9c61944
--- /dev/null
+++ b/test/CodeGen/ARM/flag-crash.ll
@@ -0,0 +1,27 @@
+; RUN: llc < %s -O3 -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 -relocation-model=pic
+; PR7484
+
+%struct.gs_matrix = type { float, i32, float, i32, float, i32, float, i32, float, i32, float, i32 }
+
+define fastcc void @func(%struct.gs_matrix* nocapture %pm1) nounwind {
+entry:
+  %0 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 6
+  %1 = load float* %0, align 4
+  %2 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 8
+  %3 = load float* %2, align 4
+  %4 = getelementptr inbounds %struct.gs_matrix* %pm1, i32 0, i32 2
+  %5 = bitcast float* %4 to i32*
+  %6 = load i32* %5, align 4
+  %7 = or i32 0, %6
+  %.mask = and i32 %7, 2147483647
+  %8 = icmp eq i32 %.mask, 0
+  br i1 %8, label %bb, label %bb11
+
+bb:
+  ret void
+
+bb11:
+  %9 = fmul float %1, undef
+  %10 = fmul float %3, undef
+  ret void
+}
diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll
new file mode 100644
index 0000000..8016033
--- /dev/null
+++ b/test/CodeGen/ARM/fpcmp-opt.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=arm -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck %s
+; rdar://7461510
+
+define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
+entry:
+; CHECK: t1:
+; CHECK-NOT: vldr
+; CHECK: ldr
+; CHECK: ldr
+; CHECK: cmp r0, r1
+; CHECK-NOT: vcmpe.f32
+; CHECK-NOT: vmrs
+; CHECK: beq
+  %0 = load float* %a
+  %1 = load float* %b
+  %2 = fcmp une float %0, %1
+  br i1 %2, label %bb1, label %bb2
+
+bb1:
+  %3 = call i32 @bar()
+  ret i32 %3
+
+bb2:
+  %4 = call i32 @foo()
+  ret i32 %4
+}
+
+declare i32 @bar()
+declare i32 @foo()
diff --git a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll
index 710994d..f1d6a16 100644
--- a/test/CodeGen/ARM/fpconsts.ll
+++ b/test/CodeGen/ARM/fpconsts.ll
@@ -1,6 +1,6 @@
 ; RUN: llc < %s -march=arm -mattr=+vfp3 | FileCheck %s
 
-define arm_apcscc float @t1(float %x) nounwind readnone optsize {
+define float @t1(float %x) nounwind readnone optsize {
 entry:
 ; CHECK: t1:
 ; CHECK: vmov.f32 s1, #4.000000e+00
@@ -8,7 +8,7 @@ entry:
   ret float %0
 }
 
-define arm_apcscc double @t2(double %x) nounwind readnone optsize {
+define double @t2(double %x) nounwind readnone optsize {
 entry:
 ; CHECK: t2:
 ; CHECK: vmov.f64 d1, #3.000000e+00
@@ -16,7 +16,7 @@ entry:
   ret double %0
 }
 
-define arm_apcscc double @t3(double %x) nounwind readnone optsize {
+define double @t3(double %x) nounwind readnone optsize {
 entry:
 ; CHECK: t3:
 ; CHECK: vmov.f64 d1, #-1.300000e+01
@@ -24,7 +24,7 @@ entry:
   ret double %0
 }
 
-define arm_apcscc float @t4(float %x) nounwind readnone optsize {
+define float @t4(float %x) nounwind readnone optsize {
 entry:
 ; CHECK: t4:
 ; CHECK: vmov.f32 s1, #-2.400000e+01
diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll
index d9cac80..7b9d0cf 100644
--- a/test/CodeGen/ARM/ifcvt2.ll
+++ b/test/CodeGen/ARM/ifcvt2.ll
@@ -1,10 +1,8 @@
-; RUN: llc < %s -march=arm > %t
-; RUN: grep bxlt %t | count 1
-; RUN: grep bxgt %t | count 1
-; RUN: not grep bxge %t
-; RUN: not grep bxle %t
+; RUN: llc < %s -march=arm | FileCheck %s
 
 define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t1:
+; CHECK: bxlt lr
   %tmp2 = icmp sgt i32 %c, 10
   %tmp5 = icmp slt i32 %d, 4
   %tmp8 = or i1 %tmp5, %tmp2
@@ -21,6 +19,13 @@ UnifiedReturnBlock:
 }
 
 define i32 @t2(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK: t2:
+; CHECK: bxgt lr
+; CHECK: cmp
+; CHECK: addge
+; CHECK: subge
+; CHECK-NOT: bxge lr
+; CHECK: bx lr
   %tmp2 = icmp sgt i32 %c, 10
   %tmp5 = icmp slt i32 %d, 4
   %tmp8 = and i1 %tmp5, %tmp2
diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll
index 342208b..e2c0ba3 100644
--- a/test/CodeGen/ARM/ifcvt6.ll
+++ b/test/CodeGen/ARM/ifcvt6.ll
@@ -11,7 +11,7 @@ entry:
   br i1 %tmp7, label %cond_true, label %UnifiedReturnBlock
 
 cond_true: ; preds = %entry
-  %tmp10 = tail call i32 (...)* @bar( ) ; [#uses=0]
+  %tmp10 = call i32 (...)* @bar( ) ; [#uses=0]
   ret void
 
 UnifiedReturnBlock: ; preds = %entry
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index f898060..0aac9d1 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -5,7 +5,7 @@
 @nextaddr = global i8* null ; [#uses=2]
 @C.0.2070 = private constant [5 x i8*] [i8* blockaddress(@foo, %L1), i8* blockaddress(@foo, %L2), i8* blockaddress(@foo, %L3), i8* blockaddress(@foo, %L4), i8* blockaddress(@foo, %L5)] ; <[5 x i8*]*> [#uses=1]
 
-define internal arm_apcscc i32 @foo(i32 %i) nounwind {
+define internal i32 @foo(i32 %i) nounwind {
 ; ARM: foo:
 ; THUMB: foo:
 ; THUMB2: foo:
diff --git a/test/CodeGen/ARM/inlineasm.ll b/test/CodeGen/ARM/inlineasm.ll
index d522348..cca3c69 100644
--- a/test/CodeGen/ARM/inlineasm.ll
+++ b/test/CodeGen/ARM/inlineasm.ll
@@ -6,14 +6,6 @@ define i32 @test1(i32 %tmp54) {
 }
 
 define void @test2() {
-  %tmp1 = call i64 asm "ldmia $1!, {$0, ${0:H}}", "=r,=*r,1"( i32** null, i32* null ) ; [#uses=2]
-  %tmp2 = lshr i64 %tmp1, 32 ; [#uses=1]
-  %tmp3 = trunc i64 %tmp2 to i32 ; [#uses=1]
-  %tmp4 = call i32 asm "pkhbt $0, $1, $2, lsl #16", "=r,r,r"( i32 0, i32 %tmp3 ) ; [#uses=0]
-  ret void
-}
-
-define void @test3() {
   tail call void asm sideeffect "/* number: ${0:c} */", "i"( i32 1 )
   ret void
 }
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index f062772..687e138 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -3,7 +3,7 @@
 ; Radar 7449043
 %struct.int32x4_t = type { <4 x i32> }
 
-define arm_apcscc void @t() nounwind {
+define void @t() nounwind {
 entry:
 ; CHECK: vmov.I64 q15, #0
 ; CHECK: vmov.32 d30[0], r0
@@ -16,7 +16,7 @@ entry:
 ; Radar 7457110
 %struct.int32x2_t = type { <4 x i32> }
 
-define arm_apcscc void @t2() nounwind {
+define void @t2() nounwind {
 entry:
 ; CHECK: vmov d30, d0
 ; CHECK: vmov.32 r0, d30[0]
diff --git a/test/CodeGen/ARM/insn-sched1.ll b/test/CodeGen/ARM/insn-sched1.ll
index 59f0d53..1d32322 100644
--- a/test/CodeGen/ARM/insn-sched1.ll
+++ b/test/CodeGen/ARM/insn-sched1.ll
@@ -4,7 +4,7 @@
 
 define i32 @test(i32 %x) {
   %tmp = trunc i32 %x to i16 ; [#uses=1]
-  %tmp2 = tail call i32 @f( i32 1, i16 %tmp ) ; [#uses=1]
+  %tmp2 = call i32 @f( i32 1, i16 %tmp ) ; [#uses=1]
   ret i32 %tmp2
 }
 
diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll
index 9a2dc82..78201a6 100644
--- a/test/CodeGen/ARM/ldm.ll
+++ b/test/CodeGen/ARM/ldm.ll
@@ -28,7 +28,7 @@ define i32 @t3() {
   %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1]
   %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1]
   %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; [#uses=1]
-  %tmp6 = tail call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; [#uses=1]
+  %tmp6 = call i32 @f2( i32 %tmp, i32 %tmp3, i32 %tmp5 ) ; [#uses=1]
   ret i32 %tmp6
 }
 
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 76332cc..688b7bc 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -23,10 +23,10 @@ define i32 @f1(i64 %x, i64 %y) {
 define i32 @f2(i64 %x, i64 %y) {
 ; CHECK: f2
 ; CHECK: mov r0, r0, lsr r2
-; CHECK-NEXT: rsb r12, r2, #32
+; CHECK-NEXT: rsb r3, r2, #32
 ; CHECK-NEXT: sub r2, r2, #32
 ; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: orr r0, r0, r1, lsl r12
+; CHECK-NEXT: orr r0, r0, r1, lsl r3
 ; CHECK-NEXT: movge r0, r1, asr r2
   %a = ashr i64 %x, %y
   %b = trunc i64 %a to i32
@@ -36,10 +36,10 @@ define i32 @f2(i64 %x, i64 %y) {
 define i32 @f3(i64 %x, i64 %y) {
 ; CHECK: f3
 ; CHECK: mov r0, r0, lsr r2
-; CHECK-NEXT: rsb r12, r2, #32
+; CHECK-NEXT: rsb r3, r2, #32
 ; CHECK-NEXT: sub r2, r2, #32
 ; CHECK-NEXT: cmp r2, #0
-; CHECK-NEXT: orr r0, r0, r1, lsl r12
+; CHECK-NEXT: orr r0, r0, r1, lsl r3
 ; CHECK-NEXT: movge r0, r1, lsr r2
   %a = lshr i64 %x, %y
   %b = trunc i64 %a to i32
diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll
index 1bbb96d..b8c543b 100644
--- a/test/CodeGen/ARM/lsr-code-insertion.ll
+++ b/test/CodeGen/ARM/lsr-code-insertion.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
+; RUN: llc < %s -stats |& grep {38.*Number of machine instrs printed}
 ; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
 ; This test really wants to check that the resultant "cond_true" block only
 ; has a single store in it, and that cond_true55 only has code to materialize
diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
index 2ac4084..25cf135 100644
--- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
+++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll
@@ -4,14 +4,14 @@
 ; constant offset addressing, so that each of the following stores
 ; uses the same register.
 
-; CHECK: vstr.32 s0, [r12, #-128]
-; CHECK: vstr.32 s0, [r12, #-96]
-; CHECK: vstr.32 s0, [r12, #-64]
-; CHECK: vstr.32 s0, [r12, #-32]
-; CHECK: vstr.32 s0, [r12]
-; CHECK: vstr.32 s0, [r12, #32]
-; CHECK: vstr.32 s0, [r12, #64]
-; CHECK: vstr.32 s0, [r12, #96]
+; CHECK: vstr.32 s0, [r9, #-128]
+; CHECK: vstr.32 s0, [r9, #-96]
+; CHECK: vstr.32 s0, [r9, #-64]
+; CHECK: vstr.32 s0, [r9, #-32]
+; CHECK: vstr.32 s0, [r9]
+; CHECK: vstr.32 s0, [r9, #32]
+; CHECK: vstr.32 s0, [r9, #64]
+; CHECK: vstr.32 s0, [r9, #96]
 
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
 
@@ -40,7 +40,7 @@ target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-
 %22 = type { void (%0*)*, void (%0*, i8***, i32, i8**, i32)* }
 %23 = type { void (%0*, i32)*, void (%0*, i8**, i8**, i32)*, void (%0*)*, void (%0*)* }
 
-define arm_apcscc void @test(%0* nocapture %a0, %11* nocapture %a1, i16* nocapture %a2, i8** nocapture %a3, i32 %a4) nounwind {
+define void @test(%0* nocapture %a0, %11* nocapture %a1, i16* nocapture %a2, i8** nocapture %a3, i32 %a4) nounwind {
 bb:
   %t = alloca [64 x float], align 4
   %t5 = getelementptr inbounds %0* %a0, i32 0, i32 65
@@ -393,7 +393,7 @@ bb295:
 %struct.z_stream = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 }
 %union.anon = type { i16 }
 
-define arm_apcscc i32 @longest_match(%struct.internal_state* %s, i32 %cur_match) nounwind optsize {
+define i32 @longest_match(%struct.internal_state* %s, i32 %cur_match) nounwind optsize {
 entry:
   %0 = getelementptr inbounds %struct.internal_state* %s, i32 0, i32 31 ; [#uses=1]
   %1 = load i32* %0, align 4 ; [#uses=2]
@@ -626,9 +626,11 @@ bb24: ; preds = %bb23
 
 ; LSR should use count-down iteration to avoid requiring the trip count
 ; in a register, and it shouldn't require any reloads here.
-; CHECK: sub.w r9, r9, #1
-; CHECK-NEXT: cmp.w r9, #0
-; CHECK-NEXT: bne.w
+; CHECK: @ %bb24
+; CHECK-NEXT: @ in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT: sub{{.*}} [[REGISTER:r[0-9]+]], #1
+; CHECK-NEXT: cmp{{.*}} [[REGISTER]], #0
+; CHECK-NEXT: bne.w
 
   %92 = icmp eq i32 %tmp81, %indvar78 ; [#uses=1]
   %indvar.next79 = add i32 %indvar78, 1 ; [#uses=1]
diff --git a/test/CodeGen/ARM/machine-cse-cmp.ll b/test/CodeGen/ARM/machine-cse-cmp.ll
new file mode 100644
index 0000000..c77402f
--- /dev/null
+++ b/test/CodeGen/ARM/machine-cse-cmp.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=arm | FileCheck %s
+;rdar://8003725
+
+@G1 = external global i32
+@G2 = external global i32
+
+define i32 @f1(i32 %cond1, i32 %x1, i32 %x2, i32 %x3) {
+entry:
+; CHECK: cmp
+; CHECK: moveq
+; CHECK-NOT: cmp
+; CHECK: moveq
+  %tmp1 = icmp eq i32 %cond1, 0
+  %tmp2 = select i1 %tmp1, i32 %x1, i32 %x2
+  %tmp3 = select i1 %tmp1, i32 %x2, i32 %x3
+  %tmp4 = add i32 %tmp2, %tmp3
+  ret i32 %tmp4
+}
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index 3ba82cc..9e365c9 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -8,7 +8,7 @@
 %struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
 %struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
 
-define arm_apcscc void @t1(i16* %i_ptr, i16* %o_ptr, %struct.int32x4_t* nocapture %vT0ptr, %struct.int32x4_t* nocapture %vT1ptr) nounwind {
+define void @t1(i16* %i_ptr, i16* %o_ptr, %struct.int32x4_t* nocapture %vT0ptr, %struct.int32x4_t* nocapture %vT1ptr) nounwind {
 entry:
 ; CHECK: t1:
 ; CHECK: vld1.16
@@ -41,13 +41,13 @@ entry:
   ret void
 }
 
-define arm_apcscc void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocapture %vT0ptr, %struct.int16x8_t* nocapture %vT1ptr) nounwind {
+define void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocapture %vT0ptr, %struct.int16x8_t* nocapture %vT1ptr) nounwind {
 entry:
 ; CHECK: t2:
 ; CHECK: vld1.16
-; CHECK: vld1.16
-; CHECK-NOT: vmov
 ; CHECK: vmul.i16
+; CHECK-NOT: vmov
+; CHECK: vld1.16
 ; CHECK: vmul.i16
 ; CHECK-NOT: vmov
 ; CHECK: vst1.16
@@ -88,7 +88,7 @@ define <8 x i8> @t3(i8* %A, i8* %B) nounwind {
   ret <8 x i8> %tmp4
 }
 
-define arm_apcscc void @t4(i32* %in, i32* %out) nounwind {
+define void @t4(i32* %in, i32* %out) nounwind {
 entry:
 ; CHECK: t4:
 ; CHECK: vld2.32
@@ -163,7 +163,7 @@ define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
   ret <8 x i8> %tmp5
 }
 
-define arm_apcscc void @t7(i32* %iptr, i32* %optr) nounwind {
+define void @t7(i32* %iptr, i32* %optr) nounwind {
 entry:
 ; CHECK: t7:
 ; CHECK: vld2.32
@@ -238,9 +238,10 @@ bb14: ; preds = %bb6
 define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
 ; CHECK: t9:
 ; CHECK: vldr.64
+; CHECK-NOT: vmov d{{.*}}, d0
 ; CHECK: vmov.i8 d1
-; CHECK-NEXT: vstmia r0, {d2,d3}
-; CHECK-NEXT: vstmia r0, {d0,d1}
+; CHECK-NEXT: vstmia r0, {d0, d1}
+; CHECK-NEXT: vstmia r0, {d0, d1}
   %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
   %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> ; <<4 x float>> [#uses=1]
   store <4 x float> %4, <4 x float>* undef, align 16
@@ -249,13 +250,13 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
   br label %8 ;